Total coverage: 70598 (4%)of 1836237
15 15 5 5 5 2 2 2 6 6 6 13 13 13 13 13 13 13 50 51 49 50 50 51 51 51 51 51 50 51 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 13 2 2 2 16 16 16 16 16 3 13 3 13 16 16 3 13 16 38 38 38 38 38 38 37 38 38 38 38 38 38 38 38 38 38 38 37 37 38 38 37 38 38 37 38 38 37 38 38 37 38 38 38 38 38 38 38 38 38 37 38 37 38 38 38 38 38 38 37 38 37 38 38 4 4 2 2 3 3 3 2 2 19 3 16 9 7 7 7 7 7 7 7 7 7 7 13 13 13 16 16 11 11 11 11 11 11 11 11 11 10 10 6 6 6 6 37 38 38 35 13 22 22 22 22 22 22 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 
1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 
1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 
2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 
2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 
2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 
3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 
3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 
4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 
4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 
4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 
5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 
5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 
6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 
6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 
6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 
7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 
7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719 7720 7721 7722 7723 7724 7725 7726 7727 7728 7729 7730 7731 7732 7733 7734 7735 7736 7737 7738 7739 7740 7741 7742 7743 7744 7745 7746 7747 7748 7749 7750 7751 7752 7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777 7778 7779 7780 7781 7782 7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886 7887 7888 7889 7890 7891 7892 7893 7894 7895 7896 7897 7898 7899 7900 7901 7902 7903 7904 7905 7906 7907 7908 7909 7910 7911 7912 7913 7914 7915 7916 7917 7918 7919 7920 7921 7922 7923 7924 7925 7926 7927 7928 7929 7930 7931 7932 7933 7934 7935 7936 7937 7938 7939 7940 7941 7942 7943 7944 7945 7946 7947 7948 7949 7950 7951 7952 7953 7954 7955 7956 7957 7958 7959 7960 7961 7962 7963 7964 7965 7966 7967 7968 7969 7970 7971 7972 7973 7974 7975 7976 7977 7978 7979 7980 7981 7982 7983 7984 7985 7986 7987 7988 7989 7990 7991 7992 7993 7994 7995 7996 7997 7998 7999 8000 8001 8002 8003 8004 8005 8006 8007 8008 8009 8010 8011 8012 8013 8014 8015 8016 8017 8018 8019 8020 8021 8022 8023 8024 8025 8026 8027 8028 8029 8030 8031 8032 8033 8034 8035 8036 8037 8038 8039 8040 8041 8042 8043 8044 8045 8046 8047 8048 8049 8050 8051 8052 8053 8054 8055 8056 8057 8058 8059 8060 8061 8062 8063 8064 8065 8066 8067 8068 8069 8070 8071 8072 8073 8074 8075 8076 8077 8078 8079 8080 
8081 8082 8083 8084 8085 8086 8087 8088 8089 8090 8091 8092 8093 8094 8095 8096 8097 8098 8099 8100 8101 8102 8103 8104 8105 8106 8107 8108 8109 8110 8111 8112 8113 8114 8115 8116 8117 8118 8119 8120 8121 8122 8123 8124 8125 8126 8127 8128 8129 8130 8131 8132 8133 8134 8135 8136 8137 8138 8139 8140 8141 8142 8143 8144 8145 8146 8147 8148 8149 8150 8151 8152 8153 8154 8155 8156 8157 8158 8159 8160 8161 8162 8163 8164 8165 8166 8167 8168 8169 8170 8171 8172 8173 8174 8175 8176 8177 8178 8179 8180 8181 8182 8183 8184 8185 8186 8187 8188 8189 8190 8191 8192 8193 8194 8195 8196 8197 8198 8199 8200 8201 8202 8203 8204 8205 8206 8207 8208 8209 8210 8211 8212 8213 8214 8215 8216 8217 8218 8219 8220 8221 8222 8223 8224 8225 8226 8227 8228 8229 8230 8231 8232 8233 8234 8235 8236 8237 8238 8239 8240 8241 8242 8243 8244 8245 8246 8247 8248 8249 8250 8251 8252 8253 8254 8255 8256 8257 8258 8259 8260 8261 8262 8263 8264 8265 8266 8267 8268 8269 8270 8271 8272 8273 8274 8275 8276 8277 8278 8279 8280 8281 8282 8283 8284 8285 8286 8287 8288 8289 8290 8291 8292 8293 8294 8295 8296 8297 8298 8299 8300 8301 8302 8303 8304 8305 8306 8307 8308 8309 8310 8311 8312 8313 8314 8315 8316 8317 8318 8319 8320 8321 8322 8323 8324 8325 8326 8327 8328 8329 8330 8331 8332 8333 8334 8335 8336 8337 8338 8339 8340 8341 8342 8343 8344 8345 8346 8347 8348 8349 8350 8351 8352 8353 8354 8355 8356 8357 8358 8359 8360 8361 8362 8363 8364 8365 8366 8367 8368 8369 8370 8371 8372 8373 8374 8375 8376 8377 8378 8379 8380 8381 8382 8383 8384 8385 8386 8387 8388 8389 8390 8391 8392 8393 8394 8395 8396 8397 8398 8399 8400 8401 8402 8403 8404 8405 8406 8407 8408 8409 8410 8411 8412 8413 8414 8415 8416 8417 8418 8419 8420 8421 8422 8423 8424 8425 8426 8427 8428 8429 8430 8431 8432 8433 8434 8435 8436 8437 8438 8439 8440 8441 8442 8443 8444 8445 8446 8447 8448 8449 8450 8451 8452 8453 8454 8455 8456 8457 8458 8459 8460 8461 8462 8463 8464 8465 8466 8467 8468 8469 8470 8471 8472 8473 8474 8475 8476 8477 8478 8479 8480 
8481 8482 8483 8484 8485 8486 8487 8488 8489 8490 8491 8492 8493 8494 8495 8496 8497 8498 8499 8500 8501 8502 8503 8504 8505 8506 8507 8508 8509 8510 8511 8512 8513 8514 8515 8516 8517 8518 8519 8520 8521 8522 8523 8524 8525 8526 8527 8528 8529 8530 8531 8532 8533 8534 8535 8536 8537 8538 8539 8540 8541 8542 8543 8544 8545 8546 8547 8548 8549 8550 8551 8552 8553 8554 8555 8556 8557 8558 8559 8560 8561 8562 8563 8564 8565 8566 8567 8568 8569 8570 8571 8572 8573 8574 8575 8576 8577 8578 8579 8580 8581 8582 8583 8584 8585 8586 8587 8588 8589 8590 8591 8592 8593 8594 8595 8596 8597 8598 8599 8600 8601 8602 8603 8604 8605 8606 8607 8608 8609 8610 8611 8612 8613 8614 8615 8616 8617 8618 8619 8620 8621 8622 8623 8624 8625 8626 8627 8628 8629 8630 8631 8632 8633 8634 8635 8636 8637 8638 8639 8640 8641 8642 8643 8644 8645 8646 8647 8648 8649 8650 8651 8652 8653 8654 8655 8656 8657 8658 8659 8660 8661 8662 8663 8664 8665 8666 8667 8668 8669 8670 8671 8672 8673 8674 8675 8676 8677 8678 8679 8680 8681 8682 8683 8684 8685 8686 8687 8688 8689 8690 8691 8692 8693 8694 8695 8696 8697 8698 8699 8700 8701 8702 8703 8704 8705 8706 8707 8708 8709 8710 8711 8712 8713 8714 8715 8716 8717 8718 8719 8720 8721 8722 8723 8724 8725 8726 8727 8728 8729 8730 8731 8732 8733 8734 8735 8736 8737 8738 8739 8740 8741 8742 8743 8744 8745 8746 8747 8748 8749 8750 8751 8752 8753 8754 8755 8756 8757 8758 8759 8760 8761 8762 8763 8764 8765 8766 8767 8768 8769 8770 8771 8772 8773 8774 8775 8776 8777 8778 8779 8780 8781 8782 8783 8784 8785 8786 8787 8788 8789 8790 8791 8792 8793 8794 8795 8796 8797 8798 8799 8800 8801 8802 8803 8804 8805 8806 8807 8808 8809 8810 8811 8812 8813 8814 8815 8816 8817 8818 8819 8820 8821 8822 8823 8824 8825 8826 8827 8828 8829 8830 8831 8832 8833 8834 8835 8836 8837 8838 8839 8840 8841 8842 8843 8844 8845 8846 8847 8848 8849 8850 8851 8852 8853 8854 8855 8856 8857 8858 8859 8860 8861 8862 8863 8864 8865 8866 8867 8868 8869 8870 8871 8872 8873 8874 8875 8876 8877 8878 8879 8880 
8881 8882 8883 8884 8885 8886 8887 8888 8889 8890 8891 8892 8893 8894 8895 8896 8897 8898 8899 8900 8901 8902 8903 8904 8905 8906 8907 8908 8909 8910 8911 8912 8913 8914 8915 8916 8917 8918 8919 8920 8921 8922 8923 8924 8925 8926 8927 8928 8929 8930 8931 8932 8933 8934 8935 8936 8937 8938 8939 8940 8941 8942 8943 8944 8945 8946 8947 8948 8949 8950 8951 8952 8953 8954 8955 8956 8957 8958 8959 8960 8961 8962 8963 8964 8965 8966 8967 8968 8969 8970 8971 8972 8973 8974 8975 8976 8977 8978 8979 8980 8981 8982 8983 8984 8985 8986 8987 8988 8989 8990 8991 8992 8993 8994 8995 8996 8997 8998 8999 9000 9001 9002 9003 9004 9005 9006 9007 9008 9009 9010 9011 9012 9013 9014 9015 9016 9017 9018 9019 9020 9021 9022 9023 9024 9025 9026 9027 9028 9029 9030 9031 9032 9033 9034 9035 9036 9037 9038 9039 9040 9041 9042 9043 9044 9045 9046 9047 9048 9049 9050 9051 9052 9053 9054 9055 9056 9057 9058 9059 9060 9061 9062 9063 9064 9065 9066 9067 9068 9069 9070 9071 9072 9073 9074 9075 9076 9077 9078 9079 9080 9081 9082 9083 9084 9085 9086 9087 9088 9089 9090 9091 9092 9093 9094 9095 9096 9097 9098 9099 9100 9101 9102 9103 9104 9105 9106 9107 9108 9109 9110 9111 9112 9113 9114 9115 9116 9117 9118 9119 9120 9121 9122 9123 9124 9125 9126 9127 9128 9129 9130 9131 9132 9133 9134 9135 9136 9137 9138 9139 9140 9141 9142 9143 9144 9145 9146 9147 9148 9149 9150 9151 9152 9153 9154 9155 9156 9157 9158 9159 9160 9161 9162 9163 9164 9165 9166 9167 9168 9169 9170 9171 9172 9173 9174 9175 9176 9177 9178 9179 9180 9181 9182 9183 9184 9185 9186 9187 9188 9189 9190 9191 9192 9193 9194 9195 9196 9197 9198 9199 9200 9201 9202 9203 9204 9205 9206 9207 9208 9209 9210 9211 9212 9213 9214 9215 9216 9217 9218 9219 9220 9221 9222 9223 9224 9225 9226 9227 9228 9229 9230 9231 9232 9233 9234 9235 9236 9237 9238 9239 9240 9241 9242 9243 9244 9245 9246 9247 9248 9249 9250 9251 9252 9253 9254 9255 9256 9257 9258 9259 9260 9261 9262 9263 9264 9265 9266 9267 9268 9269 9270 9271 9272 9273 9274 9275 9276 9277 9278 9279 9280 
9281 9282 9283 9284 9285 9286 9287 9288 9289 9290 9291 9292 9293 9294 9295 9296 9297 9298 9299 9300 9301 9302 9303 9304 9305 9306 9307 9308 9309 9310 9311 9312 9313 9314 9315 9316 9317 9318 9319 9320 9321 9322 9323 9324 9325 9326 9327 9328 9329 9330 9331 9332 9333 9334 9335 9336 9337 9338 9339 9340 9341 9342 9343 9344 9345 9346 9347 9348 9349 9350 9351 9352 9353 9354 9355 9356 9357 9358 9359 9360 9361 9362 9363 9364 9365 9366 9367 9368 9369 9370 9371 9372 9373 9374 9375 9376 9377 9378 9379 9380 9381 9382 9383 9384 9385 9386 9387 9388 9389 9390 9391 9392 9393 9394 9395 9396 9397 9398 9399 9400 9401 9402 9403 9404 9405 9406 9407 9408 9409 9410 9411 9412 9413 9414 9415 9416 9417 9418 9419 9420 9421 9422 9423 9424 9425 9426 9427 9428 9429 9430 9431 9432 9433 9434 9435 9436 9437 9438 9439 9440 9441 9442 9443 9444 9445 9446 9447 9448 9449 9450 9451 9452 9453 9454 9455 9456 9457 9458 9459 9460 9461 9462 9463 9464 9465 9466 9467 9468 9469 9470 9471 9472 9473 9474 9475 9476 9477 9478 9479 9480 9481 9482 9483 9484 9485 9486 9487 9488 9489 9490 9491 9492 9493 9494 9495 9496 9497 9498 9499 9500 9501 9502 9503 9504 9505 9506 9507 9508 9509 9510 9511 9512 9513 9514 9515 9516 9517 9518 9519 9520 9521 9522 9523 9524 9525 9526 9527 9528 9529 9530 9531 9532 9533 9534 9535 9536 9537 9538 9539 9540 9541 9542 9543 9544 9545 9546 9547 9548 9549 9550 9551 9552 9553 9554 9555 9556 9557 9558 9559 9560 9561 9562 9563 9564 9565 9566 9567 9568 9569 9570 9571 9572 9573 9574 9575 9576 9577 9578 9579 9580 9581 9582 9583 9584 9585 9586 9587 9588 9589 9590 9591 9592 9593 9594 9595 9596 9597 9598 9599 9600 9601 9602 9603 9604 9605 9606 9607 9608 9609 9610 9611 9612 9613 9614 9615 9616 9617 9618 9619 9620 9621 9622 9623 9624 9625 9626 9627 9628 9629 9630 9631 9632 9633 9634 9635 9636 9637 9638 9639 9640 9641 9642 9643 9644 9645 9646 9647 9648 9649 9650 9651 9652 9653 9654 9655 9656 9657 9658 9659 9660 9661 9662 9663 9664 9665 9666 9667 9668 9669 9670 9671 9672 9673 9674 9675 9676 9677 9678 9679 9680 
9681 9682 9683 9684 9685 9686 9687 9688 9689 9690 9691 9692 9693 9694 9695 9696 9697 9698 9699 9700 9701 9702 9703 9704 9705 9706 9707 9708 9709 9710 9711 9712 9713 9714 9715 9716 9717 9718 9719 9720 9721 9722 9723 9724 9725 9726 9727 9728 9729 9730 9731 9732 9733 9734 9735 9736 9737 9738 9739 9740 9741 9742 9743 9744 9745 9746 9747 9748 9749 9750 9751 9752 9753 9754 9755 9756 9757 9758 9759 9760 9761 9762 9763 9764 9765 9766 9767 9768 9769 9770 9771 9772 9773 9774 9775 9776 9777 9778 9779 9780 9781 9782 9783 9784 9785 9786 9787 9788 9789 9790 9791 9792 9793 9794 9795 9796 9797 9798 9799 9800 9801 9802 9803 9804 9805 9806 9807 9808 9809 9810 9811 9812 9813 9814 9815 9816 9817 9818 9819 9820 9821 9822 9823 9824 9825 9826 9827 9828 9829 9830 9831 9832 9833 9834 9835 9836 9837 9838 9839 9840 9841 9842 9843 9844 9845 9846 9847 9848 9849 9850 9851 9852 9853 9854 9855 9856 9857 9858 9859 9860 9861 9862 9863 9864 9865 9866 9867 9868 9869 9870 9871 9872 9873 9874 9875 9876 9877 9878 9879 9880 9881 9882 9883 9884 9885 9886 9887 9888 9889 9890 9891 9892 9893 9894 9895 9896 9897 9898 9899 9900 9901 9902 9903 9904 9905 9906 9907 9908 9909 9910 9911 9912 9913 9914 9915 9916 9917 9918 9919 9920 9921 9922 9923 9924 9925 9926 9927 9928 9929 9930 9931 9932 9933 9934 9935 9936 9937 9938 9939 9940 9941 9942 9943 9944 9945 9946 9947 9948 9949 9950 9951 9952 9953 9954 9955 9956 9957 9958 9959 9960 9961 9962 9963 9964 9965 9966 9967 9968 9969 9970 9971 9972 9973 9974 9975 9976 9977 9978 9979 9980 9981 9982 9983 9984 9985 9986 9987 9988 9989 9990 9991 9992 9993 9994 9995 9996 9997 9998 9999 10000 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 10011 10012 10013 10014 10015 10016 10017 10018 10019 10020 10021 10022 10023 10024 10025 10026 10027 10028 10029 10030 10031 10032 10033 10034 10035 10036 10037 10038 10039 10040 10041 10042 10043 10044 10045 10046 10047 10048 10049 10050 10051 10052 10053 10054 10055 10056 10057 10058 10059 10060 10061 10062 10063 10064 10065 10066 
10067 10068 10069 10070 10071 10072 10073 10074 10075 10076 10077 10078 10079 10080 10081 10082 10083 10084 10085 10086 10087 10088 10089 10090 10091 10092 10093 10094 10095 10096 10097 10098 10099 10100 10101 10102 10103 10104 10105 10106 10107 10108 10109 10110 10111 10112 10113 10114 10115 10116 10117 10118 10119 10120 10121 10122 10123 10124 10125 10126 10127 10128 10129 10130 10131 10132 10133 10134 10135 10136 10137 10138 10139 10140 10141 10142 10143 10144 10145 10146 10147 10148 10149 10150 10151 10152 10153 10154 10155 10156 10157 10158 10159 10160 10161 10162 10163 10164 10165 10166 10167 10168 10169 10170 10171 10172 10173 10174 10175 10176 10177 10178 10179 10180 10181 10182 10183 10184 10185 10186 10187 10188 10189 10190 10191 10192 10193 10194 10195 10196 10197 10198 10199 10200 10201 10202 10203 10204 10205 10206 10207 10208 10209 10210 10211 10212 10213 10214 10215 10216 10217 10218 10219 10220 10221 10222 10223 10224 10225 10226 10227 10228 10229 10230 10231 10232 10233 10234 10235 10236 10237 10238 10239 10240 10241 10242 10243 10244 10245 10246 10247 10248 10249 10250 10251 10252 10253 10254 10255 10256 10257 10258 10259 10260 10261 10262 10263 10264 10265 10266 10267 10268 10269 10270 10271 10272 10273 10274 10275 10276 10277 10278 10279 10280 10281 10282 10283 10284 10285 10286 10287 10288 10289 10290 10291 10292 10293 10294 10295 10296 10297 10298 10299 10300 10301 10302 10303 10304 10305 10306 10307 10308 10309 10310 10311 10312 10313 10314 10315 10316 10317 10318 10319 10320 10321 10322 10323 10324 10325 10326 10327 10328 10329 10330 10331 10332 10333 10334 10335 10336 10337 10338 10339 10340 10341 10342 10343 10344 10345 10346 10347 10348 10349 10350 10351 10352 10353 10354 10355 10356 10357 10358 10359 10360 10361 10362 10363 10364 10365 10366 10367 10368 10369 10370 10371 10372 10373 10374 10375 10376 10377 10378 10379 10380 10381 10382 10383 10384 10385 10386 10387 10388 10389 10390 10391 10392 10393 10394 10395 10396 10397 10398 10399 
10400 10401 10402 10403 10404 10405 10406 10407 10408 10409 10410 10411 10412 10413 10414 10415 10416 10417 10418 10419 10420 10421 10422 10423 10424 10425 10426 10427 10428 10429 10430 10431 10432 10433 10434 10435 10436 10437 10438 10439 10440 10441 10442 10443 10444 10445 10446 10447 10448 10449 10450 10451 10452 10453 10454 10455 10456 10457 10458 10459 10460 10461 10462 10463 10464 10465 10466 10467 10468 10469 10470 10471 10472 10473 10474 10475 10476 10477 10478 10479 10480 10481 10482 10483 10484 10485 10486 10487 10488 10489 10490 10491 10492 10493 10494 10495 10496 10497 10498 10499 10500 10501 10502 10503 10504 10505 10506 10507 10508 10509 10510 10511 10512 10513 10514 10515 10516 10517 10518 10519 10520 10521 10522 10523 10524 10525 10526 10527 10528 10529 10530 10531 10532 10533 10534 10535 10536 10537 10538 10539 10540 10541 10542 10543 10544 10545 10546 10547 10548 10549 10550 10551 10552 10553 10554 10555 10556 10557 10558 10559 10560 10561 10562 10563 10564 10565 10566 10567 10568 10569 10570 10571 10572 10573 10574 10575 10576 10577 10578 10579 10580 10581 10582 10583 10584 10585 10586 10587 10588 10589 10590 10591 10592 10593 10594 10595 10596 10597 10598 10599 10600 10601 10602 10603 10604 10605 10606 10607 10608 10609 10610 10611 10612 10613 10614 10615 10616 10617 10618 10619 10620 10621 10622 10623 10624 10625 10626 10627 10628 10629 10630 10631 10632 10633 10634 10635 10636 10637 10638 10639 10640 10641 10642 10643 10644 10645 10646 10647 10648 10649 10650 10651 10652 10653 10654 10655 10656 10657 10658 10659 10660 10661 10662 10663 10664 10665 10666 10667 10668 10669 10670 10671 10672 10673 10674 10675 10676 10677 10678 10679 10680 10681 10682 10683 10684 10685 10686 10687 10688 10689 10690 10691 10692 10693 10694 10695 10696 10697 10698 10699 10700 10701 10702 10703 10704 10705 10706 10707 10708 10709 10710 10711 10712 10713 10714 10715 10716 10717 10718 10719 10720 10721 10722 10723 10724 10725 10726 10727 10728 10729 10730 10731 10732 
10733 10734 10735 10736 10737 10738 10739 10740 10741 10742 10743 10744 10745 10746 10747 10748 10749 10750 10751 10752 10753 10754 10755 10756 10757 10758 10759 10760 10761 10762 10763 10764 10765 10766 10767 10768 10769 10770 10771 10772 10773 10774 10775 10776 10777 10778 10779 10780 10781 10782 10783 10784 10785 10786 10787 10788 10789 10790 10791 10792 10793 10794 10795 10796 10797 10798 10799 10800 10801 10802 10803 10804 10805 10806 10807 10808 10809 10810 10811 10812 10813 10814 10815 10816 10817 10818 10819 10820 10821 10822 10823 10824 10825 10826 10827 10828 10829 10830 10831 10832 10833 10834 10835 10836 10837 10838 10839 10840 10841 10842 10843 10844 10845 10846 10847 10848 10849 10850 10851 10852 10853 10854 10855 10856 10857 10858 10859 10860 10861 10862 10863 10864 10865 10866 10867 10868 10869 10870 10871 10872 10873 10874 10875 10876 10877 10878 10879 10880 10881 10882 10883 10884 10885 10886 10887 10888 10889 10890 10891 10892 10893 10894 10895 10896 10897 10898 10899 10900 10901 10902 10903 10904 10905 10906 10907 10908 10909 10910 10911 10912 10913 10914 10915 10916 10917 10918 10919 10920 10921 10922 10923 10924 10925 10926 10927 10928 10929 10930 10931 10932 10933 10934 10935 10936 10937 10938 10939 10940 10941 10942 10943 10944 10945 10946 10947 10948 10949 10950 10951 10952 10953 10954 10955 10956 10957 10958 10959 10960 10961 10962 10963 10964 10965 10966 10967 10968 10969 10970 10971 10972 10973 10974 10975 10976 10977 10978 10979 10980 10981 10982 10983 10984 10985 10986 10987 10988 10989 10990 10991 10992 10993 10994 10995 10996 10997 10998 10999 11000 11001 11002 11003 11004 11005 11006 11007 11008 11009 11010 11011 11012 11013 11014 11015 11016 11017 11018 11019 11020 11021 11022 11023 11024 11025 11026 11027 11028 11029 11030 11031 11032 11033 11034 11035 11036 11037 11038 11039 11040 11041 11042 11043 11044 11045 11046 11047 11048 11049 11050 11051 11052 11053 11054 11055 11056 11057 11058 11059 11060 11061 11062 11063 11064 11065 
11066 11067 11068 11069 11070 11071 11072 11073 11074 11075 11076 11077 11078 11079 11080 11081 11082 11083 11084 11085 11086 11087 11088 11089 11090 11091 11092 11093 11094 11095 11096 11097 11098 11099 11100 11101 11102 11103 11104 11105 11106 11107 11108 11109 11110 11111 11112 11113 11114 11115 11116 11117 11118 11119 11120 11121 11122 11123 11124 11125 11126 11127 11128 11129 11130 11131 11132 11133 11134 11135 11136 11137 11138 11139 11140 11141 11142 11143 11144 11145 11146 11147 11148 11149 11150 11151 11152 11153 11154 11155 11156 11157 11158 11159 11160 11161 11162 11163 11164 11165 11166 11167 11168 11169 11170 11171 11172 11173 11174 11175 11176 11177 11178 11179 11180 11181 11182 11183 11184 11185 11186 11187 11188 11189 11190 11191 11192 11193 11194 11195 11196 11197 11198 11199 11200 11201 11202 11203 11204 11205 11206 11207 11208 11209 11210 11211 11212 11213 11214 11215 11216 11217 11218 11219 11220 11221 11222 11223 11224 11225 11226 11227 11228 11229 11230 11231 11232 11233 11234 11235 11236 11237 11238 11239 11240 11241 11242 11243 11244 11245 11246 11247 11248 11249 11250 11251 11252 11253 11254 11255 11256 11257 11258 11259 11260 11261 11262 11263 11264 11265 11266 11267 11268 11269 11270 11271 11272 11273 11274 11275 11276 11277 11278 11279 11280 11281 11282 11283 11284 11285 11286 11287 11288 11289 11290 11291 11292 11293 11294 11295 11296 11297 11298 11299 11300 11301 11302 11303 11304 11305 11306 11307 11308 11309 11310 11311 11312 11313 11314 11315 11316 11317 11318 11319 11320 11321 11322 11323 11324 11325 11326 11327 11328 11329 11330 11331 11332 11333 11334 11335 11336 11337 11338 11339 11340 11341 11342 11343 11344 11345 11346 11347 11348 11349 11350 11351 11352 11353 11354 11355 11356 11357 11358 11359 11360 11361 11362 11363 11364 11365 11366 11367 11368 11369 11370 11371 11372 11373 11374 11375 11376 11377 11378 11379 11380 11381 11382 11383 11384 11385 11386 11387 11388 11389 11390 11391 11392 11393 11394 11395 11396 11397 11398 
11399 11400 11401 11402 11403 11404 11405 11406 11407 11408 11409 11410 11411 11412 11413 11414 11415 11416 11417 11418 11419 11420 11421 11422 11423 11424 11425 11426 11427 11428 11429 11430 11431 11432 11433 11434 11435 11436 11437 11438 11439 11440 11441 11442 11443 11444 11445 11446 11447 11448 11449 11450 11451 11452 11453 11454 11455 11456 11457 11458 11459 11460 11461 11462 11463 11464 11465 11466 11467 11468 11469 11470 11471 11472 11473 11474 11475 11476 11477 11478 11479 11480 11481 11482 11483 11484 11485 11486 11487 11488 11489 11490 11491 11492 11493 11494 11495 11496 11497 11498 11499 11500 11501 11502 11503 11504 11505 11506 11507 11508 11509 11510 11511 11512 11513 11514 11515 11516 11517 11518 11519 11520 11521 11522 11523 11524 11525 11526 11527 11528 11529 11530 11531 11532 11533 11534 11535 11536 11537 11538 11539 11540 11541 11542 11543 11544 11545 11546 11547 11548 11549 11550 11551 11552 11553 11554 11555 11556 11557 11558 11559 11560 11561 11562 11563 11564 11565 11566 11567 11568 11569 11570 11571 11572 11573 11574 11575 11576 11577 11578 11579 11580 11581 11582 11583 11584 11585 11586 11587 11588 11589 11590 11591 11592 11593 11594 11595 11596 11597 11598 11599 11600 11601 11602 11603 11604 11605 11606 11607 11608 11609 11610 11611 11612 11613 11614 11615 11616 11617 11618 11619 11620 11621 11622 11623 11624 11625 11626 11627 11628 11629 11630 11631 11632 11633 11634 11635 11636 11637 11638 11639 11640 11641 11642 11643 11644 11645 11646 11647 11648 11649 11650 11651 11652 11653 11654 11655 11656 11657 11658 11659 11660 11661 11662 11663 11664 11665 11666 11667 11668 11669 11670 11671 11672 11673 11674 11675 11676 11677 11678 11679 11680 11681 11682 11683 11684 11685 11686 11687 11688 11689 11690 11691 11692 11693 11694 11695 11696 11697 11698 11699 11700 11701 11702 11703 11704 11705 11706 11707 11708 11709 11710 11711 11712 11713 11714 11715 11716 11717 11718 11719 11720 11721 11722 11723 11724 11725 11726 11727 11728 11729 11730 11731 
11732 11733 11734 11735 11736 11737 11738 11739 11740 11741 11742 11743 11744 11745 11746 11747 11748 11749 11750 11751 11752 11753 11754 11755 11756 11757 11758 11759 11760 11761 11762 11763 11764 11765 11766 11767 11768 11769 11770 11771 11772 11773 11774 11775 11776 11777 11778 11779 11780 11781 11782 11783 11784 11785 11786 11787 11788 11789 11790 11791 11792 11793 11794 11795 11796 11797 11798 11799 11800 11801 11802 11803 11804 11805 11806 11807 11808 11809 11810 11811 11812 11813 11814 11815 11816 11817 11818 11819 11820 11821 11822 11823 11824 11825 11826 11827 11828 11829 11830 11831 11832 11833 11834 11835 11836 11837 11838 11839 11840 11841 11842 11843 11844 11845 11846 11847 11848 11849 11850 11851 11852 11853 11854 11855 11856 11857 11858 11859 11860 11861 11862 11863 11864 11865 11866 11867 11868 11869 11870 11871 11872 11873 11874 11875 11876 11877 11878 11879 11880 11881 11882 11883 11884 11885 11886 11887 11888 11889 11890 11891 11892 11893 11894 11895 11896 11897 11898 11899 11900 11901 11902 11903 11904 11905 11906 11907 11908 11909 11910 11911 11912 11913 11914 11915 11916 11917 11918 11919 11920 11921 11922 11923 11924 11925 11926 11927 11928 11929 11930 11931 11932 11933 11934 11935 11936 11937 11938 11939 11940 11941 11942 11943 11944 11945 11946 11947 11948 11949 11950 11951 11952 11953 11954 11955 11956 11957 11958 11959 11960 11961 11962 11963 11964 11965 11966 11967 11968 11969 11970 11971 11972 11973 11974 11975 11976 11977 11978 11979 11980 11981 11982 11983 11984 11985 11986 11987 11988 11989 11990 11991 11992 11993 11994 11995 11996 11997 11998 11999 12000 12001 12002 12003 12004 12005 12006 12007 12008 12009 12010 12011 12012 12013 12014 12015 12016 12017 12018 12019 12020 12021 12022 12023 12024 12025 12026 12027 12028 12029 12030 12031 12032 12033 12034 12035 12036 12037 12038 12039 12040 12041 12042 12043 12044 12045 12046 12047 12048 12049 12050 12051 12052 12053 12054 12055 12056 12057 12058 12059 12060 12061 12062 12063 12064 
12065 12066 12067 12068 12069 12070 12071 12072 12073 12074 12075 12076 12077 12078 12079 12080 12081 12082 12083 12084 12085 12086 12087 12088 12089 12090 12091 12092 12093 12094 12095 12096 12097 12098 12099 12100 12101 12102 12103 12104 12105 12106 12107 12108 12109 12110 12111 12112 12113 12114 12115 12116 12117 12118 12119 12120 12121 12122 12123 12124 12125 12126 12127 12128 12129 12130 12131 12132 12133 12134 12135 12136 12137 12138 12139 12140 12141 12142 12143 12144 12145 12146 12147 12148 12149 12150 12151 12152 12153 12154 12155 12156 12157 12158 12159 12160 12161 12162 12163 12164 12165 12166 12167 12168 12169 12170 12171 12172 12173 12174 12175 12176 12177 12178 12179 12180 12181 12182 12183 12184 12185 12186 12187 12188 12189 12190 12191 12192 12193 12194 12195 12196 12197 12198 12199 12200 12201 12202 12203 12204 12205 12206 12207 12208 12209 12210 12211 12212 12213 12214 12215 12216 12217 12218 12219 12220 12221 12222 12223 12224 12225 12226 12227 12228 12229 12230 12231 12232 12233 12234 12235 12236 12237 12238 12239 12240 12241 12242 12243 12244 12245 12246 12247 12248 12249 12250 12251 12252 12253 12254 12255 12256 12257 12258 12259 12260 12261 12262 12263 12264 12265 12266 12267 12268 12269 12270 12271 12272 12273 12274 12275 12276 12277 12278 12279 12280 12281 12282 12283 12284 12285 12286 12287 12288 12289 12290 12291 12292 12293 12294 12295 12296 12297 12298 12299 12300 12301 12302 12303 12304 12305 12306 12307 12308 12309 12310 12311 12312 12313 12314 12315 12316 12317 12318 12319 12320 12321 12322 12323 12324 12325 12326 12327 12328 12329 12330 12331 12332 12333 12334 12335 12336 12337 12338 12339 12340 12341 12342 12343 12344 12345 12346 12347 12348 12349 12350 12351 12352 12353 12354 12355 12356 12357 12358 12359 12360 12361 12362 12363 12364 12365 12366 12367 12368 12369 12370 12371 12372 12373 12374 12375 12376 12377 12378 12379 12380 12381 12382 12383 12384 12385 12386 12387 12388 12389 12390 12391 12392 12393 12394 12395 12396 12397 
12398 12399 12400 12401 12402 12403 12404 12405 12406 12407 12408 12409 12410 12411 12412 12413 12414 12415 12416 12417 12418 12419 12420 12421 12422 12423 12424 12425 12426 12427 12428 12429 12430 12431 12432 12433 12434 12435 12436 12437 12438 12439 12440 12441 12442 12443 12444 12445 12446 12447 12448 12449 12450 12451 12452 12453 12454 12455 12456 12457 12458 12459 12460 12461 12462 12463 12464 12465 12466 12467 12468 12469 12470 12471 12472 12473 12474 12475 12476 12477 12478 12479 12480 12481 12482 12483 12484 12485 12486 12487 12488 12489 12490 12491 12492 12493 12494 12495 12496 12497 12498 12499 12500 12501 12502 12503 12504 12505 12506 12507 12508 12509 12510 12511 12512 12513 12514 12515 12516 12517 12518 12519 12520 12521 12522 12523 12524 12525 12526 12527 12528 12529 12530 12531 12532 12533 12534 12535 12536 12537 12538 12539 12540 12541 12542 12543 12544 12545 12546 12547 12548 12549 12550 12551 12552 12553 12554 12555 12556 12557 12558 12559 12560 12561 12562 12563 12564 12565 12566 12567 12568 12569 12570 12571 12572 12573 12574 12575 12576 12577 12578 12579 12580 12581 12582 12583 12584 12585 12586 12587 12588 12589 12590 12591 12592 12593 12594 12595 12596 12597 12598 12599 12600 12601 12602 12603 12604 12605 12606 12607 12608 12609 12610 12611 12612 12613 12614 12615 12616 12617 12618 12619 12620 12621 12622 12623 12624 12625 12626 12627 12628 12629 12630 12631 12632 12633 12634 12635 12636 12637 12638 12639 12640 12641 12642 12643 12644 12645 12646 12647 12648 12649 12650 12651 12652 12653 12654 12655 12656 12657 12658 12659 12660 12661 12662 12663 12664 12665 12666 12667 12668 12669 12670 12671 12672 12673 12674 12675 12676 12677 12678 12679 12680 12681 12682 12683 12684 12685 12686 12687 12688 12689 12690 12691 12692 12693 12694 12695 12696 12697 12698 12699 12700 12701 12702 12703 12704 12705 12706 12707 12708 12709 12710 12711 12712 12713 12714 12715 12716 12717 12718 12719 12720 12721 12722 12723 12724 12725 12726 12727 12728 12729 12730 
12731 12732 12733 12734 12735 12736 12737 12738 12739 12740 12741 12742 12743 12744 12745 12746 12747 12748 12749 12750 12751 12752 12753 12754 12755 12756 12757 12758 12759 12760 12761 12762 12763 12764 12765 12766 12767 12768 12769 12770 12771 12772 12773 12774 12775 12776 12777 12778 12779 12780 12781 12782 12783 12784 12785 12786 12787 12788 12789 12790 12791 12792 12793 12794 12795 12796 12797 12798 12799 12800 12801 12802 12803 12804 12805 12806 12807 12808 12809 12810 12811 12812 12813 12814 12815 12816 12817 12818 12819 12820 12821 12822 12823 12824 12825 12826 12827 12828 12829 12830 12831 12832 12833 12834 12835 12836 12837 12838 12839 12840 12841 12842 12843 12844 12845 12846 12847 12848 12849 12850 12851 12852 12853 12854 12855 12856 12857 12858 12859 12860 12861 12862 12863 12864 12865 12866 12867 12868 12869 12870 12871 12872 12873 12874 12875 12876 12877 12878 12879 12880 12881 12882 12883 12884 12885 12886 12887 12888 12889 12890 12891 12892 12893 12894 12895 12896 12897 12898 12899 12900 12901 12902 12903 12904 12905 12906 12907 12908 12909 12910 12911 12912 12913 12914 12915 12916 12917 12918 12919 12920 12921 12922 12923 12924 12925 12926 12927 12928 12929 12930 12931 12932 12933 12934 12935 12936 12937 12938 12939 12940 12941 12942 12943 12944 12945 12946 12947 12948 12949 12950 12951 12952 12953 12954 12955 12956 12957 12958 12959 12960 12961 12962 12963 12964 12965 12966 12967 12968 12969 12970 12971 12972 12973 12974 12975 12976 12977 12978 12979 12980 12981 12982 12983 12984 12985 12986 12987 12988 12989 12990 12991 12992 12993 12994 12995 12996 12997 12998 12999 13000 13001 13002 13003 13004 13005 13006 13007 13008 13009 13010 13011 13012 13013 13014 13015 13016 13017 13018 13019 13020 13021 13022 13023 13024 13025 13026 13027 13028 13029 13030 13031 13032 13033 13034 13035 13036 13037 13038 13039 13040 13041 13042 13043 13044 13045 13046 13047 13048 13049 13050 13051 13052 13053 13054 13055 13056 13057 13058 13059 13060 13061 13062 13063 
13064 13065 13066 13067 13068 13069 13070 13071 13072 13073 13074 13075 13076 13077 13078 13079 13080 13081 13082 13083 13084 13085 13086 13087 13088 13089 13090 13091 13092 13093 13094 13095 13096 13097 13098 13099 13100 13101 13102 13103 13104 13105 13106 13107 13108 13109 13110 13111 13112 13113 13114 13115 13116 13117 13118 13119 13120 13121 13122 13123 13124 13125 13126 13127 13128 13129 13130 13131 13132 13133 13134 13135 13136 13137 13138 13139 13140 13141 13142 13143 13144 13145 13146 13147 13148 13149 13150 13151 13152 13153 13154 13155 13156 13157 13158 13159 13160 13161 13162 13163 13164 13165 13166 13167 13168 13169 13170 13171 13172 13173 13174 13175 13176 13177 13178 13179 13180 13181 13182 13183 13184 13185 13186 13187 13188 13189 13190 13191 13192 13193 13194 13195 13196 13197 13198 13199 13200 13201 13202 13203 13204 13205 13206 13207 13208 13209 13210 13211 13212 13213 13214 13215 13216 13217 13218 13219 13220 13221 13222 13223 13224 13225 13226 13227 13228 13229 13230 13231 13232 13233 13234 13235 13236 13237 13238 13239 13240 13241 13242 13243 13244 13245 13246 13247 13248 13249 13250 13251 13252 13253 13254 13255 13256 13257 13258 13259 13260 13261 13262 13263 13264 13265 13266 13267 13268 13269 13270 13271 13272 13273 13274 13275 13276 13277 13278 13279 13280 13281 13282 13283 13284 13285 13286 13287 13288 13289 13290 13291 13292 13293 13294 13295 13296 13297 13298 13299 13300 13301 13302 13303 13304 13305 13306 13307 13308 13309 13310 13311 13312 13313 13314 13315 13316 13317 13318 13319 13320 13321 13322 13323 13324 13325 13326 13327 13328 13329 13330 13331 13332 13333 13334 13335 13336 13337 13338 13339 13340 13341 13342 13343 13344 13345 13346 13347 13348 13349 13350 13351 13352 13353 13354 13355 13356 13357 13358 13359 13360 13361 13362 13363 13364 13365 13366 13367 13368 13369 13370 13371 13372 13373 13374 13375 13376 13377 13378 13379 13380 13381 13382 13383 13384 13385 13386 13387 13388 13389 13390 13391 13392 13393 13394 13395 13396 
13397 13398 13399 13400 13401 13402 13403 13404 13405 13406 13407 13408 13409 13410 13411 13412 13413 13414 13415 13416 13417 13418 13419 13420 13421 13422 13423 13424 13425 13426 13427 13428 13429 13430 13431 13432 13433 13434 13435 13436 13437 13438 13439 13440 13441 13442 13443 13444 13445 13446 13447 13448 13449 13450 13451 13452 13453 13454 13455 13456 13457 13458 13459 13460 13461 13462 13463 13464 13465 13466 13467 13468 13469 13470 13471 13472 13473 13474 13475 13476 13477 13478 13479 13480 13481 13482 13483 13484 13485 13486 13487 13488 13489 13490 13491 13492 13493 13494 13495 13496 13497 13498 13499 13500 13501 13502 13503 13504 13505 13506 13507 13508 13509 13510 13511 13512 13513 13514 13515 13516 13517 13518 13519 13520 13521 13522 13523 13524 13525 13526 13527 13528 13529 13530 13531 13532 13533 13534 13535 13536 13537 13538 13539 13540 13541 13542 13543 13544 13545 13546 13547 13548 13549 13550 13551 13552 13553 13554 13555 13556 13557 13558 13559 13560 13561 13562 13563 13564 13565 13566 13567 13568 13569 13570 13571 13572 13573 13574 13575 13576 13577 13578 13579 13580 13581 13582 13583 13584 13585 13586 13587 13588 13589 13590 13591 13592 13593 13594 13595 13596 13597 13598 13599 13600 13601 13602 13603 13604 13605 13606 13607 13608 13609 13610 13611 13612 13613 13614 13615 13616 13617 13618 13619 13620 13621 13622 13623 13624 13625 13626 13627 13628 13629 13630 13631 13632 13633 13634 13635 13636 13637 13638 13639 13640 13641 13642 13643 13644 13645 13646 13647 13648 13649 13650 13651 13652 13653 13654 13655 13656 13657 13658 13659 13660 13661 13662 13663 13664 13665 13666 13667 13668 13669 13670 13671 13672 13673 13674 13675 13676 13677 13678 13679 13680 13681 13682 13683 13684 13685 13686 13687 13688 13689 13690 13691 13692 13693 13694 13695 13696 13697 13698 13699 13700 13701 13702 13703 13704 13705 13706 13707 13708 13709 13710 13711 13712 13713 13714 13715 13716 13717 13718 13719 13720 13721 13722 13723 13724 13725 13726 13727 13728 13729 
13730 13731 13732 13733 13734 13735 13736 13737 13738 13739 13740 13741 13742 13743 13744 13745 13746 13747 13748 13749 13750 13751 13752 13753 13754 13755 13756 13757 13758 13759 13760 13761 13762 13763 13764 13765 13766 13767 13768 13769 13770 13771 13772 13773 13774 13775 13776 13777 13778 13779 13780 13781 13782 13783 13784 13785 13786 13787 13788 13789 13790 13791 13792 13793 13794 13795 13796 13797 13798 13799 13800 13801 13802 13803 13804 13805 13806 13807 13808 13809 13810 13811 13812 13813 13814 13815 13816 13817 13818 13819 13820 13821 13822 13823 13824 13825 13826 13827 13828 13829 13830 13831 13832 13833 13834 13835 13836 13837 13838 13839 13840 13841 13842 13843 13844 13845 13846 13847 13848 13849 13850 13851 13852 13853 13854 13855 13856 13857 13858 13859 13860 13861 13862 13863 13864 13865 13866 13867 13868 13869 13870 13871 13872 13873 13874 13875 13876 13877 13878 13879 13880 13881 13882 13883 13884 13885 13886 13887 13888 13889 13890 13891 13892 13893 13894 13895 13896 13897 13898 13899 13900 13901 13902 13903 13904 13905 13906 13907 13908 13909 13910 13911 13912 13913 13914 13915 13916 13917 13918 13919 13920 13921 13922 13923 13924 13925 13926 13927 13928 13929 13930 13931 13932 13933 13934 13935 13936 13937 13938 13939 13940 13941 13942 13943 13944 13945 13946 13947 13948 13949 13950 13951 13952 13953 13954 13955 13956 13957 13958 13959 13960 13961 13962 13963 13964 13965 13966 13967 13968 13969 13970 13971 13972 13973 13974 13975 13976 13977 13978 13979 13980 13981 13982 13983 13984 13985 13986 13987 13988 13989 13990 13991 13992 13993 13994 13995 13996 13997 13998 13999 14000 14001 14002 14003 14004 14005 14006 14007 14008 14009 14010 14011 14012 14013 14014 14015 14016 14017 14018 14019 14020 14021 14022 14023 14024 14025 14026 14027 14028 14029 14030 14031 14032 14033 14034 14035 14036 14037 14038 14039 14040 14041 14042 14043 14044 14045 14046 14047 14048 14049 14050 14051 14052 14053 14054 14055 14056 14057 14058 14059 14060 14061 14062 
14063 14064 14065 14066 14067 14068 14069 14070 14071 14072 14073 14074 14075 14076 14077 14078 14079 14080 14081 14082 14083 14084 14085 14086 14087 14088 14089 14090 14091 14092 14093 14094 14095 14096 14097 14098 14099 14100 14101 14102 14103 14104 14105 14106 14107 14108 14109 14110 14111 14112 14113 14114 14115 14116 14117 14118 14119 14120 14121 14122 14123 14124 14125 14126 14127 14128 14129 14130 14131 14132 14133 14134 14135 14136 14137 14138 14139 14140 14141 14142 14143 14144 14145 14146 14147 14148 14149 14150 14151 14152 14153 14154 14155 14156 14157 14158 14159 14160 14161 14162 14163 14164 14165 14166 14167 14168 14169 14170 14171 14172 14173 14174 14175 14176 14177 14178 14179 14180 14181 14182 14183 14184 14185 14186 14187 14188 14189 14190 14191 14192 14193 14194 14195 14196 14197 14198 14199 14200 14201 14202 14203 14204 14205 14206 14207 14208 14209 14210 14211 14212 14213 14214 14215 14216 14217 14218 14219 14220 14221 14222 14223 14224 14225 14226 14227 14228 14229 14230 14231 14232 14233 14234 14235 14236 14237 14238 14239 14240 14241 14242 14243 14244 14245 14246 14247 14248 14249 14250 14251 14252 14253 14254 14255 14256 14257 14258 14259 14260 14261 14262 14263 14264 14265 14266 14267 14268 14269 14270 14271 14272 14273 14274 14275 14276 14277 14278 14279 14280 14281 14282 14283 14284 14285 14286 14287 14288 14289 14290 14291 14292 14293 14294 14295 14296 14297 14298 14299 14300 14301 14302 14303 14304 14305 14306 14307 14308 14309 14310 14311 14312 14313 14314 14315 14316 14317 14318 14319 14320 14321 14322 14323 14324 14325 14326 14327 14328 14329 14330 14331 14332 14333 14334 14335 14336 14337 14338 14339 14340 14341 14342 14343 14344 14345 14346 14347 14348 14349 14350 14351 14352 14353 14354 14355 14356 14357 14358 14359 14360 14361 14362 14363 14364 14365 14366 14367 14368 14369 14370 14371 14372 14373 14374 14375 14376 14377 14378 14379 14380 14381 14382 14383 14384 14385 14386 14387 14388 14389 14390 14391 14392 14393 14394 14395 
14396 14397 14398 14399 14400 14401 14402 14403 14404 14405 14406 14407 14408 14409 14410 14411 14412 14413 14414 14415 14416 14417 14418 14419 14420 14421 14422 14423 14424 14425 14426 14427 14428 14429 14430 14431 14432 14433 14434 14435 14436 14437 14438 14439 14440 14441 14442 14443 14444 14445 14446 14447 14448 14449 14450 14451 14452 14453 14454 14455 14456 14457 14458 14459 14460 14461 14462 14463 14464 14465 14466 14467 14468 14469 14470 14471 14472 14473 14474 14475 14476 14477 14478 14479 14480 14481 14482 14483 14484 14485 14486 14487 14488 14489 14490 14491 14492 14493 14494 14495 14496 14497 14498 14499 14500 14501 14502 14503 14504 14505 14506 14507 14508 14509 14510 14511 14512 14513 14514 14515 14516 14517 14518 14519 14520 14521 14522 14523 14524 14525 14526 14527 14528 14529 14530 14531 14532 14533 14534 14535 14536 14537 14538 14539 14540 14541 14542 14543 14544 14545 14546 14547 14548 14549 14550 14551 14552 14553 14554 14555 14556 14557 14558 14559 14560 14561 14562 14563 14564 14565 14566 14567 14568 14569 14570 14571 14572 14573 14574 14575 14576 14577 14578 14579 14580 14581 14582 14583 14584 14585 14586 14587 14588 14589 14590 14591 14592 14593 14594 14595 14596 14597 14598 14599 14600 14601 14602 14603 14604 14605 14606 14607 14608 14609 14610 14611 14612 14613 14614 14615 14616 14617 14618 14619 14620 14621 14622 14623 14624 14625 14626 14627 14628 14629 14630 14631 14632 14633 14634 14635 14636 14637 14638 14639 14640 14641 14642 14643 14644 14645 14646 14647 14648 14649 14650 14651 14652 14653 14654 14655 14656 14657 14658 14659 14660 14661 14662 14663 14664 14665 14666 14667 14668 14669 14670 14671 14672 14673 14674 14675 14676 14677 14678 14679 14680 14681 14682 14683 14684 14685 14686 14687 14688 14689 14690 14691 14692 14693 14694 14695 14696 14697 14698 14699 14700 14701 14702 14703 14704 14705 14706 14707 14708 14709 14710 14711 14712 14713 14714 14715 14716 14717 14718 14719 14720 14721 14722 14723 14724 14725 14726 14727 14728 
14729 14730 14731 14732 14733 14734 14735 14736 14737 14738 14739 14740 14741 14742 14743 14744 14745 14746 14747 14748 14749 14750 14751 14752 14753 14754 14755 14756 14757 14758 14759 14760 14761 14762 14763 14764 14765 14766 14767 14768 14769 14770 14771 14772 14773 14774 14775 14776 14777 14778 14779 14780 14781 14782 14783 14784 14785 14786 14787 14788 14789 14790 14791 14792 14793 14794 14795 14796 14797 14798 14799 14800 14801 14802 14803 14804 14805 14806 14807 14808 14809 14810 14811 14812 14813 14814 14815 14816 14817 14818 14819 14820 14821 14822 14823 14824 14825 14826 14827 14828 14829 14830 14831 14832 14833 14834 14835 14836 14837 14838 14839 14840 14841 14842 14843 14844 14845 14846 14847 14848 14849 14850 14851 14852 14853 14854 14855 14856 14857 14858 14859 14860 14861 14862 14863 14864 14865 14866 14867 14868 14869 14870 14871 14872 14873 14874 14875 14876 14877 14878 14879 14880 14881 14882 14883 14884 14885 14886 14887 14888 14889 14890 14891 14892 14893 14894 14895 14896 14897 14898 14899 14900 14901 14902 14903 14904 14905 14906 14907 14908 14909 14910 14911 14912 14913 14914 14915 14916 14917 14918 14919 14920 14921 14922 14923 14924 14925 14926 14927 14928 14929 14930 14931 14932 14933 14934 14935 14936 14937 14938 14939 14940 14941 14942 14943 14944 14945 14946 14947 14948 14949 14950 14951 14952 14953 14954 14955 14956 14957 14958 14959 14960 14961 14962 14963 14964 14965 14966 14967 14968 14969 14970 14971 14972 14973 14974 14975 14976 14977 14978 14979 14980 14981 14982 14983 14984 14985 14986 14987 14988 14989 14990 14991 14992 14993 14994 14995 14996 14997 14998 14999 15000 15001 15002 15003 15004 15005 15006 15007 15008 15009 15010 15011 15012 15013 15014 15015 15016 15017 15018 15019 15020 15021 15022 15023 15024 15025 15026 15027 15028 15029 15030 15031 15032 15033 15034 15035 15036 15037 15038 15039 15040 15041 15042 15043 15044 15045 15046 15047 15048 15049 15050 15051 15052 15053 15054 15055 15056 15057 15058 15059 15060 15061 
15062 15063 15064 15065 15066 15067 15068 15069 15070 15071 15072 15073 15074 15075 15076 15077 15078 15079 15080 15081 15082 15083 15084 15085 15086 15087 15088 15089 15090 15091 15092 15093 15094 15095 15096 15097 15098 15099 15100 15101 15102 15103 15104 15105 15106 15107 15108 15109 15110 15111 15112 15113 15114 15115 15116 15117 15118 15119 15120 15121 15122 15123 15124 15125 15126 15127 15128 15129 15130 15131 15132 15133 15134 15135 15136 15137 15138 15139 15140 15141 15142 15143 15144 15145 15146 15147 15148 15149 15150 15151 15152 15153 15154 15155 15156 15157 15158 15159 15160 15161 15162 15163 15164 15165 15166 15167 15168 15169 15170 15171 15172 15173 15174 15175 15176 15177 15178 15179 15180 15181 15182 15183 15184 15185 15186 15187 15188 15189 15190 15191 15192 15193 15194 15195 15196 15197 15198 15199 15200 15201 15202 15203 15204 15205 15206 15207 15208 15209 15210 15211 15212 15213 15214 15215 15216 15217 15218 15219 15220 15221 15222 15223 15224 15225 15226 15227 15228 15229 15230 15231 15232 15233 15234 15235 15236 15237 15238 15239 15240 15241 15242 15243 15244 15245 15246 15247 15248 15249 15250 15251 15252 15253 15254 15255 15256 15257 15258 15259 15260 15261 15262 15263 15264 15265 15266 15267 15268 15269 15270 15271 15272 15273 15274 15275 15276 15277 15278 15279 15280 15281 15282 15283 15284 15285 15286 15287 15288 15289 15290 15291 15292 15293 15294 15295 15296 15297 15298 15299 15300 15301 15302 15303 15304 15305 15306 15307 15308 15309 15310 15311 15312 15313 15314 15315 15316 15317 15318 15319 15320 15321 15322 15323 15324 15325 15326 15327 15328 15329 15330 15331 15332 15333 15334 15335 15336 15337 15338 15339 15340 15341 15342 15343 15344 15345 15346 15347 15348 15349 15350 15351 15352 15353 15354 15355 15356 15357 15358 15359 15360 15361 15362 15363 15364 15365 15366 15367 15368 15369 15370 15371 15372 15373 15374 15375 15376 15377 15378 15379 15380 15381 15382 15383 15384 15385 15386 15387 15388 15389 15390 15391 15392 15393 15394 
15395 15396 15397 15398 15399 15400 15401 15402 15403 15404 15405 15406 15407 15408 15409 15410 15411 15412 15413 15414 15415 15416 15417 15418 15419 15420 15421 15422 15423 15424 15425 15426 15427 15428 15429 15430 15431 15432 15433 15434 15435 15436 15437 15438 15439 15440 15441 15442 15443 15444 15445 15446 15447 15448 15449 15450 15451 15452 15453 15454 15455 15456 15457 15458 15459 15460 15461 15462 15463 15464 15465 15466 15467 15468 15469 15470 15471 15472 15473 15474 15475 15476 15477 15478 15479 15480 15481 15482 15483 15484 15485 15486 15487 15488 15489 15490 15491 15492 15493 15494 15495 15496 15497 15498 15499 15500 15501 15502 15503 15504 15505 15506 15507 15508 15509 15510 15511 15512 15513 15514 15515 15516 15517 15518 15519 15520 15521 15522 15523 15524 15525 15526 15527 15528 15529 15530 15531 15532 15533 15534 15535 15536 15537 15538 15539 15540 15541 15542 15543 15544 15545 15546 15547 15548 15549 15550 15551 15552 15553 15554 15555 15556 15557 15558 15559 15560 15561 15562 15563 15564 15565 15566 15567 15568 15569 15570 15571 15572 15573 15574 15575 15576 15577 15578 15579 15580 15581 15582 15583 15584 15585 15586 15587 15588 15589 15590 15591 15592 15593 15594 15595 15596 15597 15598 15599 15600 15601 15602 15603 15604 15605 15606 15607 15608 15609 15610 15611 15612 15613 15614 15615 15616 15617 15618 15619 15620 15621 15622 15623 15624 15625 15626 15627 15628 15629 15630 15631 15632 15633 15634 15635 15636 15637 15638 15639 15640 15641 15642 15643 15644 15645 15646 15647 15648 15649 15650 15651 15652 15653 15654 15655 15656 15657 15658 15659 15660 15661 15662 15663 15664 15665 15666 15667 15668 15669 15670 15671 15672 15673 15674 15675 15676 15677 15678 15679 15680 15681 15682 15683 15684 15685 15686 15687 15688 15689 15690 15691 15692 15693 15694 15695 15696 15697 15698 15699 15700 15701 15702 15703 15704 15705 15706 15707 15708 15709 15710 15711 15712 15713 15714 15715 15716 15717 15718 15719 15720 15721 15722 15723 15724 15725 15726 15727 
15728 15729 15730 15731 15732 15733 15734 15735 15736 15737 15738 15739 15740 15741 15742 15743 15744 15745 15746 15747 15748 15749 15750 15751 15752 15753 15754 15755 15756 15757 15758 15759 15760 15761 15762 15763 15764 15765 15766 15767 15768 15769 15770 15771 15772 15773 15774 15775 15776 15777 15778 15779 15780 15781 15782 15783 15784 15785 15786 15787 15788 15789 15790 15791 15792 15793 15794 15795 15796 15797 15798 15799 15800 15801 15802 15803 15804 15805 15806 15807 15808 15809 15810 15811 15812 15813 15814 15815 15816 15817 15818 15819 15820 15821 15822 15823 15824 15825 15826 15827 15828 15829 15830 15831 15832 15833 15834 15835 15836 15837 15838 15839 15840 15841 15842 15843 15844 15845 15846 15847 15848 15849 15850 15851 15852 15853 15854 15855 15856 15857 15858 15859 15860 15861 15862 15863 15864 15865 15866 15867 15868 15869 15870 15871 15872 15873 15874 15875 15876 15877 15878 15879 15880 15881 15882 15883 15884 15885 15886 15887 15888 15889 15890 15891 15892 15893 15894 15895 15896 15897 15898 15899 15900 15901 15902 15903 15904 15905 15906 15907 15908 15909 15910 15911 15912 15913 15914 15915 15916 15917 15918 15919 15920 15921 15922 15923 15924 15925 15926 15927 15928 15929 15930 15931 15932 15933 15934 15935 15936 15937 15938 15939 15940 15941 15942 15943 15944 15945 15946 15947 15948 15949 15950 15951 15952 15953 15954 15955 15956 15957 15958 15959 15960 15961 15962 15963 15964 15965 15966 15967 15968 15969 15970 15971 15972 15973 15974 15975 15976 15977 15978 15979 15980 15981 15982 15983 15984 15985 15986 15987 15988 15989 15990 15991 15992 15993 15994 15995 15996 15997 15998 15999 16000 16001 16002 16003 16004 16005 16006 16007 16008 16009 16010 16011 16012 16013 16014 16015 16016 16017 16018 16019 16020 16021 16022 16023 16024 16025 16026 16027 16028 16029 16030 16031 16032 16033 16034 16035 16036 16037 16038 16039 16040 16041 16042 16043 16044 16045 16046 16047 16048 16049 16050 16051 16052 16053 16054 16055 16056 16057 16058 16059 16060 
16061 16062 16063 16064 16065 16066 16067 16068 16069 16070 16071 16072 16073 16074 16075 16076 16077 16078 16079 16080 16081 16082 16083 16084 16085 16086 16087 16088 16089 16090 16091 16092 16093 16094 16095 16096 16097 16098 16099 16100 16101 16102 16103 16104 16105 16106 16107 16108 16109 16110 16111 16112 16113 16114 16115 16116 16117 16118 16119 16120 16121 16122 16123 16124 16125 16126 16127 16128 16129 16130 16131 16132 16133 16134 16135 16136 16137 16138 16139 16140 16141 16142 16143 16144 16145 16146 16147 16148 16149 16150 16151 16152 16153 16154 16155 16156 16157 16158 16159 16160 16161 16162 16163 16164 16165 16166 16167 16168 16169 16170 16171 16172 16173 16174 16175 16176 16177 16178 16179 16180 16181 16182 16183 16184 16185 16186 16187 16188 16189 16190 16191 16192 16193 16194 16195 16196 16197 16198 16199 16200 16201 16202 16203 16204 16205 16206 16207 16208 16209 16210 16211 16212 16213 16214 16215 16216 16217 16218 16219 16220 16221 16222 16223 16224 16225 16226 16227 16228 16229 16230 16231 16232 16233 16234 16235 16236 16237 16238 16239 16240 16241 16242 16243 16244 16245 16246 16247 16248 16249 16250 16251 16252 16253 16254 16255 16256 16257 16258 16259 16260 16261 16262 16263 16264 16265 16266 16267 16268 16269 16270 16271 16272 16273 16274 16275 16276 16277 16278 16279 16280 16281 16282 16283 16284 16285 16286 16287 16288 16289 16290 16291 16292 16293 16294 16295 16296 16297 16298 16299 16300 16301 16302 16303 16304 16305 16306 16307 16308 16309 16310 16311 16312 16313 16314 16315 16316 16317 16318 16319 16320 16321 16322 16323 16324 16325 16326 16327 16328 16329 16330 16331 16332 16333 16334 16335 16336 16337 16338 16339 16340 16341 16342 16343 16344 16345 16346 16347 16348 16349 16350 16351 16352 16353 16354 16355 16356 16357 16358 16359 16360 16361 16362 16363 16364 16365 16366 16367 16368 16369 16370 16371 16372 16373 16374 16375 16376 16377 16378 16379 16380 16381 16382 16383 16384 16385 16386 16387 16388 16389 16390 16391 16392 16393 
16394 16395 16396 16397 16398 16399 16400 16401 16402 16403 16404 16405 16406 16407 16408 16409 16410 16411 16412 16413 16414 16415 16416 16417 16418 16419 16420 16421 16422 16423 16424 16425 16426 16427 16428 16429 16430 16431 16432 16433 16434 16435 16436 16437 16438 16439 16440 16441 16442 16443 16444 16445 16446 16447 16448 16449 16450 16451 16452 16453 16454 16455 16456 16457 16458 16459 16460 16461 16462 16463 16464 16465 16466 16467 16468 16469 16470 16471 16472 16473 16474 16475 16476 16477 16478 16479 16480 16481 16482 16483 16484 16485 16486 16487 16488 16489 16490 16491 16492 16493 16494 16495 16496 16497 16498 16499 16500 16501 16502 16503 16504 16505 16506 16507 16508 16509 16510 16511 16512 16513 16514 16515 16516 16517 16518 16519 16520 16521 16522 16523 16524 16525 16526 16527 16528 16529 16530 16531 16532 16533 16534 16535 16536 16537 16538 16539 16540 16541 16542 16543 16544 16545 16546 16547 16548 16549 16550 16551 16552 16553 16554 16555 16556 16557 16558 16559 16560 16561 16562 16563 16564 16565 16566 16567 16568 16569 16570 16571 16572 16573 16574 16575 16576 16577 16578 16579 16580 16581 16582 16583 16584 16585 16586 16587 16588 16589 16590 16591 16592 16593 16594 16595 16596 16597 16598 16599 16600 16601 16602 16603 16604 16605 16606 16607 16608 16609 16610 16611 16612 16613 16614 16615 16616 16617 16618 16619 16620 16621 16622 16623 16624 16625 16626 16627 16628 16629 16630 16631 16632 16633 16634 16635 16636 16637 16638 16639 16640 16641 16642 16643 16644 16645 16646 16647 16648 16649 16650 16651 16652 16653 16654 16655 16656 16657 16658 16659 16660 16661 16662 16663 16664 16665 16666 16667 16668 16669 16670 16671 16672 16673 16674 16675 16676 16677 16678 16679 16680 16681 16682 16683 16684 16685 16686 16687 16688 16689 16690 16691 16692 16693 16694 16695 16696 16697 16698 16699 16700 16701 16702 16703 16704 16705 16706 16707 16708 16709 16710 16711 16712 16713 16714 16715 16716 16717 16718 16719 16720 16721 16722 16723 16724 16725 16726 
16727 16728 16729 16730 16731 16732 16733 16734 16735 16736 16737 16738 16739 16740 16741 16742 16743 16744 16745 16746 16747 16748 16749 16750 16751 16752 16753 16754 16755 16756 16757 16758 16759 16760 16761 16762 16763 16764 16765 16766 16767 16768 16769 16770 16771 16772 16773 16774 16775 16776 16777 16778 16779 16780 16781 16782 16783 16784 16785 16786 16787 16788 16789 16790 16791 16792 16793 16794 16795 16796 16797 16798 16799 16800 16801 16802 16803 16804 16805 16806 16807 16808 16809 16810 16811 16812 16813 16814 16815 16816 16817 16818 16819 16820 16821 16822 16823 16824 16825 16826 16827 16828 16829 16830 16831 16832 16833 16834 16835 16836 16837 16838 16839 16840 16841 16842 16843 16844 16845 16846 16847 16848 16849 16850 16851 16852 16853 16854 16855 16856 16857 16858 16859 16860 16861 16862 16863 16864 16865 16866 16867 16868 16869 16870 16871 16872 16873 16874 16875 16876 16877 16878 16879 16880 16881 16882 16883 16884 16885 16886 16887 16888 16889 16890 16891 16892 16893 16894 16895 16896 16897 16898 16899 16900 16901 16902 16903 16904 16905 16906 16907 16908 16909 16910 16911 16912 16913 16914 16915 16916 16917 16918 16919 16920 16921 16922 16923 16924 16925 16926 16927 16928 16929 16930 16931 16932 16933 16934 16935 16936 16937 16938 16939 16940 16941 16942 16943 16944 16945 16946 16947 16948 16949 16950 16951 16952 16953 16954 16955 16956 16957 16958 16959 16960 16961 16962 16963 16964 16965 16966 16967 16968 16969 16970 16971 16972 16973 16974 16975 16976 16977 16978 16979 16980 16981 16982 16983 16984 16985 16986 16987 16988 16989 16990 16991 16992 16993 16994 16995 16996 16997 16998 16999 17000 17001 17002 17003 17004 17005 17006 17007 17008 17009 17010 17011 17012 17013 17014 17015 17016 17017 17018 17019 17020 17021 17022 17023 17024 17025 17026 17027 17028 17029 17030 17031 17032 17033 17034 17035 17036 17037 17038 17039 17040 17041 17042 17043 17044 17045 17046 17047 17048 17049 17050 17051 17052 17053 17054 17055 17056 17057 17058 17059 
17060 17061 17062 17063 17064 17065 17066 17067 17068 17069 17070 17071 17072 17073 17074 17075 17076 17077 17078 17079 17080 17081 17082 17083 17084 17085 17086 17087 17088 17089 17090 17091 17092 17093 17094 17095 17096 17097 17098 17099 17100 17101 17102 17103 17104 17105 17106 17107 17108 17109 17110 17111 17112 17113 17114 17115 17116 17117 17118 17119 17120 17121 17122 17123 17124 17125 17126 17127 17128 17129 17130 17131 17132 17133 17134 17135 17136 17137 17138 17139 17140 17141 17142 17143 17144 17145 17146 17147 17148 17149 17150 17151 17152 17153 17154 17155 17156 17157 17158 17159 17160 17161 17162 17163 17164 17165 17166 17167 17168 17169 17170 17171 17172 17173 17174 17175 17176 17177 17178 17179 17180 17181 17182 17183 17184 17185 17186 17187 17188 17189 17190 17191 17192 17193 17194 17195 17196 17197 17198 17199 17200 17201 17202 17203 17204 17205 17206 17207 17208 17209 17210 17211 17212 17213 17214 17215 17216 17217 17218 17219 17220 17221 17222 17223 17224 17225 17226 17227 17228 17229 17230 17231 17232 17233 17234 17235 17236 17237 17238 17239 17240 17241 17242 17243 17244 17245 17246 17247 17248 17249 17250 17251 17252 17253 17254 17255 17256 17257 17258 17259 17260 17261 17262 17263 17264 17265 17266 17267 17268 17269 17270 17271 17272 17273 17274 17275 17276 17277 17278 17279 17280 17281 17282 17283 17284 17285 17286 17287 17288 17289 17290 17291 17292 17293 17294 17295 17296 17297 17298 17299 17300 17301 17302 17303 17304 17305 17306 17307 17308 17309 17310 17311 17312 17313 17314 17315 17316 17317 17318 17319 17320 17321 17322 17323 17324 17325 17326 17327 17328 17329 17330 17331 17332 17333 17334 17335 17336 17337 17338 17339 17340 17341 17342 17343 17344 17345 17346 17347 17348 17349 17350 17351 17352 17353 17354 17355 17356 17357 17358 17359 17360 17361 17362 17363 17364 17365 17366 17367 17368 17369 17370 17371 17372 17373 17374 17375 17376 17377 17378 17379 17380 17381 17382 17383 17384 17385 17386 17387 17388 17389 17390 17391 17392 
17393 17394 17395 17396 17397 17398 17399 17400 17401 17402 17403 17404 17405 17406 17407 17408 17409 17410 17411 17412 17413 17414 17415 17416 17417 17418 17419 17420 17421 17422 17423 17424 17425 17426 17427 17428 17429 17430 17431 17432 17433 17434 17435 17436 17437 17438 17439 17440 17441 17442 17443 17444 17445 17446 17447 17448 17449 17450 17451 17452 17453 17454 17455 17456 17457 17458 17459 17460 17461 17462 17463 17464 17465 17466 17467 17468 17469 17470 17471 17472 17473 17474 17475 17476 17477 17478 17479 17480 17481 17482 17483 17484 17485 17486 17487 17488 17489 17490 17491 17492 17493 17494 17495 17496 17497 17498 17499 17500 17501 17502 17503 17504 17505 17506 17507 17508 17509 17510 17511 17512 17513 17514 17515 17516 17517 17518 17519 17520 17521 17522 17523 17524 17525 17526 17527 17528 17529 17530 17531 17532 17533 17534 17535 17536 17537 17538 17539 17540 17541 17542 17543 17544 17545 17546 17547 17548 17549 17550 17551 17552 17553 17554 17555 17556 17557 17558 17559 17560 17561 17562 17563 17564 17565 17566 17567 17568 17569 17570 17571 17572 17573 17574 17575 17576 17577 17578 17579 17580 17581 17582 17583 17584 17585 17586 17587 17588 17589 17590 17591 17592 17593 17594 17595 17596 17597 17598 17599 17600 17601 17602 17603 17604 17605 17606 17607 17608 17609 17610 17611 17612 17613 17614 17615 17616 17617 17618 17619 17620 17621 17622 17623 17624 17625 17626 17627 17628 17629 17630 17631 17632 17633 17634 17635 17636 17637 17638 17639 17640 17641 17642 17643 17644 17645 17646 17647 17648 17649 17650 17651 17652 17653 17654 17655 17656 17657 17658 17659 17660 17661 17662 17663 17664 17665 17666 17667 17668 17669 17670 17671 17672 17673 17674 17675 17676 17677 17678 17679 17680 17681 17682 17683 17684 17685 17686 17687 17688 17689 17690 17691 17692 17693 17694 17695 17696 17697 17698 17699 17700 17701 17702 17703 17704 17705 17706 17707 17708 17709 17710 17711 17712 17713 17714 17715 17716 17717 17718 17719 17720 17721 17722 17723 17724 17725 
17726 17727 17728 17729 17730 17731 17732 17733 17734 17735 17736 17737 17738 17739 17740 17741 17742 17743 17744 17745 17746 17747 17748 17749 17750 17751 17752 17753 17754 17755 17756 17757 17758 17759 17760 17761 17762 17763 17764 17765 17766 17767 17768 17769 17770 17771 17772 17773 17774 17775 17776 17777 17778 17779 17780 17781 17782 17783 17784 17785 17786 17787 17788 17789 17790 17791 17792 17793 17794 17795 17796 17797 17798 17799 17800 17801 17802 17803 17804 17805 17806 17807 17808 17809 17810 17811 17812 17813 17814 17815 17816 17817 17818 17819 17820 17821 17822 17823 17824 17825 17826 17827 17828 17829 17830 17831 17832 17833 17834 17835 17836 17837 17838 17839 17840 17841 17842 17843 17844 17845 17846 17847 17848 17849 17850 17851 17852 17853 17854 17855 17856 17857 17858 17859 17860 17861 17862 17863 17864 17865 17866 17867 17868 17869 17870 17871 17872 17873 17874 17875 17876 17877 17878 17879 17880 17881 17882 17883 17884 17885 17886 17887 17888 17889 17890 17891 17892 17893 17894 17895 17896 17897 17898 17899 17900 17901 17902 17903 17904 17905 17906 17907 17908 17909 17910 17911 17912 17913 17914 17915 17916 17917 17918 17919 17920 17921 17922 17923 17924 17925 17926 17927 17928 17929 17930 17931 17932 17933 17934 17935 17936 17937 17938 17939 17940 17941 17942 17943 17944 17945 17946 17947 17948 17949 17950 17951 17952 17953 17954 17955 17956 17957 17958 17959 17960 17961 17962 17963 17964 17965 17966 17967 17968 17969 17970 17971 17972 17973 17974 17975 17976 17977 17978 17979 17980 17981 17982 17983 17984 17985 17986 17987 17988 17989 17990 17991 17992 17993 17994 17995 17996 17997 17998 17999 18000 18001 18002 18003 18004 18005 18006 18007 18008 18009 18010 18011 18012 18013 18014 18015 18016 18017 18018 18019 18020 18021 18022 18023 18024 18025 18026 18027 18028 18029 18030 18031 18032 18033 18034 18035 18036 18037 18038 18039 18040 18041 18042 18043 18044 18045 18046 18047 18048 18049 18050 18051 18052 18053 18054 18055 18056 18057 18058 
18059 18060 18061 18062 18063 18064 18065 18066 18067 18068 18069 18070 18071 18072 18073 18074 18075 18076 18077 18078 18079 18080 18081 18082 18083 18084 18085 18086 18087 18088 18089 18090 18091 18092 18093 18094 18095 18096 18097 18098 18099 18100 18101 18102 18103 18104 18105 18106 18107 18108 18109 18110 18111 18112 18113 18114 18115 18116 18117 18118 18119 18120 18121 18122 18123 18124 18125 18126 18127 18128 18129 18130 18131 18132 18133 18134 18135 18136 18137 18138 18139 18140 18141 18142 18143 18144 18145 18146 18147 18148 18149 18150 18151 18152 18153 18154 18155 18156 18157 18158 18159 18160 18161 18162 18163 18164 18165 18166 18167 18168 18169 18170 18171 18172 18173 18174 18175 18176 18177 18178 18179 18180 18181 18182 18183 18184 18185 18186 18187 18188 18189 18190 18191 18192 18193 18194 18195 18196 18197 18198 18199 18200 18201 18202 18203 18204 18205 18206 18207 18208 18209 18210 18211 18212 18213 18214 18215 18216 18217 18218 18219 18220 18221 18222 18223 18224 18225 18226 18227 18228 18229 18230 18231 18232 18233 18234 18235 18236 18237 18238 18239 18240 18241 18242 18243 18244 18245 18246 18247 18248 18249 18250 18251 18252 18253 18254 18255 18256 18257 18258 18259 18260 18261 18262 18263 18264 18265 18266 18267 18268 18269 18270 18271 18272 18273 18274 18275 18276 18277 18278 18279 18280 18281 18282 18283 18284 18285 18286 18287 18288 18289 18290 18291 18292 18293 18294 18295 18296 18297 18298 18299 18300 18301 18302 18303 18304 18305 18306 18307 18308 18309 18310 18311 18312 18313 18314 18315 18316 18317 18318 18319 18320 18321 18322 18323 18324 18325 18326 18327 18328 18329 18330 18331 18332 18333 18334 18335 18336 18337 18338 18339 18340 18341 18342 18343 18344 18345 18346 18347 18348 18349 18350 18351 18352 18353 18354 18355 18356 18357 18358 18359 18360 18361 18362 18363 18364 18365 18366 18367 18368 18369 18370 18371 18372 18373 18374 18375 18376 18377 18378 18379 18380 18381 18382 18383 18384 18385 18386 18387 18388 18389 18390 18391 
18392 18393 18394 18395 18396 18397 18398 18399 18400 18401 18402 18403 18404 18405 18406 18407 18408 18409 18410 18411 18412 18413 18414 18415 18416 18417 18418 18419 18420 18421 18422 18423 18424 18425 18426 18427 18428 18429 18430 18431 18432 18433 18434 18435 18436 18437 18438 18439 18440 18441 18442 18443 18444 18445 18446 18447 18448 18449 18450 18451 18452 18453 18454 18455 18456 18457 18458 18459 18460 18461 18462 18463 18464 18465 18466 18467 18468 18469 18470 18471 18472 18473 18474 18475 18476 18477 18478 18479 18480 18481 18482 18483 18484 18485 18486 18487 18488 18489 18490 18491 18492 18493 18494 18495 18496 18497 18498 18499 18500 18501 18502 18503 18504 18505 18506 18507 18508 18509 18510 18511 18512 18513 18514 18515 18516 18517 18518 18519 18520 18521 18522 18523 18524 18525 18526 18527 18528 18529 18530 18531 18532 18533 18534 18535 18536 18537 18538 18539 18540 18541 18542 18543 18544 18545 18546 18547 18548 18549 18550 18551 18552 18553 18554 18555 18556 18557 18558 18559 18560 18561 18562 18563 18564 18565 18566 18567 18568 18569 18570 18571 18572 18573 18574 18575 18576 18577 18578 18579 18580 18581 18582 18583 18584 18585 18586 18587 18588 18589 18590 18591 18592 18593 18594 18595 18596 18597 18598 18599 18600 18601 18602 18603 18604 18605 18606 18607 18608 18609 18610 18611 18612 18613 18614 18615 18616 18617 18618 18619 18620 18621 18622 18623 18624 18625 18626 18627 18628 18629 18630 18631 18632 18633 18634 18635 18636 18637 18638 18639 18640 18641 18642 18643 18644 18645 18646 18647 18648 18649 18650 18651 18652 18653 18654 18655 18656 18657 18658 18659 18660 18661 18662 18663 18664 18665 18666 18667 18668 18669 18670 18671 18672 18673 18674 18675 18676 18677 18678 18679 18680 18681 18682 18683 18684 18685 18686 18687 18688 18689 18690 18691 18692 18693 18694 18695 18696 18697 18698 18699 18700 18701 18702 18703 18704 18705 18706 18707 18708 18709 18710 18711 18712 18713 18714 18715 18716 18717 18718 18719 18720 18721 18722 18723 18724 
18725 18726 18727 18728 18729 18730 18731 18732 18733 18734 18735 18736 18737 18738 18739 18740 18741 18742 18743 18744 18745 18746 18747 18748 18749 18750 18751 18752 18753 18754 18755 18756 18757 18758 18759 18760 18761 18762 18763 18764 18765 18766 18767 18768 18769 18770 18771 18772 18773 18774 18775 18776 18777 18778 18779 18780 18781 18782 18783 18784 18785 18786 18787 18788 18789 18790 18791 18792 18793 18794 18795 18796 18797 18798 18799 18800 18801 18802 18803 18804 18805 18806 18807 18808 18809 18810 18811 18812 18813 18814 18815 18816 18817 18818 18819 18820 18821 18822 18823 18824 18825 18826 18827 18828 18829 18830 18831 18832 18833 18834 18835 18836 18837 18838 18839 18840 18841 18842 18843 18844 18845 18846 18847 18848 18849 18850 18851 18852 18853 18854 18855 18856 18857 18858 18859 18860 18861 18862 18863 18864 18865 18866 18867 18868 18869 18870 18871 18872 18873 18874 18875 18876 18877 18878 18879 18880 18881 18882 18883 18884 18885 18886 18887 18888 18889 18890 18891 18892 18893 18894 18895 18896 18897 18898 18899 18900 18901 18902 18903 18904 18905 18906 18907 18908 18909 18910 18911 18912 18913 18914 18915 18916 18917 18918 18919 18920 18921 18922 18923 18924 18925 18926 18927 18928 18929 18930 18931 18932 18933 18934 18935 18936 18937 18938 18939 18940 18941 18942 18943 18944 18945 18946 18947 18948 18949 18950 18951 18952 18953 18954 18955 18956 18957 18958 18959 18960 18961 18962 18963 18964 18965 18966 18967 18968 18969 18970 18971 18972 18973 18974 18975 18976 18977 18978 18979 18980 18981 18982 18983 18984 18985 18986 18987 18988 18989 18990 18991 18992 18993 18994 18995 18996 18997 18998 18999 19000 19001 19002 19003 19004 19005 19006 19007 19008 19009 19010 19011 19012 19013 19014 19015 19016 19017 19018 19019 19020 19021 19022 19023 19024 19025 19026 19027 19028 19029 19030 19031 19032 19033 19034 19035 19036 19037 19038 19039 19040 19041 19042 19043 19044 19045 19046 19047 19048 19049 19050 19051 19052 19053 19054 19055 19056 19057 
19058 19059 19060 19061 19062 19063 19064 19065 19066 19067 19068 19069 19070 19071 19072 19073 19074 19075 19076 19077 19078 19079 19080 19081 19082 19083 19084 19085 19086 19087 19088 19089 19090 19091 19092 19093 19094 19095 19096 19097 19098 19099 19100 19101 19102 19103 19104 19105 19106 19107 19108 19109 19110 19111 19112 19113 19114 19115 19116 19117 19118 19119 19120 19121 19122 19123 19124 19125 19126 19127 19128 19129 19130 19131 19132 19133 19134 19135 19136 19137 19138 19139 19140 19141 19142 19143 19144 19145 19146 19147 19148 19149 19150 19151 19152 19153 19154 19155 19156 19157 19158 19159 19160 19161 19162 19163 19164 19165 19166 19167 19168 19169 19170 19171 19172 19173 19174 19175 19176 19177 19178 19179 19180 19181 19182 19183 19184 19185 19186 19187 19188 19189 19190 19191 19192 19193 19194 19195 19196 19197 19198 19199 19200 19201 19202 19203 19204 19205 19206 19207 19208 19209 19210 19211 19212 19213 19214 19215 19216 19217 19218 19219 19220 19221 19222 19223 19224 19225 19226 19227 19228 19229 19230 19231 19232 19233 19234 19235 19236 19237 19238 19239 19240 19241 19242 19243 19244 19245 19246 19247 19248 19249 19250 19251 19252 19253 19254 19255 19256 19257 19258 19259 19260 19261 19262 19263 19264 19265 19266 19267 19268 19269 19270 19271 19272 19273 19274 19275 19276 19277 19278 19279 19280 19281 19282 19283 19284 19285 19286 19287 19288 19289 19290 19291 19292 19293 19294 19295 19296 19297 19298 19299 19300 19301 19302 19303 19304 19305 19306 19307 19308 19309 19310 19311 19312 19313 19314 19315 19316 19317 19318 19319 19320 19321 19322 19323 19324 19325 19326 19327 19328 19329 19330 19331 19332 19333 19334 19335 19336 19337 19338 19339 19340 19341 19342 19343 19344 19345 19346 19347 19348 19349 19350 19351 19352 19353 19354 19355 19356 19357 19358 19359 19360 19361 19362 19363 19364 19365 19366 19367 19368 19369 19370 19371 19372 19373 19374 19375 19376 19377 19378 19379 19380 19381 19382 19383 19384 19385 19386 19387 19388 19389 19390 
19391 19392 19393 19394 19395 19396 19397 19398 19399 19400 19401 19402 19403 19404 19405 19406 19407 19408 19409 19410 19411 19412 19413 19414 19415 19416 19417 19418 19419 19420 19421 19422 19423 19424 19425 19426 19427 19428 19429 19430 19431 19432 19433 19434 19435 19436 19437 19438 19439 19440 19441 19442 19443 19444 19445 19446 19447 19448 19449 19450 19451 19452 19453 19454 19455 19456 19457 19458 19459 19460 19461 19462 19463 19464 19465 19466 19467 19468 19469 19470 19471 19472 19473 19474 19475 19476 19477 19478 19479 19480 19481 19482 19483 19484 19485 19486 19487 19488 19489 19490 19491 19492 19493 19494 19495 19496 19497 19498 19499 19500 19501 19502 19503 19504 19505 19506 19507 19508 19509 19510 19511 19512 19513 19514 19515 19516 19517 19518 19519 19520 19521 19522 19523 19524 19525 19526 19527 19528 19529 19530 19531 19532 19533 19534 19535 19536 19537 19538 19539 19540 19541 19542 19543 19544 19545 19546 19547 19548 19549 19550 19551 19552 19553 19554 19555 19556 19557 19558 19559 19560 19561 19562 19563 19564 19565 19566 19567 19568 19569 19570 19571 19572 19573 19574 19575 19576 19577 19578 19579 19580 19581 19582 19583 19584 19585 19586 19587 19588 19589 19590 19591 19592 19593 19594 19595 19596 19597 19598 19599 19600 19601 19602 19603 19604 19605 19606 19607 19608 19609 19610 19611 19612 19613 19614 19615 19616 19617 19618 19619 19620 19621 19622 19623 19624 19625 19626 19627 19628 19629 19630 19631 19632 19633 19634 19635 19636 19637 19638 19639 19640 19641 19642 19643 19644 19645 19646 19647 19648 19649 19650 19651 19652 19653 19654 19655 19656 19657 19658 19659 19660 19661 19662 19663 19664 19665 19666 19667 19668 19669 19670 19671 19672 19673 19674 19675 19676 19677 19678 19679 19680 19681 19682 19683 19684 19685 19686 19687 19688 19689 19690 19691 19692 19693 19694 19695 19696 19697 19698 19699 19700 19701 19702 19703 19704 19705 19706 19707 19708 19709 19710 19711 19712 19713 19714 19715 19716 19717 19718 19719 19720 19721 19722 19723 
19724 19725 19726 19727 19728 19729 19730 19731 19732 19733 19734 19735 19736 19737 19738 19739 19740 19741 19742 19743 19744 19745 19746 19747 19748 19749 19750 19751 19752 19753 19754 19755 19756 19757 19758 19759 19760 19761 19762 19763 19764 19765 19766 19767 19768 19769 19770 19771 19772 19773 19774 19775 19776 19777 19778 19779 19780 19781 19782 19783 19784 19785 19786 19787 19788 19789 19790 19791 19792 19793 19794 19795 19796 19797 19798 19799 19800 19801 19802 19803 19804 19805 19806 19807 19808 19809 19810 19811 19812 19813 19814 19815 19816 19817 19818 19819 19820 19821 19822 19823 19824 19825 19826 19827 19828 19829 19830 19831 19832 19833 19834 19835 19836 19837 19838 19839 19840 19841 19842 19843 19844 19845 19846 19847 19848 19849 19850 19851 19852 19853 19854 19855 19856 19857 19858 19859 19860 19861 19862 19863 19864 19865 19866 19867 19868 19869 19870 19871 19872 19873 19874 19875 19876 19877 19878 19879 19880 19881 19882 19883 19884 19885 19886 19887 19888 19889 19890 19891 19892 19893 19894 19895 19896 19897 19898 19899 19900 19901 19902 19903 19904 19905 19906 19907 19908 19909 19910 19911 19912 19913 19914 19915 19916 19917 19918 19919 19920 19921 19922 19923 19924 19925 19926 19927 19928 19929 19930 19931 19932 19933 19934 19935 19936 19937 19938 19939 19940 19941 19942 19943 19944 19945 19946 19947 19948 19949 19950 19951 19952 19953 19954 19955 19956 19957 19958 19959 19960 19961 19962 19963 19964 19965 19966 19967 19968 19969 19970 19971 19972 19973 19974 19975 19976 19977 19978 19979 19980 19981 19982 19983 19984 19985 19986 19987 19988 19989 19990 19991 19992 19993 19994 19995 19996 19997 19998 19999 20000 20001 20002 20003 20004 20005 20006 20007 20008 20009 20010 20011 20012 20013 20014 20015 20016 20017 20018 20019 20020 20021 20022 20023 20024 20025 20026 20027 20028 20029 20030 20031 20032 20033 20034 20035 20036 20037 20038 20039 20040 20041 20042 20043 20044 20045 20046 20047 20048 20049 20050 20051 20052 20053 20054 20055 20056 
20057 20058 20059 20060 20061 20062 20063 20064 20065 20066 20067 20068 20069 20070 20071 20072 20073 20074 20075 20076 20077 20078 20079 20080 20081 20082 20083 20084 20085 20086 20087 20088 20089 20090 20091 20092 20093 20094 20095 20096 20097 20098 20099 20100 20101 20102 20103 20104 20105 20106 20107 20108 20109 20110 20111 20112 20113 20114 20115 20116 20117 20118 20119 20120 20121 20122 20123 20124 20125 20126 20127 20128 20129 20130 20131 20132 20133 20134 20135 20136 20137 20138 20139 20140 20141 20142 20143 20144 20145 20146 20147 20148 20149 20150 20151 20152 20153 20154 20155 20156 20157 20158 20159 20160 20161 20162 20163 20164 20165 20166 20167 20168 20169 20170 20171 20172 20173 20174 20175 20176 20177 20178 20179 20180 20181 20182 20183 20184 20185 20186 20187 20188 20189 20190 20191 20192 20193 20194 20195 20196 20197 20198 20199 20200 20201 20202 20203 20204 20205 20206 20207 20208 20209 20210 20211 20212 20213 20214 20215 20216 20217 20218 20219 20220 20221 20222 20223 20224 20225 20226 20227 20228 20229 20230 20231 20232 20233 20234 20235 20236 20237 20238 20239 20240 20241 20242 20243 20244 20245 20246 20247 20248 20249 20250 20251 20252 20253 20254 20255 20256 20257 20258 20259 20260 20261 20262 20263 20264 20265 20266 20267 20268 20269 20270 20271 20272 20273 20274 20275 20276 20277 20278 20279 20280 20281 20282 20283 20284 20285 20286 20287 20288 20289 20290 20291 20292 20293 20294 20295 20296 20297 20298 20299 20300 20301 20302 20303 20304 20305 20306 20307 20308 20309 20310 20311 20312 20313 20314 20315 20316 20317 20318 20319 20320 20321 20322 20323 20324 20325 20326 20327 20328 20329 20330 20331 20332 20333 20334 20335 20336 20337 20338 20339 20340 20341 20342 20343 20344 20345 20346 20347 20348 20349 20350 20351 20352 20353 20354 20355 20356 20357 20358 20359 20360 20361 20362 20363 20364 20365 20366 20367 20368 20369 20370 20371 20372 20373 20374 20375 20376 20377 20378 20379 20380 20381 20382 20383 20384 20385 20386 20387 20388 20389 
20390 20391 20392 20393 20394 20395 20396 20397 20398 20399 20400 20401 20402 20403 20404 20405 20406 20407 20408 20409 20410 20411 20412 20413 20414 20415 20416 20417 20418 20419 20420 20421 20422 20423 20424 20425 20426 20427 20428 20429 20430 20431 20432 20433 20434 20435 20436 20437 20438 20439 20440 20441 20442 20443 20444 20445 20446 20447 20448 20449 20450 20451 20452 20453 20454 20455 20456 20457 20458 20459 20460 20461 20462 20463 20464 20465 20466 20467 20468 20469 20470 20471 20472 20473 20474 20475 20476 20477 20478 20479 20480 20481 20482 20483 20484 20485 20486 20487 20488 20489 20490 20491 20492 20493 20494 20495 20496 20497 20498 20499 20500 20501 20502 20503 20504 20505 20506 20507 20508 20509 20510 20511 20512 20513 20514 20515 20516 20517 20518 20519 20520 20521 20522 20523 20524 20525 20526 20527 20528 20529 20530 20531 20532 20533 20534 20535 20536 20537 20538 20539 20540 20541 20542 20543 20544 20545 20546 20547 20548 20549 20550 20551 20552 20553 20554 20555 20556 20557 20558 20559 20560 20561 20562 20563 20564 20565 20566 // SPDX-License-Identifier: GPL-2.0-only /* * This is the new netlink-based wireless configuration interface. 
* * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/if.h> #include <linux/module.h> #include <linux/err.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/if_ether.h> #include <linux/ieee80211.h> #include <linux/nl80211.h> #include <linux/rtnetlink.h> #include <linux/netlink.h> #include <linux/nospec.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> #include <net/net_namespace.h> #include <net/genetlink.h> #include <net/cfg80211.h> #include <net/sock.h> #include <net/inet_connection_sock.h> #include "core.h" #include "nl80211.h" #include "reg.h" #include "rdev-ops.h" static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_crypto_settings *settings, int cipher_limit); /* the netlink family */ static struct genl_family nl80211_fam; /* multicast groups */ enum nl80211_multicast_groups { NL80211_MCGRP_CONFIG, NL80211_MCGRP_SCAN, NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, NL80211_MCGRP_VENDOR, NL80211_MCGRP_NAN, NL80211_MCGRP_TESTMODE /* keep last - ifdef! 
*/ }; static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG }, [NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN }, [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif }; /* returns ERR_PTR values */ static struct wireless_dev * __cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev, struct net *netns, struct nlattr **attrs) { struct wireless_dev *result = NULL; bool have_ifidx = attrs[NL80211_ATTR_IFINDEX]; bool have_wdev_id = attrs[NL80211_ATTR_WDEV]; u64 wdev_id = 0; int wiphy_idx = -1; int ifidx = -1; if (!have_ifidx && !have_wdev_id) return ERR_PTR(-EINVAL); if (have_ifidx) ifidx = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); if (have_wdev_id) { wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); wiphy_idx = wdev_id >> 32; } if (rdev) { struct wireless_dev *wdev; lockdep_assert_held(&rdev->wiphy.mtx); list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { result = wdev; break; } if (have_wdev_id && wdev->identifier == (u32)wdev_id) { result = wdev; break; } } return result ?: ERR_PTR(-ENODEV); } ASSERT_RTNL(); for_each_rdev(rdev) { struct wireless_dev *wdev; if (wiphy_net(&rdev->wiphy) != netns) continue; if (have_wdev_id && rdev->wiphy_idx != wiphy_idx) continue; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (have_ifidx && wdev->netdev && wdev->netdev->ifindex == ifidx) { result = wdev; break; } if (have_wdev_id && wdev->identifier == (u32)wdev_id) { result = wdev; break; } } if (result) break; } if (result) return result; return ERR_PTR(-ENODEV); } static struct 
cfg80211_registered_device * __cfg80211_rdev_from_attrs(struct net *netns, struct nlattr **attrs) { struct cfg80211_registered_device *rdev = NULL, *tmp; struct net_device *netdev; ASSERT_RTNL(); if (!attrs[NL80211_ATTR_WIPHY] && !attrs[NL80211_ATTR_IFINDEX] && !attrs[NL80211_ATTR_WDEV]) return ERR_PTR(-EINVAL); if (attrs[NL80211_ATTR_WIPHY]) rdev = cfg80211_rdev_by_wiphy_idx( nla_get_u32(attrs[NL80211_ATTR_WIPHY])); if (attrs[NL80211_ATTR_WDEV]) { u64 wdev_id = nla_get_u64(attrs[NL80211_ATTR_WDEV]); struct wireless_dev *wdev; bool found = false; tmp = cfg80211_rdev_by_wiphy_idx(wdev_id >> 32); if (tmp) { /* make sure wdev exists */ list_for_each_entry(wdev, &tmp->wiphy.wdev_list, list) { if (wdev->identifier != (u32)wdev_id) continue; found = true; break; } if (!found) tmp = NULL; if (rdev && tmp != rdev) return ERR_PTR(-EINVAL); rdev = tmp; } } if (attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(attrs[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(netns, ifindex); if (netdev) { if (netdev->ieee80211_ptr) tmp = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); else tmp = NULL; /* not wireless device -- return error */ if (!tmp) return ERR_PTR(-EINVAL); /* mismatch -- return error */ if (rdev && tmp != rdev) return ERR_PTR(-EINVAL); rdev = tmp; } } if (!rdev) return ERR_PTR(-ENODEV); if (netns != wiphy_net(&rdev->wiphy)) return ERR_PTR(-ENODEV); return rdev; } /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. * * The result of this can be a PTR_ERR and hence must * be checked with IS_ERR() for errors. 
*/ static struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct net *netns, struct genl_info *info) { return __cfg80211_rdev_from_attrs(netns, info->attrs); } static int validate_beacon_head(const struct nlattr *attr, struct netlink_ext_ack *extack) { const u8 *data = nla_data(attr); unsigned int len = nla_len(attr); const struct element *elem; const struct ieee80211_mgmt *mgmt = (void *)data; unsigned int fixedlen, hdrlen; bool s1g_bcn; if (len < offsetofend(typeof(*mgmt), frame_control)) goto err; s1g_bcn = ieee80211_is_s1g_beacon(mgmt->frame_control); if (s1g_bcn) { fixedlen = offsetof(struct ieee80211_ext, u.s1g_beacon.variable); hdrlen = offsetof(struct ieee80211_ext, u.s1g_beacon); } else { fixedlen = offsetof(struct ieee80211_mgmt, u.beacon.variable); hdrlen = offsetof(struct ieee80211_mgmt, u.beacon); } if (len < fixedlen) goto err; if (ieee80211_hdrlen(mgmt->frame_control) != hdrlen) goto err; data += fixedlen; len -= fixedlen; for_each_element(elem, data, len) { /* nothing */ } if (for_each_element_completed(elem, data, len)) return 0; err: NL_SET_ERR_MSG_ATTR(extack, attr, "malformed beacon head"); return -EINVAL; } static int validate_ie_attr(const struct nlattr *attr, struct netlink_ext_ack *extack) { const u8 *data = nla_data(attr); unsigned int len = nla_len(attr); const struct element *elem; for_each_element(elem, data, len) { /* nothing */ } if (for_each_element_completed(elem, data, len)) return 0; NL_SET_ERR_MSG_ATTR(extack, attr, "malformed information elements"); return -EINVAL; } static int validate_he_capa(const struct nlattr *attr, struct netlink_ext_ack *extack) { if (!ieee80211_he_capa_size_ok(nla_data(attr), nla_len(attr))) return -EINVAL; return 0; } static int validate_supported_selectors(const struct nlattr *attr, struct netlink_ext_ack *extack) { const u8 *supported_selectors = nla_data(attr); u8 supported_selectors_len = nla_len(attr); /* The top bit must not be set as it is not part of the selector */ for (int i = 0; 
i < supported_selectors_len; i++) { if (supported_selectors[i] & 0x80) return -EINVAL; } return 0; } /* policy for the attributes */ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR]; static const struct nla_policy nl80211_ftm_responder_policy[NL80211_FTM_RESP_ATTR_MAX + 1] = { [NL80211_FTM_RESP_ATTR_ENABLED] = { .type = NLA_FLAG, }, [NL80211_FTM_RESP_ATTR_LCI] = { .type = NLA_BINARY, .len = U8_MAX }, [NL80211_FTM_RESP_ATTR_CIVICLOC] = { .type = NLA_BINARY, .len = U8_MAX }, }; static const struct nla_policy nl80211_pmsr_ftm_req_attr_policy[NL80211_PMSR_FTM_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_FTM_REQ_ATTR_ASAP] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_PREAMBLE] = { .type = NLA_U32 }, [NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD] = { .type = NLA_U16 }, [NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION] = NLA_POLICY_MAX(NLA_U8, 15), [NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES] = { .type = NLA_U8 }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_REQUEST_CIVICLOC] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_TRIGGER_BASED] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_NON_TRIGGER_BASED] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_LMR_FEEDBACK] = { .type = NLA_FLAG }, [NL80211_PMSR_FTM_REQ_ATTR_BSS_COLOR] = { .type = NLA_U8 }, }; static const struct nla_policy nl80211_pmsr_req_data_policy[NL80211_PMSR_TYPE_MAX + 1] = { [NL80211_PMSR_TYPE_FTM] = NLA_POLICY_NESTED(nl80211_pmsr_ftm_req_attr_policy), }; static const struct nla_policy nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = { [NL80211_PMSR_REQ_ATTR_DATA] = NLA_POLICY_NESTED(nl80211_pmsr_req_data_policy), [NL80211_PMSR_REQ_ATTR_GET_AP_TSF] = { .type = NLA_FLAG }, }; static const struct nla_policy nl80211_pmsr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = { [NL80211_PMSR_PEER_ATTR_ADDR] = 
NLA_POLICY_ETH_ADDR, [NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy), [NL80211_PMSR_PEER_ATTR_REQ] = NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy), [NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT }, }; static const struct nla_policy nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = { [NL80211_PMSR_ATTR_MAX_PEERS] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_REPORT_AP_TSF] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_TYPE_CAPA] = { .type = NLA_REJECT }, [NL80211_PMSR_ATTR_PEERS] = NLA_POLICY_NESTED_ARRAY(nl80211_pmsr_peer_attr_policy), }; static const struct nla_policy he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = { [NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] = NLA_POLICY_RANGE(NLA_U8, 1, 20), [NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] = NLA_POLICY_RANGE(NLA_U8, 1, 20), [NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET] = NLA_POLICY_RANGE(NLA_U8, 1, 20), [NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP] = NLA_POLICY_EXACT_LEN(8), [NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP] = NLA_POLICY_EXACT_LEN(8), [NL80211_HE_OBSS_PD_ATTR_SR_CTRL] = { .type = NLA_U8 }, }; static const struct nla_policy he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { [NL80211_HE_BSS_COLOR_ATTR_COLOR] = NLA_POLICY_RANGE(NLA_U8, 1, 63), [NL80211_HE_BSS_COLOR_ATTR_DISABLED] = { .type = NLA_FLAG }, [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, }; static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, [NL80211_TXRATE_HT] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_HT_RATES }, [NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)), [NL80211_TXRATE_GI] = { .type = NLA_U8 }, [NL80211_TXRATE_HE] = NLA_POLICY_EXACT_LEN(sizeof(struct nl80211_txrate_he)), [NL80211_TXRATE_HE_GI] = NLA_POLICY_RANGE(NLA_U8, NL80211_RATE_INFO_HE_GI_0_8, NL80211_RATE_INFO_HE_GI_3_2), 
[NL80211_TXRATE_HE_LTF] = NLA_POLICY_RANGE(NLA_U8, NL80211_RATE_INFO_HE_1XLTF, NL80211_RATE_INFO_HE_4XLTF), }; static const struct nla_policy nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 }, [NL80211_TID_CONFIG_ATTR_PEER_SUPP] = { .type = NLA_U64 }, [NL80211_TID_CONFIG_ATTR_OVERRIDE] = { .type = NLA_FLAG }, [NL80211_TID_CONFIG_ATTR_TIDS] = NLA_POLICY_RANGE(NLA_U16, 1, 0xff), [NL80211_TID_CONFIG_ATTR_NOACK] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_AMPDU_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_AMSDU_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE] = NLA_POLICY_MAX(NLA_U8, NL80211_TX_RATE_FIXED), [NL80211_TID_CONFIG_ATTR_TX_RATE] = NLA_POLICY_NESTED(nl80211_txattr_policy), }; static const struct nla_policy nl80211_fils_discovery_policy[NL80211_FILS_DISCOVERY_ATTR_MAX + 1] = { [NL80211_FILS_DISCOVERY_ATTR_INT_MIN] = NLA_POLICY_MAX(NLA_U32, 10000), [NL80211_FILS_DISCOVERY_ATTR_INT_MAX] = NLA_POLICY_MAX(NLA_U32, 10000), [NL80211_FILS_DISCOVERY_ATTR_TMPL] = NLA_POLICY_RANGE(NLA_BINARY, NL80211_FILS_DISCOVERY_TMPL_MIN_LEN, IEEE80211_MAX_DATA_LEN), }; static const struct nla_policy nl80211_unsol_bcast_probe_resp_policy[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1] = { [NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] = NLA_POLICY_MAX(NLA_U32, 20), [NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN } }; static const struct nla_policy sar_specs_policy[NL80211_SAR_ATTR_SPECS_MAX + 1] = { [NL80211_SAR_ATTR_SPECS_POWER] = { .type = NLA_S32 }, [NL80211_SAR_ATTR_SPECS_RANGE_INDEX] = {.type = NLA_U32 }, }; static const 
struct nla_policy sar_policy[NL80211_SAR_ATTR_MAX + 1] = { [NL80211_SAR_ATTR_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_SAR_TYPE), [NL80211_SAR_ATTR_SPECS] = NLA_POLICY_NESTED_ARRAY(sar_specs_policy), }; static const struct nla_policy nl80211_mbssid_config_policy[NL80211_MBSSID_CONFIG_ATTR_MAX + 1] = { [NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES] = NLA_POLICY_MIN(NLA_U8, 2), [NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_MBSSID_CONFIG_ATTR_INDEX] = { .type = NLA_U8 }, [NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX] = { .type = NLA_U32 }, [NL80211_MBSSID_CONFIG_ATTR_EMA] = { .type = NLA_FLAG }, }; static const struct nla_policy nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = { [NL80211_STA_WME_UAPSD_QUEUES] = { .type = NLA_U8 }, [NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 }, }; static const struct netlink_range_validation nl80211_punct_bitmap_range = { .min = 0, .max = 0xffff, }; static const struct netlink_range_validation q_range = { .max = INT_MAX, }; static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8, NL80211_EDMG_CHANNELS_MIN, NL80211_EDMG_CHANNELS_MAX), [NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8, NL80211_EDMG_BW_CONFIG_MIN, NL80211_EDMG_BW_CONFIG_MAX), [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999), [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_ATTR_WIPHY_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), 
[NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_DYN_ACK] = { .type = NLA_FLAG }, [NL80211_ATTR_IFTYPE] = NLA_POLICY_MAX(NLA_U32, NL80211_IFTYPE_MAX), [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, [NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [NL80211_ATTR_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_PREV_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 7), [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 }, [NL80211_ATTR_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES), [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 }, [NL80211_ATTR_BEACON_HEAD] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_beacon_head, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_BEACON_TAIL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_STA_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_STA_FLAGS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_LISTEN_INTERVAL] = { .type = NLA_U16 }, [NL80211_ATTR_STA_SUPPORTED_RATES] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_STA_PLINK_ACTION] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_ACTIONS - 1), [NL80211_ATTR_STA_TX_POWER_SETTING] = NLA_POLICY_RANGE(NLA_U8, NL80211_TX_POWER_AUTOMATIC, NL80211_TX_POWER_FIXED), [NL80211_ATTR_STA_TX_POWER] = { .type = NLA_S16 }, [NL80211_ATTR_STA_VLAN] = { .type = NLA_U32 }, [NL80211_ATTR_MNTR_FLAGS] = { /* NLA_NESTED can't be empty */ }, [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, 
[NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, [NL80211_ATTR_BSS_BASIC_RATES] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, [NL80211_ATTR_BSS_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED }, [NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG }, [NL80211_ATTR_HT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_HT_CAPABILITY_LEN), [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 }, [NL80211_ATTR_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED }, [NL80211_ATTR_SSID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_SSID_LEN }, [NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_FREQ_FIXED] = { .type = NLA_FLAG }, [NL80211_ATTR_TIMED_OUT] = { .type = NLA_FLAG }, [NL80211_ATTR_USE_MFP] = NLA_POLICY_RANGE(NLA_U32, NL80211_MFP_NO, NL80211_MFP_OPTIONAL), [NL80211_ATTR_STA_FLAGS2] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_sta_flag_update)), [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_CONTROL_PORT_OVER_NL80211] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_STATUS_CODE] = { .type = NLA_U16 }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = NLA_POLICY_RANGE(NLA_U32, 0, NL80211_WPA_VERSION_1 | NL80211_WPA_VERSION_2 | NL80211_WPA_VERSION_3), 
[NL80211_ATTR_PID] = { .type = NLA_U32 }, [NL80211_ATTR_4ADDR] = { .type = NLA_U8 }, [NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN), [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, [NL80211_ATTR_FRAME] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, }, [NL80211_ATTR_PS_STATE] = NLA_POLICY_RANGE(NLA_U32, NL80211_PS_DISABLED, NL80211_PS_ENABLED), [NL80211_ATTR_CQM] = { .type = NLA_NESTED, }, [NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, [NL80211_ATTR_WIPHY_ANTENNA_TX] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_ANTENNA_RX] = { .type = NLA_U32 }, [NL80211_ATTR_MCAST_RATE] = { .type = NLA_U32 }, [NL80211_ATTR_OFFCHANNEL_TX_OK] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED }, [NL80211_ATTR_WOWLAN_TRIGGERS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_PLINK_STATE] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_PLINK_STATES - 1), [NL80211_ATTR_MEASUREMENT_DURATION] = { .type = NLA_U16 }, [NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY] = { .type = NLA_FLAG }, [NL80211_ATTR_MESH_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_SCHED_SCAN_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_REKEY_DATA] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SUPP_RATES] = { .type = NLA_NESTED }, [NL80211_ATTR_HIDDEN_SSID] = NLA_POLICY_RANGE(NLA_U32, NL80211_HIDDEN_SSID_NOT_IN_USE, NL80211_HIDDEN_SSID_ZERO_CONTENTS), [NL80211_ATTR_IE_PROBE_RESP] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_ATTR_IE_ASSOC_RESP] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), 
[NL80211_ATTR_ROAM_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_STA_WME] = NLA_POLICY_NESTED(nl80211_sta_wme_policy), [NL80211_ATTR_SCHED_SCAN_MATCH] = { .type = NLA_NESTED }, [NL80211_ATTR_TX_NO_CCK_RATE] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_ACTION] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_DIALOG_TOKEN] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 }, [NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG }, [NL80211_ATTR_TDLS_INITIATOR] = { .type = NLA_FLAG }, [NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG }, [NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 }, [NL80211_ATTR_DISABLE_HT] = { .type = NLA_FLAG }, [NL80211_ATTR_HT_CAPABILITY_MASK] = { .len = NL80211_HT_CAPABILITY_LEN }, [NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 }, [NL80211_ATTR_INACTIVITY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_ATTR_BG_SCAN_PERIOD] = { .type = NLA_U16 }, [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, /* need to include at least Auth Transaction and Status Code */ [NL80211_ATTR_AUTH_DATA] = NLA_POLICY_MIN_LEN(4), [NL80211_ATTR_VHT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_VHT_CAPABILITY_LEN), [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127), [NL80211_ATTR_P2P_OPPPS] = NLA_POLICY_MAX(NLA_U8, 1), [NL80211_ATTR_LOCAL_MESH_POWER_MODE] = NLA_POLICY_RANGE(NLA_U32, NL80211_MESH_POWER_UNKNOWN + 1, NL80211_MESH_POWER_MAX), [NL80211_ATTR_ACL_POLICY] = {. 
type = NLA_U32 }, [NL80211_ATTR_MAC_ADDRS] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_CAPABILITY] = { .type = NLA_U16 }, [NL80211_ATTR_STA_EXT_CAPABILITY] = { .type = NLA_BINARY, }, [NL80211_ATTR_SPLIT_WIPHY_DUMP] = { .type = NLA_FLAG, }, [NL80211_ATTR_DISABLE_VHT] = { .type = NLA_FLAG }, [NL80211_ATTR_VHT_CAPABILITY_MASK] = { .len = NL80211_VHT_CAPABILITY_LEN, }, [NL80211_ATTR_MDID] = { .type = NLA_U16 }, [NL80211_ATTR_IE_RIC] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_CRIT_PROT_ID] = { .type = NLA_U16 }, [NL80211_ATTR_MAX_CRIT_PROT_DURATION] = NLA_POLICY_MAX(NLA_U16, NL80211_CRIT_PROTO_MAX_DURATION), [NL80211_ATTR_PEER_AID] = NLA_POLICY_RANGE(NLA_U16, 1, IEEE80211_MAX_AID), [NL80211_ATTR_CH_SWITCH_COUNT] = { .type = NLA_U32 }, [NL80211_ATTR_CH_SWITCH_BLOCK_TX] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_IES] = { .type = NLA_NESTED }, [NL80211_ATTR_CNTDWN_OFFS_BEACON] = { .type = NLA_BINARY }, [NL80211_ATTR_CNTDWN_OFFS_PRESP] = { .type = NLA_BINARY }, [NL80211_ATTR_STA_SUPPORTED_CHANNELS] = NLA_POLICY_MIN_LEN(2), /* * The value of the Length field of the Supported Operating * Classes element is between 2 and 253. 
*/ [NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES] = NLA_POLICY_RANGE(NLA_BINARY, 2, 253), [NL80211_ATTR_HANDLE_DFS] = { .type = NLA_FLAG }, [NL80211_ATTR_OPMODE_NOTIF] = { .type = NLA_U8 }, [NL80211_ATTR_VENDOR_ID] = { .type = NLA_U32 }, [NL80211_ATTR_VENDOR_SUBCMD] = { .type = NLA_U32 }, [NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY }, [NL80211_ATTR_QOS_MAP] = NLA_POLICY_RANGE(NLA_BINARY, IEEE80211_QOS_MAP_LEN_MIN, IEEE80211_QOS_MAP_LEN_MAX), [NL80211_ATTR_MAC_HINT] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 }, [NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 }, [NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG }, [NL80211_ATTR_TSID] = NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_TIDS - 1), [NL80211_ATTR_USER_PRIO] = NLA_POLICY_MAX(NLA_U8, IEEE80211_NUM_UPS - 1), [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, [NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG }, [NL80211_ATTR_PBSS] = { .type = NLA_FLAG }, [NL80211_ATTR_BSS_SELECT] = { .type = NLA_NESTED }, [NL80211_ATTR_STA_SUPPORT_P2P_PS] = NLA_POLICY_MAX(NLA_U8, NUM_NL80211_P2P_PS_STATUS - 1), [NL80211_ATTR_MU_MIMO_GROUP_DATA] = { .len = VHT_MUMIMO_GROUPS_DATA_LEN }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_NAN_MASTER_PREF] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_ATTR_BANDS] = { .type = NLA_U32 }, [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, [NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY, .len = FILS_MAX_KEK_LEN }, [NL80211_ATTR_FILS_NONCES] = NLA_POLICY_EXACT_LEN_WARN(2 * FILS_NONCE_LEN), 
[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, }, [NL80211_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN), [NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] = { .type = NLA_S8 }, [NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = { .len = sizeof(struct nl80211_bss_select_rssi_adjust) }, [NL80211_ATTR_TIMEOUT_REASON] = { .type = NLA_U32 }, [NL80211_ATTR_FILS_ERP_USERNAME] = { .type = NLA_BINARY, .len = FILS_ERP_MAX_USERNAME_LEN }, [NL80211_ATTR_FILS_ERP_REALM] = { .type = NLA_BINARY, .len = FILS_ERP_MAX_REALM_LEN }, [NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 }, [NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY, .len = FILS_ERP_MAX_RRK_LEN }, [NL80211_ATTR_FILS_CACHE_ID] = NLA_POLICY_EXACT_LEN_WARN(2), [NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN }, [NL80211_ATTR_PMKR0_NAME] = NLA_POLICY_EXACT_LEN(WLAN_PMK_NAME_LEN), [NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG }, [NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG }, [NL80211_ATTR_TXQ_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_MEMORY_LIMIT] = { .type = NLA_U32 }, [NL80211_ATTR_TXQ_QUANTUM] = NLA_POLICY_FULL_RANGE(NLA_U32, &q_range), [NL80211_ATTR_HE_CAPABILITY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_he_capa, NL80211_HE_MAX_CAPABILITY_LEN), [NL80211_ATTR_FTM_RESPONDER] = NLA_POLICY_NESTED(nl80211_ftm_responder_policy), [NL80211_ATTR_TIMEOUT] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PEER_MEASUREMENTS] = NLA_POLICY_NESTED(nl80211_pmsr_attr_policy), [NL80211_ATTR_AIRTIME_WEIGHT] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY, .len = SAE_PASSWORD_MAX_LEN }, [NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG }, [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), [NL80211_ATTR_TID_CONFIG] = NLA_POLICY_NESTED_ARRAY(nl80211_tid_config_attr_policy), 
[NL80211_ATTR_CONTROL_PORT_NO_PREAUTH] = { .type = NLA_FLAG }, [NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1), [NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100), [NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG }, [NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999), [NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED }, [NL80211_ATTR_HE_6GHZ_CAPABILITY] = NLA_POLICY_EXACT_LEN(sizeof(struct ieee80211_he_6ghz_capa)), [NL80211_ATTR_FILS_DISCOVERY] = NLA_POLICY_NESTED(nl80211_fils_discovery_policy), [NL80211_ATTR_UNSOL_BCAST_PROBE_RESP] = NLA_POLICY_NESTED(nl80211_unsol_bcast_probe_resp_policy), [NL80211_ATTR_S1G_CAPABILITY] = NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN), [NL80211_ATTR_S1G_CAPABILITY_MASK] = NLA_POLICY_EXACT_LEN(IEEE80211_S1G_CAPABILITY_LEN), [NL80211_ATTR_SAE_PWE] = NLA_POLICY_RANGE(NLA_U8, NL80211_SAE_PWE_HUNT_AND_PECK, NL80211_SAE_PWE_BOTH), [NL80211_ATTR_RECONNECT_REQUESTED] = { .type = NLA_REJECT }, [NL80211_ATTR_SAR_SPEC] = NLA_POLICY_NESTED(sar_policy), [NL80211_ATTR_DISABLE_HE] = { .type = NLA_FLAG }, [NL80211_ATTR_OBSS_COLOR_BITMAP] = { .type = NLA_U64 }, [NL80211_ATTR_COLOR_CHANGE_COUNT] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_COLOR] = { .type = NLA_U8 }, [NL80211_ATTR_COLOR_CHANGE_ELEMS] = NLA_POLICY_NESTED(nl80211_policy), [NL80211_ATTR_MBSSID_CONFIG] = NLA_POLICY_NESTED(nl80211_mbssid_config_policy), [NL80211_ATTR_MBSSID_ELEMS] = { .type = NLA_NESTED }, [NL80211_ATTR_RADAR_BACKGROUND] = { .type = NLA_FLAG }, [NL80211_ATTR_AP_SETTINGS_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_EHT_CAPABILITY] = NLA_POLICY_RANGE(NLA_BINARY, NL80211_EHT_MIN_CAPABILITY_LEN, NL80211_EHT_MAX_CAPABILITY_LEN), [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG }, [NL80211_ATTR_MLO_LINKS] = NLA_POLICY_NESTED_ARRAY(nl80211_policy), [NL80211_ATTR_MLO_LINK_ID] = NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS - 1), [NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN), 
[NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG },
	[NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
	[NL80211_ATTR_PUNCT_BITMAP] =
		NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range),

	[NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS] = { .type = NLA_U16 },
	[NL80211_ATTR_HW_TIMESTAMP_ENABLED] = { .type = NLA_FLAG },
	[NL80211_ATTR_EMA_RNR_ELEMS] = { .type = NLA_NESTED },
	[NL80211_ATTR_MLO_LINK_DISABLED] = { .type = NLA_FLAG },
	[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA] = { .type = NLA_FLAG },
	[NL80211_ATTR_MLO_TTLM_DLINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
	[NL80211_ATTR_MLO_TTLM_ULINK] = NLA_POLICY_EXACT_LEN(sizeof(u16) * 8),
	[NL80211_ATTR_ASSOC_SPP_AMSDU] = { .type = NLA_FLAG },
	[NL80211_ATTR_VIF_RADIO_MASK] = { .type = NLA_U32 },
	[NL80211_ATTR_SUPPORTED_SELECTORS] =
		NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_supported_selectors,
				       NL80211_MAX_SUPP_SELECTORS),
	[NL80211_ATTR_MLO_RECONF_REM_LINKS] = { .type = NLA_U16 },
	[NL80211_ATTR_EPCS] = { .type = NLA_FLAG },
};

/*
 * policy for the key attributes
 *
 * Applied by nl80211_parse_key_new() to the attributes nested inside a
 * single key container (NL80211_KEY_*).
 */
static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
	[NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN },
	[NL80211_KEY_IDX] = { .type = NLA_U8 },
	[NL80211_KEY_CIPHER] = { .type = NLA_U32 },
	[NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 },
	[NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
	[NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
	[NL80211_KEY_TYPE] = NLA_POLICY_MAX(NLA_U32, NUM_NL80211_KEYTYPES - 1),
	[NL80211_KEY_DEFAULT_TYPES] = { .type = NLA_NESTED },
	[NL80211_KEY_MODE] = NLA_POLICY_RANGE(NLA_U8, 0, NL80211_KEY_SET_TX),
};

/*
 * policy for the key default flags
 *
 * Applied by nl80211_parse_key_new() and nl80211_parse_key_old() to the
 * nested "default types" attribute; both entries are presence-only flags.
 */
static const struct nla_policy
nl80211_key_default_policy[NUM_NL80211_KEY_DEFAULT_TYPES] = {
	[NL80211_KEY_DEFAULT_TYPE_UNICAST] = { .type = NLA_FLAG },
	[NL80211_KEY_DEFAULT_TYPE_MULTICAST] = { .type = NLA_FLAG },
};

#ifdef CONFIG_PM
/* policy for WoWLAN attributes (NL80211_WOWLAN_TRIG_*) */
static const struct nla_policy
nl80211_wowlan_policy[NUM_NL80211_WOWLAN_TRIG] = {
	[NL80211_WOWLAN_TRIG_ANY] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_DISCONNECT] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_MAGIC_PKT] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_PKT_PATTERN] = { .type = NLA_NESTED },
	[NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_RFKILL_RELEASE] = { .type = NLA_FLAG },
	[NL80211_WOWLAN_TRIG_TCP_CONNECTION] = { .type = NLA_NESTED },
	[NL80211_WOWLAN_TRIG_NET_DETECT] = { .type = NLA_NESTED },
};

/* policy for WoWLAN TCP connection attributes (NL80211_WOWLAN_TCP_*) */
static const struct nla_policy
nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
	[NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 },
	[NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 },
	[NL80211_WOWLAN_TCP_DST_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
	[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
	[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
	[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = NLA_POLICY_MIN_LEN(1),
	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ] = {
		.len = sizeof(struct nl80211_wowlan_tcp_data_seq)
	},
	[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN] = {
		.len = sizeof(struct nl80211_wowlan_tcp_data_token)
	},
	[NL80211_WOWLAN_TCP_DATA_INTERVAL] = { .type = NLA_U32 },
	[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] = NLA_POLICY_MIN_LEN(1),
	[NL80211_WOWLAN_TCP_WAKE_MASK] = NLA_POLICY_MIN_LEN(1),
};
#endif /* CONFIG_PM */

/* policy for coalesce rule attributes */
static const struct nla_policy
nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
	[NL80211_ATTR_COALESCE_RULE_DELAY] = { .type = NLA_U32 },
	[NL80211_ATTR_COALESCE_RULE_CONDITION] =
		NLA_POLICY_RANGE(NLA_U32, NL80211_COALESCE_CONDITION_MATCH,
				 NL80211_COALESCE_CONDITION_NO_MATCH),
	[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN] = { .type = NLA_NESTED },
};

/* policy for GTK rekey offload attributes */
static const struct nla_policy
nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
	[NL80211_REKEY_DATA_KEK] = {
		.type = NLA_BINARY,
		.len = NL80211_KEK_EXT_LEN
	},
	[NL80211_REKEY_DATA_KCK] = {
		.type = NLA_BINARY,
		.len = NL80211_KCK_EXT_LEN_32
	},
	[NL80211_REKEY_DATA_REPLAY_CTR] =
		NLA_POLICY_EXACT_LEN(NL80211_REPLAY_CTR_LEN),
	[NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
};

/* policy for scheduled scan match attributes (NL80211_SCHED_SCAN_MATCH_ATTR_*) */
static const struct nla_policy
nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
	[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
						 .len = IEEE80211_MAX_SSID_LEN },
	[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] =
		NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
};

/* policy for scheduled scan plan attributes (NL80211_SCHED_SCAN_PLAN_*) */
static const struct nla_policy
nl80211_plan_policy[NL80211_SCHED_SCAN_PLAN_MAX + 1] = {
	[NL80211_SCHED_SCAN_PLAN_INTERVAL] = { .type = NLA_U32 },
	[NL80211_SCHED_SCAN_PLAN_ITERATIONS] = { .type = NLA_U32 },
};

/* policy for BSS selection attributes (NL80211_BSS_SELECT_ATTR_*) */
static const struct nla_policy
nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = {
	[NL80211_BSS_SELECT_ATTR_RSSI] = { .type = NLA_FLAG },
	[NL80211_BSS_SELECT_ATTR_BAND_PREF] = { .type = NLA_U32 },
	[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST] = {
		.len = sizeof(struct nl80211_bss_select_rssi_adjust)
	},
};

/* policy for NAN function attributes */
static const struct nla_policy
nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
	[NL80211_NAN_FUNC_TYPE] =
		NLA_POLICY_MAX(NLA_U8, NL80211_NAN_FUNC_MAX_TYPE),
	[NL80211_NAN_FUNC_SERVICE_ID] = {
		.len = NL80211_NAN_FUNC_SERVICE_ID_LEN
	},
	[NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG },
	[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
	[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
	[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
	[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
	[NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
			.len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN },
	[NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED },
[NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED },
	[NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED },
	[NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 },
	[NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 },
};

/* policy for Service Response Filter attributes */
static const struct nla_policy
nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = {
	[NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG },
	[NL80211_NAN_SRF_BF] = { .type = NLA_BINARY,
				 .len = NL80211_NAN_FUNC_SRF_MAX_LEN },
	[NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 },
	[NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED },
};

/* policy for packet pattern attributes */
static const struct nla_policy
nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = {
	[NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, },
	[NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, },
	[NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 },
};

/*
 * Resolve the rdev/wdev pair a dump callback operates on.
 *
 * First invocation (cb->args[0] == 0): the request attributes are parsed
 * from cb->nlh against nl80211_policy, the wdev is looked up from them
 * under the RTNL, and the result is remembered in cb->args[0] (wiphy
 * index + 1) and cb->args[1] (wdev identifier).  If @attrbuf is NULL a
 * temporary attribute table is allocated for the parse and freed again
 * before returning.
 *
 * Later invocations: the wiphy is looked up again from the stored index
 * and its wdev_list is searched for the stored identifier.
 *
 * Returns 0 with (*rdev)->wiphy.mtx held and the RTNL released; on any
 * error a negative errno is returned and the wiphy mutex is not held.
 */
static int nl80211_prepare_wdev_dump(struct netlink_callback *cb,
				     struct cfg80211_registered_device **rdev,
				     struct wireless_dev **wdev,
				     struct nlattr **attrbuf)
{
	int err;

	if (!cb->args[0]) {
		/* non-NULL only if the table was allocated here */
		struct nlattr **attrbuf_free = NULL;

		if (!attrbuf) {
			attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
					  GFP_KERNEL);
			if (!attrbuf)
				return -ENOMEM;
			attrbuf_free = attrbuf;
		}

		err = nlmsg_parse_deprecated(cb->nlh,
					     GENL_HDRLEN + nl80211_fam.hdrsize,
					     attrbuf, nl80211_fam.maxattr,
					     nl80211_policy, NULL);
		if (err) {
			kfree(attrbuf_free);
			return err;
		}

		rtnl_lock();
		*wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(cb->skb->sk),
						   attrbuf);
		/* the attribute table is not used past this point */
		kfree(attrbuf_free);
		if (IS_ERR(*wdev)) {
			rtnl_unlock();
			return PTR_ERR(*wdev);
		}
		*rdev = wiphy_to_rdev((*wdev)->wiphy);
		/* take the wiphy mutex before letting go of the RTNL */
		mutex_lock(&(*rdev)->wiphy.mtx);
		rtnl_unlock();
		/* 0 is the first index - add 1 to parse only once */
		cb->args[0] = (*rdev)->wiphy_idx + 1;
		cb->args[1] = (*wdev)->identifier;
	} else {
		/* subtract the 1 again here */
		struct wiphy *wiphy;
		struct wireless_dev *tmp;

		rtnl_lock();
		wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
		if (!wiphy) {
			rtnl_unlock();
			return -ENODEV;
		}
		*rdev = wiphy_to_rdev(wiphy);
		*wdev = NULL;

		/* find the wdev remembered by the first invocation */
		list_for_each_entry(tmp, &(*rdev)->wiphy.wdev_list, list) {
			if (tmp->identifier == cb->args[1]) {
				*wdev = tmp;
				break;
			}
		}

		if (!*wdev) {
			rtnl_unlock();
			return -ENODEV;
		}
		mutex_lock(&(*rdev)->wiphy.mtx);
		rtnl_unlock();
	}

	return 0;
}

/* message building helper */
void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
		     int flags, u8 cmd)
{
	/* since there is no private header just add the generic one */
	return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd);
}

/*
 * Append the WMM rules of @rule to @msg as a nested
 * NL80211_FREQUENCY_ATTR_WMM attribute.  It holds one nested entry per
 * index j in [0, IEEE80211_NUM_ACS), each filled from
 * rule->wmm_rule.client[j] (CW min/max, AIFSN, and TXOP taken from the
 * .cot member).
 *
 * Returns 0 on success, -ENOBUFS if any attribute could not be added.
 */
static int nl80211_msg_put_wmm_rules(struct sk_buff *msg,
				     const struct ieee80211_reg_rule *rule)
{
	int j;
	struct nlattr *nl_wmm_rules =
		nla_nest_start_noflag(msg, NL80211_FREQUENCY_ATTR_WMM);

	if (!nl_wmm_rules)
		goto nla_put_failure;

	for (j = 0; j < IEEE80211_NUM_ACS; j++) {
		/* the nested attribute type is the index j itself */
		struct nlattr *nl_wmm_rule = nla_nest_start_noflag(msg, j);

		if (!nl_wmm_rule)
			goto nla_put_failure;

		if (nla_put_u16(msg, NL80211_WMMR_CW_MIN,
				rule->wmm_rule.client[j].cw_min) ||
		    nla_put_u16(msg, NL80211_WMMR_CW_MAX,
				rule->wmm_rule.client[j].cw_max) ||
		    nla_put_u8(msg, NL80211_WMMR_AIFSN,
			       rule->wmm_rule.client[j].aifsn) ||
		    nla_put_u16(msg, NL80211_WMMR_TXOP,
				rule->wmm_rule.client[j].cot))
			goto nla_put_failure;

		nla_nest_end(msg, nl_wmm_rule);
	}
	nla_nest_end(msg, nl_wmm_rules);

	return 0;

nla_put_failure:
	return -ENOBUFS;
}

/*
 * Append the NL80211_FREQUENCY_ATTR_* description of @chan to @msg.
 *
 * With @large false only a reduced attribute set is emitted, and channels
 * flagged NO_10MHZ/NO_20MHZ or having a non-zero freq_offset are skipped
 * entirely (0 is returned without adding anything).
 *
 * Returns 0 on success, -ENOBUFS if an attribute could not be added.
 */
static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
				   struct ieee80211_channel *chan,
				   bool large)
{
	/* Some channels must be completely excluded from the
	 * list to protect old user-space tools from breaking
	 */
	if (!large && chan->flags &
	    (IEEE80211_CHAN_NO_10MHZ | IEEE80211_CHAN_NO_20MHZ))
		return 0;
	if (!large && chan->freq_offset)
		return 0;

	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_FREQ,
			chan->center_freq))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset))
		goto nla_put_failure;

	if ((chan->flags & IEEE80211_CHAN_PSD) &&
	    nla_put_s8(msg, NL80211_FREQUENCY_ATTR_PSD, chan->psd))
		goto
nla_put_failure;

	if ((chan->flags & IEEE80211_CHAN_DISABLED) &&
	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED))
		goto nla_put_failure;
	if (chan->flags & IEEE80211_CHAN_NO_IR) {
		/* NO_IR is reported under both attribute names */
		if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_IR))
			goto nla_put_failure;
		if (nla_put_flag(msg, __NL80211_FREQUENCY_ATTR_NO_IBSS))
			goto nla_put_failure;
	}
	if (chan->flags & IEEE80211_CHAN_RADAR) {
		if (nla_put_flag(msg, NL80211_FREQUENCY_ATTR_RADAR))
			goto nla_put_failure;
		if (large) {
			u32 time;

			time = elapsed_jiffies_msecs(chan->dfs_state_entered);

			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_STATE,
					chan->dfs_state))
				goto nla_put_failure;
			if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_DFS_TIME,
					time))
				goto nla_put_failure;
			if (nla_put_u32(msg,
					NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
					chan->dfs_cac_ms))
				goto nla_put_failure;
		}
	}

	/* the remaining channel flags are only reported in "large" mode */
	if (large) {
		if ((chan->flags & IEEE80211_CHAN_NO_HT40MINUS) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_MINUS))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_HT40PLUS) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HT40_PLUS))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_80MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_80MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_160MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_160MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_HE) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HE))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_1MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_1MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_2MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_2MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_4MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_4MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_8MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_8MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_16MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_16MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_320MHZ) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_320MHZ))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_EHT) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_EHT))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_DFS_CONCURRENT) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DFS_CONCURRENT))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_VLP_CLIENT) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_NO_6GHZ_AFC_CLIENT) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_CAN_MONITOR) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_CAN_MONITOR))
			goto nla_put_failure;
		if ((chan->flags & IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP) &&
		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP))
			goto nla_put_failure;
	}

	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER,
			DBM_TO_MBM(chan->max_power)))
		goto nla_put_failure;

	if (large) {
		const struct ieee80211_reg_rule *rule =
			freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq));

		/* WMM rules are added only when a rule is found and has them */
		if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) {
			if (nl80211_msg_put_wmm_rules(msg, rule))
				goto nla_put_failure;
		}
	}

	return 0;

 nla_put_failure:
	return -ENOBUFS;
}

/*
 * Append the TXQ statistics in @txqstats to @msg as a nested attribute of
 * type @attrtype.  A counter is only emitted when its bit is set in
 * txqstats->filled.
 *
 * Returns true on success, false if the nest could not be started or a
 * counter could not be added.
 */
static bool nl80211_put_txq_stats(struct sk_buff *msg,
				  struct cfg80211_txq_stats *txqstats,
				  int attrtype)
{
	struct nlattr *txqattr;

/* emit one u32 counter if it is marked filled; bails out of the function on failure */
#define PUT_TXQVAL_U32(attr, memb) do {					  \
	if (txqstats->filled & BIT(NL80211_TXQ_STATS_ ## attr) &&	  \
	    nla_put_u32(msg, NL80211_TXQ_STATS_ ## attr, txqstats->memb)) \
		return false;						  \
	} while (0)

	txqattr = nla_nest_start_noflag(msg, attrtype);
	if (!txqattr)
		return false;

	PUT_TXQVAL_U32(BACKLOG_BYTES, backlog_bytes);
	PUT_TXQVAL_U32(BACKLOG_PACKETS, backlog_packets);
	PUT_TXQVAL_U32(FLOWS, flows);
	PUT_TXQVAL_U32(DROPS, drops);
	PUT_TXQVAL_U32(ECN_MARKS, ecn_marks);
	PUT_TXQVAL_U32(OVERLIMIT, overlimit);
	PUT_TXQVAL_U32(OVERMEMORY, overmemory);
	PUT_TXQVAL_U32(COLLISIONS, collisions);
	PUT_TXQVAL_U32(TX_BYTES, tx_bytes);
	PUT_TXQVAL_U32(TX_PACKETS, tx_packets);
	PUT_TXQVAL_U32(MAX_FLOWS, max_flows);
	nla_nest_end(msg, txqattr);

#undef PUT_TXQVAL_U32
	return true;
}

/* netlink command implementations */

/**
 * nl80211_link_id - return link ID
 * @attrs: attributes to look at
 *
 * Returns: the link ID or 0 if not given
 *
 * Note this function doesn't do any validation of the link
 * ID validity wrt. links that were actually added, so it must
 * be called only from ops with %NL80211_FLAG_MLO_VALID_LINK_ID
 * or if additional validation is done.
 */
static unsigned int nl80211_link_id(struct nlattr **attrs)
{
	struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID];

	return nla_get_u8_default(linkid, 0);
}

/*
 * Like nl80211_link_id(), but returns -1 instead of 0 when the
 * NL80211_ATTR_MLO_LINK_ID attribute is absent.
 */
static int nl80211_link_id_or_invalid(struct nlattr **attrs)
{
	struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID];

	if (!linkid)
		return -1;

	return nla_get_u8(linkid);
}

/*
 * Result of parsing key attributes.  nl80211_parse_key() presets idx and
 * type to -1, so -1 means "not given" for those two members.
 */
struct key_parse {
	struct key_params p;
	int idx;
	int type;
	bool def, defmgmt, defbeacon;
	bool def_uni, def_multi;
};

/*
 * Parse the nested key container @key (NL80211_KEY_* attributes) into @k.
 *
 * Only members whose attribute is present are written, so the caller must
 * initialise @k beforehand.  k->p.key and k->p.seq point into the
 * attribute payload; nothing is copied.
 *
 * Returns 0 on success or the error from the nested attribute parse.
 */
static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key,
				 struct key_parse *k)
{
	struct nlattr *tb[NL80211_KEY_MAX + 1];
	int err = nla_parse_nested_deprecated(tb, NL80211_KEY_MAX, key,
					      nl80211_key_policy,
					      info->extack);
	if (err)
		return err;

	k->def = !!tb[NL80211_KEY_DEFAULT];
	k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT];
	k->defbeacon = !!tb[NL80211_KEY_DEFAULT_BEACON];

	/* defaults implied by the flags; an explicit types list below overrides */
	if (k->def) {
		k->def_uni = true;
		k->def_multi = true;
	}
	if (k->defmgmt || k->defbeacon)
		k->def_multi = true;

	if (tb[NL80211_KEY_IDX])
		k->idx = nla_get_u8(tb[NL80211_KEY_IDX]);

	if (tb[NL80211_KEY_DATA]) {
		k->p.key = nla_data(tb[NL80211_KEY_DATA]);
		k->p.key_len = nla_len(tb[NL80211_KEY_DATA]);
	}

	if (tb[NL80211_KEY_SEQ]) {
		k->p.seq = nla_data(tb[NL80211_KEY_SEQ]);
		k->p.seq_len = nla_len(tb[NL80211_KEY_SEQ]);
	}

	if (tb[NL80211_KEY_CIPHER])
		k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]);

	if (tb[NL80211_KEY_TYPE])
		k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);

	if (tb[NL80211_KEY_DEFAULT_TYPES]) {
		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];

		err = nla_parse_nested_deprecated(kdt,
						  NUM_NL80211_KEY_DEFAULT_TYPES - 1,
						  tb[NL80211_KEY_DEFAULT_TYPES],
						  nl80211_key_default_policy,
						  info->extack);
		if (err)
			return err;

		/* flag attributes: presence of the pointer is the value */
		k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST];
		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
	}

	if (tb[NL80211_KEY_MODE])
		k->p.mode = nla_get_u8(tb[NL80211_KEY_MODE]);

	return 0;
}

/*
 * Parse the top-level NL80211_ATTR_KEY_* attributes of @info into @k.
 *
 * Unlike nl80211_parse_key_new() this variant does not set k->defbeacon
 * or k->p.mode.  k->p.key and k->p.seq point into the attribute payload.
 *
 * Returns 0 on success or the error from parsing the default-types nest.
 */
static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
{
	if (info->attrs[NL80211_ATTR_KEY_DATA]) {
		k->p.key = nla_data(info->attrs[NL80211_ATTR_KEY_DATA]);
		k->p.key_len = nla_len(info->attrs[NL80211_ATTR_KEY_DATA]);
	}

	if (info->attrs[NL80211_ATTR_KEY_SEQ]) {
		k->p.seq = nla_data(info->attrs[NL80211_ATTR_KEY_SEQ]);
		k->p.seq_len = nla_len(info->attrs[NL80211_ATTR_KEY_SEQ]);
	}

	if (info->attrs[NL80211_ATTR_KEY_IDX])
		k->idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);

	if (info->attrs[NL80211_ATTR_KEY_CIPHER])
		k->p.cipher = nla_get_u32(info->attrs[NL80211_ATTR_KEY_CIPHER]);

	k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT];
	k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT];

	/* defaults implied by the flags; an explicit types list below overrides */
	if (k->def) {
		k->def_uni = true;
		k->def_multi = true;
	}
	if (k->defmgmt)
		k->def_multi = true;

	if (info->attrs[NL80211_ATTR_KEY_TYPE])
		k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);

	if (info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES]) {
		struct nlattr *kdt[NUM_NL80211_KEY_DEFAULT_TYPES];
		int err = nla_parse_nested_deprecated(kdt,
						      NUM_NL80211_KEY_DEFAULT_TYPES - 1,
						      info->attrs[NL80211_ATTR_KEY_DEFAULT_TYPES],
						      nl80211_key_default_policy,
						      info->extack);
		if (err)
			return err;

		/* flag attributes: presence of the pointer is the value */
		k->def_uni = kdt[NL80211_KEY_DEFAULT_TYPE_UNICAST];
		k->def_multi = kdt[NL80211_KEY_DEFAULT_TYPE_MULTICAST];
	}

	return 0;
}

/*
 * Parse the key description of a request into @k and sanity-check it.
 *
 * @k is zeroed first, with idx and type preset to -1.  The nested
 * NL80211_ATTR_KEY form is used when present, the top-level attributes
 * otherwise.  After parsing, the combination of default flags and the key
 * index range are checked.
 *
 * Returns 0 on success, a parse error, or -EINVAL (with an extack message)
 * if the flag combination or index is not acceptable.
 */
static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
{
	int err;

	memset(k, 0, sizeof(*k));
	k->idx = -1;
	k->type = -1;

	if (info->attrs[NL80211_ATTR_KEY])
		err = nl80211_parse_key_new(info, info->attrs[NL80211_ATTR_KEY], k);
	else
		err = nl80211_parse_key_old(info, k);

	if (err)
		return err;

	/* at most one of def / defmgmt / defbeacon may be set */
	if ((k->def ? 1 : 0) + (k->defmgmt ? 1 : 0) +
	    (k->defbeacon ?
1 : 0) > 1) { GENL_SET_ERR_MSG(info, "key with multiple default flags is invalid"); return -EINVAL; } if (k->defmgmt || k->defbeacon) { if (k->def_uni || !k->def_multi) { GENL_SET_ERR_MSG(info, "defmgmt/defbeacon key must be mcast"); return -EINVAL; } } if (k->idx != -1) { if (k->defmgmt) { if (k->idx < 4 || k->idx > 5) { GENL_SET_ERR_MSG(info, "defmgmt key idx not 4 or 5"); return -EINVAL; } } else if (k->defbeacon) { if (k->idx < 6 || k->idx > 7) { GENL_SET_ERR_MSG(info, "defbeacon key idx not 6 or 7"); return -EINVAL; } } else if (k->def) { if (k->idx < 0 || k->idx > 3) { GENL_SET_ERR_MSG(info, "def key idx not 0-3"); return -EINVAL; } } else { if (k->idx < 0 || k->idx > 7) { GENL_SET_ERR_MSG(info, "key idx not 0-7"); return -EINVAL; } } } return 0; } static struct cfg80211_cached_keys * nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, struct genl_info *info, bool *no_ht) { struct nlattr *keys = info->attrs[NL80211_ATTR_KEYS]; struct key_parse parse; struct nlattr *key; struct cfg80211_cached_keys *result; int rem, err, def = 0; bool have_key = false; nla_for_each_nested(key, keys, rem) { have_key = true; break; } if (!have_key) return NULL; result = kzalloc(sizeof(*result), GFP_KERNEL); if (!result) return ERR_PTR(-ENOMEM); result->def = -1; nla_for_each_nested(key, keys, rem) { memset(&parse, 0, sizeof(parse)); parse.idx = -1; err = nl80211_parse_key_new(info, key, &parse); if (err) goto error; err = -EINVAL; if (!parse.p.key) goto error; if (parse.idx < 0 || parse.idx > 3) { GENL_SET_ERR_MSG(info, "key index out of range [0-3]"); goto error; } if (parse.def) { if (def) { GENL_SET_ERR_MSG(info, "only one key can be default"); goto error; } def = 1; result->def = parse.idx; if (!parse.def_uni || !parse.def_multi) goto error; } else if (parse.defmgmt) goto error; err = cfg80211_validate_key_settings(rdev, &parse.p, parse.idx, false, NULL); if (err) goto error; if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && parse.p.cipher != 
WLAN_CIPHER_SUITE_WEP104) { GENL_SET_ERR_MSG(info, "connect key must be WEP"); err = -EINVAL; goto error; } result->params[parse.idx].cipher = parse.p.cipher; result->params[parse.idx].key_len = parse.p.key_len; result->params[parse.idx].key = result->data[parse.idx]; memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); /* must be WEP key if we got here */ if (no_ht) *no_ht = true; } if (result->def < 0) { err = -EINVAL; GENL_SET_ERR_MSG(info, "need a default/TX key"); goto error; } return result; error: kfree(result); return ERR_PTR(err); } static int nl80211_key_allowed(struct wireless_dev *wdev) { lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.current_bss) return 0; return -ENOLINK; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) return 0; return -ENOLINK; case NL80211_IFTYPE_NAN: if (wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return 0; return -EINVAL; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: return -EINVAL; } return 0; } static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy, u32 freq) { struct ieee80211_channel *chan; chan = ieee80211_get_channel_khz(wiphy, freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) return NULL; return chan; } static int nl80211_put_iftypes(struct sk_buff *msg, u32 attr, u16 ifmodes) { struct nlattr *nl_modes = nla_nest_start_noflag(msg, attr); int i; if (!nl_modes) goto nla_put_failure; i = 0; while (ifmodes) { if ((ifmodes & 1) && nla_put_flag(msg, i)) goto nla_put_failure; ifmodes >>= 1; i++; } nla_nest_end(msg, nl_modes); return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_put_ifcomb_data(struct sk_buff *msg, bool large, 
int idx, const struct ieee80211_iface_combination *c, u16 nested) { struct nlattr *nl_combi, *nl_limits; int i; nl_combi = nla_nest_start_noflag(msg, idx | nested); if (!nl_combi) goto nla_put_failure; nl_limits = nla_nest_start_noflag(msg, NL80211_IFACE_COMB_LIMITS | nested); if (!nl_limits) goto nla_put_failure; for (i = 0; i < c->n_limits; i++) { struct nlattr *nl_limit; nl_limit = nla_nest_start_noflag(msg, i + 1); if (!nl_limit) goto nla_put_failure; if (nla_put_u32(msg, NL80211_IFACE_LIMIT_MAX, c->limits[i].max)) goto nla_put_failure; if (nl80211_put_iftypes(msg, NL80211_IFACE_LIMIT_TYPES, c->limits[i].types)) goto nla_put_failure; nla_nest_end(msg, nl_limit); } nla_nest_end(msg, nl_limits); if (c->beacon_int_infra_match && nla_put_flag(msg, NL80211_IFACE_COMB_STA_AP_BI_MATCH)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_IFACE_COMB_NUM_CHANNELS, c->num_different_channels) || nla_put_u32(msg, NL80211_IFACE_COMB_MAXNUM, c->max_interfaces)) goto nla_put_failure; if (large && (nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_WIDTHS, c->radar_detect_widths) || nla_put_u32(msg, NL80211_IFACE_COMB_RADAR_DETECT_REGIONS, c->radar_detect_regions))) goto nla_put_failure; if (c->beacon_int_min_gcd && nla_put_u32(msg, NL80211_IFACE_COMB_BI_MIN_GCD, c->beacon_int_min_gcd)) goto nla_put_failure; nla_nest_end(msg, nl_combi); return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_put_iface_combinations(struct wiphy *wiphy, struct sk_buff *msg, int attr, int radio, bool large, u16 nested) { const struct ieee80211_iface_combination *c; struct nlattr *nl_combis; int i, n; nl_combis = nla_nest_start_noflag(msg, attr | nested); if (!nl_combis) goto nla_put_failure; if (radio >= 0) { c = wiphy->radio[0].iface_combinations; n = wiphy->radio[0].n_iface_combinations; } else { c = wiphy->iface_combinations; n = wiphy->n_iface_combinations; } for (i = 0; i < n; i++) if (nl80211_put_ifcomb_data(msg, large, i + 1, &c[i], nested)) goto nla_put_failure; nla_nest_end(msg, 
nl_combis); return 0; nla_put_failure: return -ENOBUFS; } #ifdef CONFIG_PM static int nl80211_send_wowlan_tcp_caps(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { const struct wiphy_wowlan_tcp_support *tcp = rdev->wiphy.wowlan->tcp; struct nlattr *nl_tcp; if (!tcp) return 0; nl_tcp = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_TCP_CONNECTION); if (!nl_tcp) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, tcp->data_payload_max)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, tcp->data_payload_max)) return -ENOBUFS; if (tcp->seq && nla_put_flag(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ)) return -ENOBUFS; if (tcp->tok && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, sizeof(*tcp->tok), tcp->tok)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, tcp->data_interval_max)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, tcp->wake_payload_max)) return -ENOBUFS; nla_nest_end(msg, nl_tcp); return 0; } static int nl80211_send_wowlan(struct sk_buff *msg, struct cfg80211_registered_device *rdev, bool large) { struct nlattr *nl_wowlan; if (!rdev->wiphy.wowlan) return 0; nl_wowlan = nla_nest_start_noflag(msg, NL80211_ATTR_WOWLAN_TRIGGERS_SUPPORTED); if (!nl_wowlan) return -ENOBUFS; if (((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_ANY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_DISCONNECT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ) && nla_put_flag(msg, 
NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE) && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) return -ENOBUFS; if (rdev->wiphy.wowlan->n_patterns) { struct nl80211_pattern_support pat = { .max_patterns = rdev->wiphy.wowlan->n_patterns, .min_pattern_len = rdev->wiphy.wowlan->pattern_min_len, .max_pattern_len = rdev->wiphy.wowlan->pattern_max_len, .max_pkt_offset = rdev->wiphy.wowlan->max_pkt_offset, }; if (nla_put(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN, sizeof(pat), &pat)) return -ENOBUFS; } if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) && nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT, rdev->wiphy.wowlan->max_nd_match_sets)) return -ENOBUFS; if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; nla_nest_end(msg, nl_wowlan); return 0; } #endif static int nl80211_send_coalesce(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { struct nl80211_coalesce_rule_support rule; if (!rdev->wiphy.coalesce) return 0; rule.max_rules = rdev->wiphy.coalesce->n_rules; rule.max_delay = rdev->wiphy.coalesce->max_delay; rule.pat.max_patterns = rdev->wiphy.coalesce->n_patterns; rule.pat.min_pattern_len = rdev->wiphy.coalesce->pattern_min_len; rule.pat.max_pattern_len = rdev->wiphy.coalesce->pattern_max_len; rule.pat.max_pkt_offset = rdev->wiphy.coalesce->max_pkt_offset; if (nla_put(msg, NL80211_ATTR_COALESCE_RULE, sizeof(rule), &rule)) return -ENOBUFS; return 0; } static int nl80211_send_iftype_data(struct sk_buff *msg, const struct ieee80211_supported_band *sband, const struct ieee80211_sband_iftype_data *iftdata) { const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap; const struct ieee80211_sta_eht_cap *eht_cap = &iftdata->eht_cap; if (nl80211_put_iftypes(msg, NL80211_BAND_IFTYPE_ATTR_IFTYPES, iftdata->types_mask)) return -ENOBUFS; if (he_cap->has_he) { 
if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MAC, sizeof(he_cap->he_cap_elem.mac_cap_info), he_cap->he_cap_elem.mac_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY, sizeof(he_cap->he_cap_elem.phy_cap_info), he_cap->he_cap_elem.phy_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET, sizeof(he_cap->he_mcs_nss_supp), &he_cap->he_mcs_nss_supp) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE, sizeof(he_cap->ppe_thres), he_cap->ppe_thres)) return -ENOBUFS; } if (eht_cap->has_eht && he_cap->has_he) { u8 mcs_nss_size, ppe_thresh_size; u16 ppe_thres_hdr; bool is_ap; is_ap = iftdata->types_mask & BIT(NL80211_IFTYPE_AP) || iftdata->types_mask & BIT(NL80211_IFTYPE_P2P_GO); mcs_nss_size = ieee80211_eht_mcs_nss_size(&he_cap->he_cap_elem, &eht_cap->eht_cap_elem, is_ap); ppe_thres_hdr = get_unaligned_le16(&eht_cap->eht_ppe_thres[0]); ppe_thresh_size = ieee80211_eht_ppe_size(ppe_thres_hdr, eht_cap->eht_cap_elem.phy_cap_info); if (nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MAC, sizeof(eht_cap->eht_cap_elem.mac_cap_info), eht_cap->eht_cap_elem.mac_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PHY, sizeof(eht_cap->eht_cap_elem.phy_cap_info), eht_cap->eht_cap_elem.phy_cap_info) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_MCS_SET, mcs_nss_size, &eht_cap->eht_mcs_nss_supp) || nla_put(msg, NL80211_BAND_IFTYPE_ATTR_EHT_CAP_PPE, ppe_thresh_size, eht_cap->eht_ppe_thres)) return -ENOBUFS; } if (sband->band == NL80211_BAND_6GHZ && nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA, sizeof(iftdata->he_6ghz_capa), &iftdata->he_6ghz_capa)) return -ENOBUFS; if (iftdata->vendor_elems.data && iftdata->vendor_elems.len && nla_put(msg, NL80211_BAND_IFTYPE_ATTR_VENDOR_ELEMS, iftdata->vendor_elems.len, iftdata->vendor_elems.data)) return -ENOBUFS; return 0; } static int nl80211_send_band_rateinfo(struct sk_buff *msg, struct ieee80211_supported_band *sband, bool large) { struct nlattr *nl_rates, *nl_rate; struct ieee80211_rate *rate; int i; /* add 
HT info */ if (sband->ht_cap.ht_supported && (nla_put(msg, NL80211_BAND_ATTR_HT_MCS_SET, sizeof(sband->ht_cap.mcs), &sband->ht_cap.mcs) || nla_put_u16(msg, NL80211_BAND_ATTR_HT_CAPA, sband->ht_cap.cap) || nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_FACTOR, sband->ht_cap.ampdu_factor) || nla_put_u8(msg, NL80211_BAND_ATTR_HT_AMPDU_DENSITY, sband->ht_cap.ampdu_density))) return -ENOBUFS; /* add VHT info */ if (sband->vht_cap.vht_supported && (nla_put(msg, NL80211_BAND_ATTR_VHT_MCS_SET, sizeof(sband->vht_cap.vht_mcs), &sband->vht_cap.vht_mcs) || nla_put_u32(msg, NL80211_BAND_ATTR_VHT_CAPA, sband->vht_cap.cap))) return -ENOBUFS; if (large && sband->n_iftype_data) { struct nlattr *nl_iftype_data = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_IFTYPE_DATA); const struct ieee80211_sband_iftype_data *iftd; int err; if (!nl_iftype_data) return -ENOBUFS; for_each_sband_iftype_data(sband, i, iftd) { struct nlattr *iftdata; iftdata = nla_nest_start_noflag(msg, i + 1); if (!iftdata) return -ENOBUFS; err = nl80211_send_iftype_data(msg, sband, iftd); if (err) return err; nla_nest_end(msg, iftdata); } nla_nest_end(msg, nl_iftype_data); } /* add EDMG info */ if (large && sband->edmg_cap.channels && (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS, sband->edmg_cap.channels) || nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG, sband->edmg_cap.bw_config))) return -ENOBUFS; /* add bitrates */ nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) return -ENOBUFS; for (i = 0; i < sband->n_bitrates; i++) { nl_rate = nla_nest_start_noflag(msg, i); if (!nl_rate) return -ENOBUFS; rate = &sband->bitrates[i]; if (nla_put_u32(msg, NL80211_BITRATE_ATTR_RATE, rate->bitrate)) return -ENOBUFS; if ((rate->flags & IEEE80211_RATE_SHORT_PREAMBLE) && nla_put_flag(msg, NL80211_BITRATE_ATTR_2GHZ_SHORTPREAMBLE)) return -ENOBUFS; nla_nest_end(msg, nl_rate); } nla_nest_end(msg, nl_rates); /* S1G capabilities */ if (sband->band == NL80211_BAND_S1GHZ && sband->s1g_cap.s1g && (nla_put(msg, 
NL80211_BAND_ATTR_S1G_CAPA, sizeof(sband->s1g_cap.cap), sband->s1g_cap.cap) || nla_put(msg, NL80211_BAND_ATTR_S1G_MCS_NSS_SET, sizeof(sband->s1g_cap.nss_mcs), sband->s1g_cap.nss_mcs))) return -ENOBUFS; return 0; } static int nl80211_send_mgmt_stypes(struct sk_buff *msg, const struct ieee80211_txrx_stypes *mgmt_stypes) { u16 stypes; struct nlattr *nl_ftypes, *nl_ifs; enum nl80211_iftype ift; int i; if (!mgmt_stypes) return 0; nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_TX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; stypes = mgmt_stypes[ift].tx; while (stypes) { if ((stypes & 1) && nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, (i << 4) | IEEE80211_FTYPE_MGMT)) return -ENOBUFS; stypes >>= 1; i++; } nla_nest_end(msg, nl_ftypes); } nla_nest_end(msg, nl_ifs); nl_ifs = nla_nest_start_noflag(msg, NL80211_ATTR_RX_FRAME_TYPES); if (!nl_ifs) return -ENOBUFS; for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { nl_ftypes = nla_nest_start_noflag(msg, ift); if (!nl_ftypes) return -ENOBUFS; i = 0; stypes = mgmt_stypes[ift].rx; while (stypes) { if ((stypes & 1) && nla_put_u16(msg, NL80211_ATTR_FRAME_TYPE, (i << 4) | IEEE80211_FTYPE_MGMT)) return -ENOBUFS; stypes >>= 1; i++; } nla_nest_end(msg, nl_ftypes); } nla_nest_end(msg, nl_ifs); return 0; } #define CMD(op, n) \ do { \ if (rdev->ops->op) { \ i++; \ if (nla_put_u32(msg, i, NL80211_CMD_ ## n)) \ goto nla_put_failure; \ } \ } while (0) static int nl80211_add_commands_unsplit(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { int i = 0; /* * do *NOT* add anything into this function, new things need to be * advertised only to new versions of userspace that can deal with * the split (and they can't possibly care about new features... 
*/ CMD(add_virtual_intf, NEW_INTERFACE); CMD(change_virtual_intf, SET_INTERFACE); CMD(add_key, NEW_KEY); CMD(start_ap, START_AP); CMD(add_station, NEW_STATION); CMD(add_mpath, NEW_MPATH); CMD(update_mesh_config, SET_MESH_CONFIG); CMD(change_bss, SET_BSS); CMD(auth, AUTHENTICATE); CMD(assoc, ASSOCIATE); CMD(deauth, DEAUTHENTICATE); CMD(disassoc, DISASSOCIATE); CMD(join_ibss, JOIN_IBSS); CMD(join_mesh, JOIN_MESH); CMD(set_pmksa, SET_PMKSA); CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); if (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); CMD(mgmt_tx, FRAME); CMD(mgmt_tx_cancel_wait, FRAME_WAIT_CANCEL); if (rdev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_WIPHY_NETNS)) goto nla_put_failure; } if (rdev->ops->set_monitor_channel || rdev->ops->start_ap || rdev->ops->join_mesh) { i++; if (nla_put_u32(msg, i, NL80211_CMD_SET_CHANNEL)) goto nla_put_failure; } if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) { CMD(tdls_mgmt, TDLS_MGMT); CMD(tdls_oper, TDLS_OPER); } if (rdev->wiphy.max_sched_scan_reqs) CMD(sched_scan_start, START_SCHED_SCAN); CMD(probe_client, PROBE_CLIENT); CMD(set_noack_map, SET_NOACK_MAP); if (rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) { i++; if (nla_put_u32(msg, i, NL80211_CMD_REGISTER_BEACONS)) goto nla_put_failure; } CMD(start_p2p_device, START_P2P_DEVICE); CMD(set_mcast_rate, SET_MCAST_RATE); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); #endif if (rdev->ops->connect || rdev->ops->auth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_CONNECT)) goto nla_put_failure; } if (rdev->ops->disconnect || rdev->ops->deauth) { i++; if (nla_put_u32(msg, i, NL80211_CMD_DISCONNECT)) goto nla_put_failure; } return i; nla_put_failure: return -ENOBUFS; } static int nl80211_send_pmsr_ftm_capa(const struct cfg80211_pmsr_capabilities *cap, struct sk_buff *msg) { struct nlattr *ftm; if (!cap->ftm.supported) return 0; ftm 
= nla_nest_start_noflag(msg, NL80211_PMSR_TYPE_FTM); if (!ftm) return -ENOBUFS; if (cap->ftm.asap && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_ASAP)) return -ENOBUFS; if (cap->ftm.non_asap && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_NON_ASAP)) return -ENOBUFS; if (cap->ftm.request_lci && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_REQ_LCI)) return -ENOBUFS; if (cap->ftm.request_civicloc && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_REQ_CIVICLOC)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_PREAMBLES, cap->ftm.preambles)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_BANDWIDTHS, cap->ftm.bandwidths)) return -ENOBUFS; if (cap->ftm.max_bursts_exponent >= 0 && nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_BURSTS_EXPONENT, cap->ftm.max_bursts_exponent)) return -ENOBUFS; if (cap->ftm.max_ftms_per_burst && nla_put_u32(msg, NL80211_PMSR_FTM_CAPA_ATTR_MAX_FTMS_PER_BURST, cap->ftm.max_ftms_per_burst)) return -ENOBUFS; if (cap->ftm.trigger_based && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_TRIGGER_BASED)) return -ENOBUFS; if (cap->ftm.non_trigger_based && nla_put_flag(msg, NL80211_PMSR_FTM_CAPA_ATTR_NON_TRIGGER_BASED)) return -ENOBUFS; nla_nest_end(msg, ftm); return 0; } static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { const struct cfg80211_pmsr_capabilities *cap = rdev->wiphy.pmsr_capa; struct nlattr *pmsr, *caps; if (!cap) return 0; /* * we don't need to clean up anything here since the caller * will genlmsg_cancel() if we fail */ pmsr = nla_nest_start_noflag(msg, NL80211_ATTR_PEER_MEASUREMENTS); if (!pmsr) return -ENOBUFS; if (nla_put_u32(msg, NL80211_PMSR_ATTR_MAX_PEERS, cap->max_peers)) return -ENOBUFS; if (cap->report_ap_tsf && nla_put_flag(msg, NL80211_PMSR_ATTR_REPORT_AP_TSF)) return -ENOBUFS; if (cap->randomize_mac_addr && nla_put_flag(msg, NL80211_PMSR_ATTR_RANDOMIZE_MAC_ADDR)) return -ENOBUFS; caps = nla_nest_start_noflag(msg, NL80211_PMSR_ATTR_TYPE_CAPA); if 
(!caps) return -ENOBUFS; if (nl80211_send_pmsr_ftm_capa(cap, msg)) return -ENOBUFS; nla_nest_end(msg, caps); nla_nest_end(msg, pmsr); return 0; } static int nl80211_put_iftype_akm_suites(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { int i; struct nlattr *nested, *nested_akms; const struct wiphy_iftype_akm_suites *iftype_akms; if (!rdev->wiphy.num_iftype_akm_suites || !rdev->wiphy.iftype_akm_suites) return 0; nested = nla_nest_start(msg, NL80211_ATTR_IFTYPE_AKM_SUITES); if (!nested) return -ENOBUFS; for (i = 0; i < rdev->wiphy.num_iftype_akm_suites; i++) { nested_akms = nla_nest_start(msg, i + 1); if (!nested_akms) return -ENOBUFS; iftype_akms = &rdev->wiphy.iftype_akm_suites[i]; if (nl80211_put_iftypes(msg, NL80211_IFTYPE_AKM_ATTR_IFTYPES, iftype_akms->iftypes_mask)) return -ENOBUFS; if (nla_put(msg, NL80211_IFTYPE_AKM_ATTR_SUITES, sizeof(u32) * iftype_akms->n_akm_suites, iftype_akms->akm_suites)) { return -ENOBUFS; } nla_nest_end(msg, nested_akms); } nla_nest_end(msg, nested); return 0; } static int nl80211_put_tid_config_support(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { struct nlattr *supp; if (!rdev->wiphy.tid_config_support.vif && !rdev->wiphy.tid_config_support.peer) return 0; supp = nla_nest_start(msg, NL80211_ATTR_TID_CONFIG); if (!supp) return -ENOSPC; if (rdev->wiphy.tid_config_support.vif && nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_VIF_SUPP, rdev->wiphy.tid_config_support.vif, NL80211_TID_CONFIG_ATTR_PAD)) goto fail; if (rdev->wiphy.tid_config_support.peer && nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_PEER_SUPP, rdev->wiphy.tid_config_support.peer, NL80211_TID_CONFIG_ATTR_PAD)) goto fail; /* for now we just use the same value ... 
makes more sense */ if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_SHORT, rdev->wiphy.tid_config_support.max_retry)) goto fail; if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_LONG, rdev->wiphy.tid_config_support.max_retry)) goto fail; nla_nest_end(msg, supp); return 0; fail: nla_nest_cancel(msg, supp); return -ENOBUFS; } static int nl80211_put_sar_specs(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { struct nlattr *sar_capa, *specs, *sub_freq_range; u8 num_freq_ranges; int i; if (!rdev->wiphy.sar_capa) return 0; num_freq_ranges = rdev->wiphy.sar_capa->num_freq_ranges; sar_capa = nla_nest_start(msg, NL80211_ATTR_SAR_SPEC); if (!sar_capa) return -ENOSPC; if (nla_put_u32(msg, NL80211_SAR_ATTR_TYPE, rdev->wiphy.sar_capa->type)) goto fail; specs = nla_nest_start(msg, NL80211_SAR_ATTR_SPECS); if (!specs) goto fail; /* report supported freq_ranges */ for (i = 0; i < num_freq_ranges; i++) { sub_freq_range = nla_nest_start(msg, i + 1); if (!sub_freq_range) goto fail; if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_START_FREQ, rdev->wiphy.sar_capa->freq_ranges[i].start_freq)) goto fail; if (nla_put_u32(msg, NL80211_SAR_ATTR_SPECS_END_FREQ, rdev->wiphy.sar_capa->freq_ranges[i].end_freq)) goto fail; nla_nest_end(msg, sub_freq_range); } nla_nest_end(msg, specs); nla_nest_end(msg, sar_capa); return 0; fail: nla_nest_cancel(msg, sar_capa); return -ENOBUFS; } static int nl80211_put_mbssid_support(struct wiphy *wiphy, struct sk_buff *msg) { struct nlattr *config; if (!wiphy->mbssid_max_interfaces) return 0; config = nla_nest_start(msg, NL80211_ATTR_MBSSID_CONFIG); if (!config) return -ENOBUFS; if (nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_INTERFACES, wiphy->mbssid_max_interfaces)) goto fail; if (wiphy->ema_max_profile_periodicity && nla_put_u8(msg, NL80211_MBSSID_CONFIG_ATTR_MAX_EMA_PROFILE_PERIODICITY, wiphy->ema_max_profile_periodicity)) goto fail; nla_nest_end(msg, config); return 0; fail: nla_nest_cancel(msg, config); return -ENOBUFS; } static int 
nl80211_put_radio(struct wiphy *wiphy, struct sk_buff *msg, int idx) { const struct wiphy_radio *r = &wiphy->radio[idx]; struct nlattr *radio, *freq; int i; radio = nla_nest_start(msg, idx); if (!radio) return -ENOBUFS; if (nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_INDEX, idx)) goto nla_put_failure; if (r->antenna_mask && nla_put_u32(msg, NL80211_WIPHY_RADIO_ATTR_ANTENNA_MASK, r->antenna_mask)) goto nla_put_failure; for (i = 0; i < r->n_freq_range; i++) { const struct wiphy_radio_freq_range *range = &r->freq_range[i]; freq = nla_nest_start(msg, NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE); if (!freq) goto nla_put_failure; if (nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_START, range->start_freq) || nla_put_u32(msg, NL80211_WIPHY_RADIO_FREQ_ATTR_END, range->end_freq)) goto nla_put_failure; nla_nest_end(msg, freq); } for (i = 0; i < r->n_iface_combinations; i++) if (nl80211_put_ifcomb_data(msg, true, NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION, &r->iface_combinations[i], NLA_F_NESTED)) goto nla_put_failure; nla_nest_end(msg, radio); return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_put_radios(struct wiphy *wiphy, struct sk_buff *msg) { struct nlattr *radios; int i; if (!wiphy->n_radio) return 0; radios = nla_nest_start(msg, NL80211_ATTR_WIPHY_RADIOS); if (!radios) return -ENOBUFS; for (i = 0; i < wiphy->n_radio; i++) if (nl80211_put_radio(wiphy, msg, i)) goto fail; nla_nest_end(msg, radios); if (nl80211_put_iface_combinations(wiphy, msg, NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS, -1, true, NLA_F_NESTED)) return -ENOBUFS; return 0; fail: nla_nest_cancel(msg, radios); return -ENOBUFS; } struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; long split_start, band_start, chan_start, capa_start; bool split; }; static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd, struct sk_buff *msg, u32 portid, u32 seq, int flags, struct nl80211_dump_wiphy_state *state) { void *hdr; struct nlattr *nl_bands, *nl_band; 
struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_cmds; enum nl80211_band band; struct ieee80211_channel *chan; int i; const struct ieee80211_txrx_stypes *mgmt_stypes = rdev->wiphy.mgmt_stypes; u32 features; hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -ENOBUFS; if (WARN_ON(!state)) return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_string(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&rdev->wiphy)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, cfg80211_rdev_list_generation)) goto nla_put_failure; if (cmd != NL80211_CMD_NEW_WIPHY) goto finish; switch (state->split_start) { case 0: if (nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_SHORT, rdev->wiphy.retry_short) || nla_put_u8(msg, NL80211_ATTR_WIPHY_RETRY_LONG, rdev->wiphy.retry_long) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FRAG_THRESHOLD, rdev->wiphy.frag_threshold) || nla_put_u32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, rdev->wiphy.rts_threshold) || nla_put_u8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, rdev->wiphy.coverage_class) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, rdev->wiphy.max_scan_ssids) || nla_put_u8(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_SSIDS, rdev->wiphy.max_sched_scan_ssids) || nla_put_u16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, rdev->wiphy.max_scan_ie_len) || nla_put_u16(msg, NL80211_ATTR_MAX_SCHED_SCAN_IE_LEN, rdev->wiphy.max_sched_scan_ie_len) || nla_put_u8(msg, NL80211_ATTR_MAX_MATCH_SETS, rdev->wiphy.max_match_sets)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_IBSS_RSN)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_MESH_AUTH) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_MESH_AUTH)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) && nla_put_flag(msg, NL80211_ATTR_SUPPORT_AP_UAPSD)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_FW_ROAM) && nla_put_flag(msg, NL80211_ATTR_ROAM_SUPPORT)) goto nla_put_failure; if ((rdev->wiphy.flags & 
WIPHY_FLAG_SUPPORTS_TDLS) && nla_put_flag(msg, NL80211_ATTR_TDLS_SUPPORT)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP) && nla_put_flag(msg, NL80211_ATTR_TDLS_EXTERNAL_SETUP)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 1: if (nla_put(msg, NL80211_ATTR_CIPHER_SUITES, sizeof(u32) * rdev->wiphy.n_cipher_suites, rdev->wiphy.cipher_suites)) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, rdev->wiphy.max_num_pmkids)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_TX, rdev->wiphy.available_antennas_tx) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX, rdev->wiphy.available_antennas_rx)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD) && nla_put_u32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD, rdev->wiphy.probe_resp_offload)) goto nla_put_failure; if ((rdev->wiphy.available_antennas_tx || rdev->wiphy.available_antennas_rx) && rdev->ops->get_antenna) { u32 tx_ant = 0, rx_ant = 0; int res; res = rdev_get_antenna(rdev, &tx_ant, &rx_ant); if (!res) { if (nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_TX, tx_ant) || nla_put_u32(msg, NL80211_ATTR_WIPHY_ANTENNA_RX, rx_ant)) goto nla_put_failure; } } state->split_start++; if (state->split) break; fallthrough; case 2: if (nl80211_put_iftypes(msg, NL80211_ATTR_SUPPORTED_IFTYPES, rdev->wiphy.interface_modes)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 3: nl_bands = nla_nest_start_noflag(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; for (band = state->band_start; band < (state->split ? 
NUM_NL80211_BANDS : NL80211_BAND_60GHZ + 1); band++) { struct ieee80211_supported_band *sband; /* omit higher bands for ancient software */ if (band > NL80211_BAND_5GHZ && !state->split) break; sband = rdev->wiphy.bands[band]; if (!sband) continue; nl_band = nla_nest_start_noflag(msg, band); if (!nl_band) goto nla_put_failure; switch (state->chan_start) { case 0: if (nl80211_send_band_rateinfo(msg, sband, state->split)) goto nla_put_failure; state->chan_start++; if (state->split) break; fallthrough; default: /* add frequencies */ nl_freqs = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_FREQS); if (!nl_freqs) goto nla_put_failure; for (i = state->chan_start - 1; i < sband->n_channels; i++) { nl_freq = nla_nest_start_noflag(msg, i); if (!nl_freq) goto nla_put_failure; chan = &sband->channels[i]; if (nl80211_msg_put_channel( msg, &rdev->wiphy, chan, state->split)) goto nla_put_failure; nla_nest_end(msg, nl_freq); if (state->split) break; } if (i < sband->n_channels) state->chan_start = i + 2; else state->chan_start = 0; nla_nest_end(msg, nl_freqs); } nla_nest_end(msg, nl_band); if (state->split) { /* start again here */ if (state->chan_start) band--; break; } } nla_nest_end(msg, nl_bands); if (band < NUM_NL80211_BANDS) state->band_start = band + 1; else state->band_start = 0; /* if bands & channels are done, continue outside */ if (state->band_start == 0 && state->chan_start == 0) state->split_start++; if (state->split) break; fallthrough; case 4: nl_cmds = nla_nest_start_noflag(msg, NL80211_ATTR_SUPPORTED_COMMANDS); if (!nl_cmds) goto nla_put_failure; i = nl80211_add_commands_unsplit(rdev, msg); if (i < 0) goto nla_put_failure; if (state->split) { CMD(crit_proto_start, CRIT_PROTOCOL_START); CMD(crit_proto_stop, CRIT_PROTOCOL_STOP); if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); CMD(set_qos_map, SET_QOS_MAP); if (rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION) CMD(add_tx_ts, ADD_TX_TS); 
CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); CMD(update_connect_params, UPDATE_CONNECT_PARAMS); CMD(update_ft_ies, UPDATE_FT_IES); if (rdev->wiphy.sar_capa) CMD(set_sar_specs, SET_SAR_SPECS); } #undef CMD nla_nest_end(msg, nl_cmds); state->split_start++; if (state->split) break; fallthrough; case 5: if (rdev->ops->remain_on_channel && (rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL) && nla_put_u32(msg, NL80211_ATTR_MAX_REMAIN_ON_CHANNEL_DURATION, rdev->wiphy.max_remain_on_channel_duration)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX) && nla_put_flag(msg, NL80211_ATTR_OFFCHANNEL_TX_OK)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 6: #ifdef CONFIG_PM if (nl80211_send_wowlan(msg, rdev, state->split)) goto nla_put_failure; state->split_start++; if (state->split) break; #else state->split_start++; #endif fallthrough; case 7: if (nl80211_put_iftypes(msg, NL80211_ATTR_SOFTWARE_IFTYPES, rdev->wiphy.software_iftypes)) goto nla_put_failure; if (nl80211_put_iface_combinations(&rdev->wiphy, msg, NL80211_ATTR_INTERFACE_COMBINATIONS, rdev->wiphy.n_radio ? 0 : -1, state->split, 0)) goto nla_put_failure; state->split_start++; if (state->split) break; fallthrough; case 8: if ((rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME) && nla_put_u32(msg, NL80211_ATTR_DEVICE_AP_SME, rdev->wiphy.ap_sme_capa)) goto nla_put_failure; features = rdev->wiphy.features; /* * We can only add the per-channel limit information if the * dump is split, otherwise it makes it too big. Therefore * only advertise it in that case. 
*/ if (state->split) features |= NL80211_FEATURE_ADVERTISE_CHAN_LIMITS; if (nla_put_u32(msg, NL80211_ATTR_FEATURE_FLAGS, features)) goto nla_put_failure; if (rdev->wiphy.ht_capa_mod_mask && nla_put(msg, NL80211_ATTR_HT_CAPABILITY_MASK, sizeof(*rdev->wiphy.ht_capa_mod_mask), rdev->wiphy.ht_capa_mod_mask)) goto nla_put_failure; if (rdev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME && rdev->wiphy.max_acl_mac_addrs && nla_put_u32(msg, NL80211_ATTR_MAC_ACL_MAX, rdev->wiphy.max_acl_mac_addrs)) goto nla_put_failure; /* * Any information below this point is only available to * applications that can deal with it being split. This * helps ensure that newly added capabilities don't break * older tools by overrunning their buffers. * * We still increment split_start so that in the split * case we'll continue with more data in the next round, * but break unconditionally so unsplit data stops here. */ if (state->split) state->split_start++; else state->split_start = 0; break; case 9: if (nl80211_send_mgmt_stypes(msg, mgmt_stypes)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_MAX_NUM_SCHED_SCAN_PLANS, rdev->wiphy.max_sched_scan_plans) || nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_INTERVAL, rdev->wiphy.max_sched_scan_plan_interval) || nla_put_u32(msg, NL80211_ATTR_MAX_SCAN_PLAN_ITERATIONS, rdev->wiphy.max_sched_scan_plan_iterations)) goto nla_put_failure; if (rdev->wiphy.extended_capabilities && (nla_put(msg, NL80211_ATTR_EXT_CAPA, rdev->wiphy.extended_capabilities_len, rdev->wiphy.extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, rdev->wiphy.extended_capabilities_len, rdev->wiphy.extended_capabilities_mask))) goto nla_put_failure; if (rdev->wiphy.vht_capa_mod_mask && nla_put(msg, NL80211_ATTR_VHT_CAPABILITY_MASK, sizeof(*rdev->wiphy.vht_capa_mod_mask), rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, rdev->wiphy.perm_addr)) goto nla_put_failure; if (!is_zero_ether_addr(rdev->wiphy.addr_mask) && nla_put(msg, 
NL80211_ATTR_MAC_MASK, ETH_ALEN, rdev->wiphy.addr_mask)) goto nla_put_failure; if (rdev->wiphy.n_addresses > 1) { void *attr; attr = nla_nest_start(msg, NL80211_ATTR_MAC_ADDRS); if (!attr) goto nla_put_failure; for (i = 0; i < rdev->wiphy.n_addresses; i++) if (nla_put(msg, i + 1, ETH_ALEN, rdev->wiphy.addresses[i].addr)) goto nla_put_failure; nla_nest_end(msg, attr); } state->split_start++; break; case 10: if (nl80211_send_coalesce(msg, rdev)) goto nla_put_failure; if ((rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ) && (nla_put_flag(msg, NL80211_ATTR_SUPPORT_5_MHZ) || nla_put_flag(msg, NL80211_ATTR_SUPPORT_10_MHZ))) goto nla_put_failure; if (rdev->wiphy.max_ap_assoc_sta && nla_put_u32(msg, NL80211_ATTR_MAX_AP_ASSOC_STA, rdev->wiphy.max_ap_assoc_sta)) goto nla_put_failure; state->split_start++; break; case 11: if (rdev->wiphy.n_vendor_commands) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; nested = nla_nest_start_noflag(msg, NL80211_ATTR_VENDOR_DATA); if (!nested) goto nla_put_failure; for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { info = &rdev->wiphy.vendor_commands[i].info; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } if (rdev->wiphy.n_vendor_events) { const struct nl80211_vendor_cmd_info *info; struct nlattr *nested; nested = nla_nest_start_noflag(msg, NL80211_ATTR_VENDOR_EVENTS); if (!nested) goto nla_put_failure; for (i = 0; i < rdev->wiphy.n_vendor_events; i++) { info = &rdev->wiphy.vendor_events[i]; if (nla_put(msg, i + 1, sizeof(*info), info)) goto nla_put_failure; } nla_nest_end(msg, nested); } state->split_start++; break; case 12: if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH && nla_put_u8(msg, NL80211_ATTR_MAX_CSA_COUNTERS, rdev->wiphy.max_num_csa_counters)) goto nla_put_failure; if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; if (rdev->wiphy.max_sched_scan_reqs && 
nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_MAX_REQS, rdev->wiphy.max_sched_scan_reqs)) goto nla_put_failure; if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, sizeof(rdev->wiphy.ext_features), rdev->wiphy.ext_features)) goto nla_put_failure; if (rdev->wiphy.bss_select_support) { struct nlattr *nested; u32 bss_select_support = rdev->wiphy.bss_select_support; nested = nla_nest_start_noflag(msg, NL80211_ATTR_BSS_SELECT); if (!nested) goto nla_put_failure; i = 0; while (bss_select_support) { if ((bss_select_support & 1) && nla_put_flag(msg, i)) goto nla_put_failure; i++; bss_select_support >>= 1; } nla_nest_end(msg, nested); } state->split_start++; break; case 13: if (rdev->wiphy.num_iftype_ext_capab && rdev->wiphy.iftype_ext_capab) { struct nlattr *nested_ext_capab, *nested; nested = nla_nest_start_noflag(msg, NL80211_ATTR_IFTYPE_EXT_CAPA); if (!nested) goto nla_put_failure; for (i = state->capa_start; i < rdev->wiphy.num_iftype_ext_capab; i++) { const struct wiphy_iftype_ext_capab *capab; capab = &rdev->wiphy.iftype_ext_capab[i]; nested_ext_capab = nla_nest_start_noflag(msg, i); if (!nested_ext_capab || nla_put_u32(msg, NL80211_ATTR_IFTYPE, capab->iftype) || nla_put(msg, NL80211_ATTR_EXT_CAPA, capab->extended_capabilities_len, capab->extended_capabilities) || nla_put(msg, NL80211_ATTR_EXT_CAPA_MASK, capab->extended_capabilities_len, capab->extended_capabilities_mask)) goto nla_put_failure; if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO && (nla_put_u16(msg, NL80211_ATTR_EML_CAPABILITY, capab->eml_capabilities) || nla_put_u16(msg, NL80211_ATTR_MLD_CAPA_AND_OPS, capab->mld_capa_and_ops))) goto nla_put_failure; nla_nest_end(msg, nested_ext_capab); if (state->split) break; } nla_nest_end(msg, nested); if (i < rdev->wiphy.num_iftype_ext_capab) { state->capa_start = i + 1; break; } } if (nla_put_u32(msg, NL80211_ATTR_BANDS, rdev->wiphy.nan_supported_bands)) goto nla_put_failure; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) { struct cfg80211_txq_stats 
txqstats = {}; int res; res = rdev_get_txq_stats(rdev, NULL, &txqstats); if (!res && !nl80211_put_txq_stats(msg, &txqstats, NL80211_ATTR_TXQ_STATS)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_TXQ_LIMIT, rdev->wiphy.txq_limit)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_TXQ_MEMORY_LIMIT, rdev->wiphy.txq_memory_limit)) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_TXQ_QUANTUM, rdev->wiphy.txq_quantum)) goto nla_put_failure; } state->split_start++; break; case 14: if (nl80211_send_pmsr_capa(rdev, msg)) goto nla_put_failure; state->split_start++; break; case 15: if (rdev->wiphy.akm_suites && nla_put(msg, NL80211_ATTR_AKM_SUITES, sizeof(u32) * rdev->wiphy.n_akm_suites, rdev->wiphy.akm_suites)) goto nla_put_failure; if (nl80211_put_iftype_akm_suites(rdev, msg)) goto nla_put_failure; if (nl80211_put_tid_config_support(rdev, msg)) goto nla_put_failure; state->split_start++; break; case 16: if (nl80211_put_sar_specs(rdev, msg)) goto nla_put_failure; if (nl80211_put_mbssid_support(&rdev->wiphy, msg)) goto nla_put_failure; if (nla_put_u16(msg, NL80211_ATTR_MAX_NUM_AKM_SUITES, rdev->wiphy.max_num_akm_suites)) goto nla_put_failure; if (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO) nla_put_flag(msg, NL80211_ATTR_MLO_SUPPORT); if (rdev->wiphy.hw_timestamp_max_peers && nla_put_u16(msg, NL80211_ATTR_MAX_HW_TIMESTAMP_PEERS, rdev->wiphy.hw_timestamp_max_peers)) goto nla_put_failure; state->split_start++; break; case 17: if (nl80211_put_radios(&rdev->wiphy, msg)) goto nla_put_failure; /* done */ state->split_start = 0; break; } finish: genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { struct nlattr **tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL); int ret; if (!tb) return -ENOMEM; ret = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, tb, 
nl80211_fam.maxattr, nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ if (ret) { ret = 0; goto out; } state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; if (tb[NL80211_ATTR_WIPHY]) state->filter_wiphy = nla_get_u32(tb[NL80211_ATTR_WIPHY]); if (tb[NL80211_ATTR_WDEV]) state->filter_wiphy = nla_get_u64(tb[NL80211_ATTR_WDEV]) >> 32; if (tb[NL80211_ATTR_IFINDEX]) { struct net_device *netdev; struct cfg80211_registered_device *rdev; int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); if (!netdev) { ret = -ENODEV; goto out; } if (netdev->ieee80211_ptr) { rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); state->filter_wiphy = rdev->wiphy_idx; } } ret = 0; out: kfree(tb); return ret; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) { int idx = 0, ret; struct nl80211_dump_wiphy_state *state = (void *)cb->args[0]; struct cfg80211_registered_device *rdev; rtnl_lock(); if (!state) { state = kzalloc(sizeof(*state), GFP_KERNEL); if (!state) { rtnl_unlock(); return -ENOMEM; } state->filter_wiphy = -1; ret = nl80211_dump_wiphy_parse(skb, cb, state); if (ret) { kfree(state); rtnl_unlock(); return ret; } cb->args[0] = (long)state; } for_each_rdev(rdev) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; if (++idx <= state->start) continue; if (state->filter_wiphy != -1 && state->filter_wiphy != rdev->wiphy_idx) continue; wiphy_lock(&rdev->wiphy); /* attempt to fit multiple wiphy data chunks into the skb */ do { ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, state); if (ret < 0) { /* * If sending the wiphy data didn't fit (ENOBUFS * or EMSGSIZE returned), this SKB is still * empty (so it's not too big because another * wiphy dataset is already in the skb) and * we've not tried to adjust the dump allocation * yet ... 
then adjust the alloc size to be * bigger, and return 1 but with the empty skb. * This results in an empty message being RX'ed * in userspace, but that is ignored. * * We can then retry with the larger buffer. */ if ((ret == -ENOBUFS || ret == -EMSGSIZE) && !skb->len && !state->split && cb->min_dump_alloc < 4096) { cb->min_dump_alloc = 4096; state->split_start = 0; wiphy_unlock(&rdev->wiphy); rtnl_unlock(); return 1; } idx--; break; } } while (state->split_start > 0); wiphy_unlock(&rdev->wiphy); break; } rtnl_unlock(); state->start = idx; return skb->len; } static int nl80211_dump_wiphy_done(struct netlink_callback *cb) { kfree((void *)cb->args[0]); return 0; } static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nl80211_dump_wiphy_state state = {}; msg = nlmsg_new(4096, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY, msg, info->snd_portid, info->snd_seq, 0, &state) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { [NL80211_TXQ_ATTR_QUEUE] = { .type = NLA_U8 }, [NL80211_TXQ_ATTR_TXOP] = { .type = NLA_U16 }, [NL80211_TXQ_ATTR_CWMIN] = { .type = NLA_U16 }, [NL80211_TXQ_ATTR_CWMAX] = { .type = NLA_U16 }, [NL80211_TXQ_ATTR_AIFS] = { .type = NLA_U8 }, }; static int parse_txq_params(struct nlattr *tb[], struct ieee80211_txq_params *txq_params) { u8 ac; if (!tb[NL80211_TXQ_ATTR_AC] || !tb[NL80211_TXQ_ATTR_TXOP] || !tb[NL80211_TXQ_ATTR_CWMIN] || !tb[NL80211_TXQ_ATTR_CWMAX] || !tb[NL80211_TXQ_ATTR_AIFS]) return -EINVAL; ac = nla_get_u8(tb[NL80211_TXQ_ATTR_AC]); txq_params->txop = nla_get_u16(tb[NL80211_TXQ_ATTR_TXOP]); txq_params->cwmin = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMIN]); txq_params->cwmax = nla_get_u16(tb[NL80211_TXQ_ATTR_CWMAX]); txq_params->aifs = nla_get_u8(tb[NL80211_TXQ_ATTR_AIFS]); if (ac >= 
NL80211_NUM_ACS) return -EINVAL; txq_params->ac = array_index_nospec(ac, NL80211_NUM_ACS); return 0; } static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) { /* * You can only set the channel explicitly for some interfaces, * most have their channel managed via their respective * "establish a connection" command (connect, join, ...) * * For AP/GO and mesh mode, the channel can be set with the * channel userspace API, but is only stored and passed to the * low-level driver when the AP starts or the mesh is joined. * This is for backward compatibility, userspace can also give * the channel in the start-ap or join-mesh commands instead. * * Monitors are special as they are normally slaved to * whatever else is going on, so they have their own special * operation to set the monitor channel if possible. */ return !wdev || wdev->iftype == NL80211_IFTYPE_AP || wdev->iftype == NL80211_IFTYPE_MESH_POINT || wdev->iftype == NL80211_IFTYPE_MONITOR || wdev->iftype == NL80211_IFTYPE_P2P_GO; } static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, struct genl_info *info, bool monitor, struct cfg80211_chan_def *chandef) { struct netlink_ext_ack *extack = info->extack; struct nlattr **attrs = info->attrs; u32 control_freq; if (!attrs[NL80211_ATTR_WIPHY_FREQ]) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], "Frequency is missing"); return -EINVAL; } control_freq = MHZ_TO_KHZ( nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) control_freq += nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); memset(chandef, 0, sizeof(*chandef)); chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq); chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = KHZ_TO_MHZ(control_freq); chandef->freq1_offset = control_freq % 1000; chandef->center_freq2 = 0; if (!chandef->chan) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], "Unknown channel"); return -EINVAL; } 
if (attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { enum nl80211_channel_type chantype; chantype = nla_get_u32(attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); switch (chantype) { case NL80211_CHAN_NO_HT: case NL80211_CHAN_HT20: case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: cfg80211_chandef_create(chandef, chandef->chan, chantype); /* user input for center_freq is incorrect */ if (attrs[NL80211_ATTR_CENTER_FREQ1] && chandef->center_freq1 != nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1])) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_CENTER_FREQ1], "bad center frequency 1"); return -EINVAL; } /* center_freq2 must be zero */ if (attrs[NL80211_ATTR_CENTER_FREQ2] && nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2])) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_CENTER_FREQ2], "center frequency 2 can't be used"); return -EINVAL; } break; default: NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE], "invalid channel type"); return -EINVAL; } } else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) { chandef->width = nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]); if (chandef->chan->band == NL80211_BAND_S1GHZ) { /* User input error for channel width doesn't match channel */ if (chandef->width != ieee80211_s1g_channel_width(chandef->chan)) { NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_CHANNEL_WIDTH], "bad channel width"); return -EINVAL; } } if (attrs[NL80211_ATTR_CENTER_FREQ1]) { chandef->center_freq1 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]); chandef->freq1_offset = nla_get_u32_default(attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET], 0); } if (attrs[NL80211_ATTR_CENTER_FREQ2]) chandef->center_freq2 = nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]); } if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { chandef->edmg.channels = nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) chandef->edmg.bw_config = nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); } else { chandef->edmg.bw_config = 0; 
chandef->edmg.channels = 0; } if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { chandef->punctured = nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]); if (chandef->punctured && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT)) { NL_SET_ERR_MSG(extack, "driver doesn't support puncturing"); return -EINVAL; } } if (!cfg80211_chandef_valid(chandef)) { NL_SET_ERR_MSG(extack, "invalid channel definition"); return -EINVAL; } if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef, IEEE80211_CHAN_DISABLED, monitor ? IEEE80211_CHAN_CAN_MONITOR : 0)) { NL_SET_ERR_MSG(extack, "(extension) channel is disabled"); return -EINVAL; } if ((chandef->width == NL80211_CHAN_WIDTH_5 || chandef->width == NL80211_CHAN_WIDTH_10) && !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_5_10_MHZ)) { NL_SET_ERR_MSG(extack, "5/10 MHz not supported"); return -EINVAL; } return 0; }

/*
 * Non-static entry point for channel-definition parsing: same as
 * _nl80211_parse_chandef() with the monitor argument fixed to false.
 */
int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
			  struct genl_info *info,
			  struct cfg80211_chan_def *chandef)
{
	return _nl80211_parse_chandef(rdev, info, false, chandef);
}

/*
 * Apply the channel described by the request attributes.
 *
 * @dev may be NULL, in which case the request is handled as for a monitor
 * interface.  A negative @_link_id means "no link given": it is rejected
 * with -EINVAL when the wdev has valid_links set and otherwise treated as
 * link 0.
 *
 * Returns 0 on success, -EOPNOTSUPP when the interface type does not allow
 * setting a channel, -EBUSY when an already beaconing AP link cannot have
 * its width changed, -EINVAL for other rejected requests, or the error
 * from parsing / the driver.
 */
static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
				 struct net_device *dev,
				 struct genl_info *info,
				 int _link_id)
{
	struct cfg80211_chan_def chandef;
	int result;
	/* used when there is no wdev */
	enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR;
	struct wireless_dev *wdev = NULL;
	int link_id = _link_id;

	if (dev)
		wdev = dev->ieee80211_ptr;
	if (!nl80211_can_set_dev_channel(wdev))
		return -EOPNOTSUPP;
	if (wdev)
		iftype = wdev->iftype;

	if (link_id < 0) {
		if (wdev && wdev->valid_links)
			return -EINVAL;
		link_id = 0;
	}

	result = _nl80211_parse_chandef(rdev, info,
					iftype == NL80211_IFTYPE_MONITOR,
					&chandef);
	if (result)
		return result;

	switch (iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_P2P_GO:
		if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef,
						   iftype))
			return -EINVAL;
		/* a non-zero beacon interval is taken as "this link is up" */
		if (wdev->links[link_id].ap.beacon_interval) {
			struct ieee80211_channel *cur_chan;

			if (!dev || !rdev->ops->set_ap_chanwidth ||
			    !(rdev->wiphy.features &
			      NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE))
				return -EBUSY;

			/* Only allow dynamic channel width changes */
			cur_chan = wdev->links[link_id].ap.chandef.chan;
			if (chandef.chan != cur_chan)
				return -EBUSY;

			/* only allow this for regular channel widths */
			switch (wdev->links[link_id].ap.chandef.width) {
			case NL80211_CHAN_WIDTH_20_NOHT:
			case NL80211_CHAN_WIDTH_20:
			case NL80211_CHAN_WIDTH_40:
			case NL80211_CHAN_WIDTH_80:
			case NL80211_CHAN_WIDTH_80P80:
			case NL80211_CHAN_WIDTH_160:
			case NL80211_CHAN_WIDTH_320:
				break;
			default:
				return -EINVAL;
			}

			/* the requested width must be a regular one as well */
			switch (chandef.width) {
			case NL80211_CHAN_WIDTH_20_NOHT:
			case NL80211_CHAN_WIDTH_20:
			case NL80211_CHAN_WIDTH_40:
			case NL80211_CHAN_WIDTH_80:
			case NL80211_CHAN_WIDTH_80P80:
			case NL80211_CHAN_WIDTH_160:
			case NL80211_CHAN_WIDTH_320:
				break;
			default:
				return -EINVAL;
			}

			result = rdev_set_ap_chanwidth(rdev, dev, link_id,
						       &chandef);
			if (result)
				return result;
			/* remember the new definition only once the driver accepted it */
			wdev->links[link_id].ap.chandef = chandef;
		} else {
			/* link not beaconing: just store the channel as a preset */
			wdev->u.ap.preset_chandef = chandef;
		}
		return 0;
	case NL80211_IFTYPE_MESH_POINT:
		return cfg80211_set_mesh_channel(rdev, wdev, &chandef);
	case NL80211_IFTYPE_MONITOR:
		return cfg80211_set_monitor_channel(rdev, dev, &chandef);
	default:
		break;
	}

	return -EINVAL;
}

/*
 * Set-channel command handler: takes the rdev and netdev from
 * info->user_ptr[] and the link id (or an invalid marker) from the
 * attributes, then defers to __nl80211_set_channel().
 */
static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	int link_id = nl80211_link_id_or_invalid(info->attrs);
	struct net_device *netdev = info->user_ptr[1];

	return __nl80211_set_channel(rdev, netdev, info, link_id);
}

static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; struct net_device *netdev = NULL; struct wireless_dev *wdev; int result = 0, rem_txq_params = 0; struct nlattr *nl_txq_params; u32 changed; u8 retry_short = 0, retry_long = 0; u32 frag_threshold = 0, rts_threshold = 0; u8 coverage_class = 0; u32 txq_limit = 0, txq_memory_limit = 0, txq_quantum = 0; rtnl_lock(); /* * Try to find the wiphy and netdev. 
Normally this * function shouldn't need the netdev, but this is * done for backward compatibility -- previously * setting the channel was done per wiphy, but now * it is per netdev. Previous userland like hostapd * also passed a netdev to set_wiphy, so that it is * possible to let that go to the right netdev! */ if (info->attrs[NL80211_ATTR_IFINDEX]) { int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(genl_info_net(info), ifindex); if (netdev && netdev->ieee80211_ptr) rdev = wiphy_to_rdev(netdev->ieee80211_ptr->wiphy); else netdev = NULL; } if (!netdev) { rdev = __cfg80211_rdev_from_attrs(genl_info_net(info), info->attrs); if (IS_ERR(rdev)) { rtnl_unlock(); return PTR_ERR(rdev); } wdev = NULL; netdev = NULL; result = 0; } else wdev = netdev->ieee80211_ptr; guard(wiphy)(&rdev->wiphy); /* * end workaround code, by now the rdev is available * and locked, and wdev may or may not be NULL. */ if (info->attrs[NL80211_ATTR_WIPHY_NAME]) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); rtnl_unlock(); if (result) return result; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; struct nlattr *tb[NL80211_TXQ_ATTR_MAX + 1]; if (!rdev->ops->set_txq_params) return -EOPNOTSUPP; if (!netdev) return -EINVAL; if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; if (!netif_running(netdev)) return -ENETDOWN; nla_for_each_nested(nl_txq_params, info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS], rem_txq_params) { result = nla_parse_nested_deprecated(tb, NL80211_TXQ_ATTR_MAX, nl_txq_params, txq_params_policy, info->extack); if (result) return result; result = parse_txq_params(tb, &txq_params); if (result) return result; txq_params.link_id = nl80211_link_id_or_invalid(info->attrs); if (txq_params.link_id >= 0 && !(netdev->ieee80211_ptr->valid_links & BIT(txq_params.link_id))) result = -ENOLINK; else if 
(txq_params.link_id >= 0 && !netdev->ieee80211_ptr->valid_links) result = -EINVAL; else result = rdev_set_txq_params(rdev, netdev, &txq_params); if (result) return result; } } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { int link_id = nl80211_link_id_or_invalid(info->attrs); if (wdev) { result = __nl80211_set_channel( rdev, nl80211_can_set_dev_channel(wdev) ? netdev : NULL, info, link_id); } else { result = __nl80211_set_channel(rdev, netdev, info, link_id); } if (result) return result; } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { struct wireless_dev *txp_wdev = wdev; enum nl80211_tx_power_setting type; int idx, mbm = 0; if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) txp_wdev = NULL; if (!rdev->ops->set_tx_power) return -EOPNOTSUPP; idx = NL80211_ATTR_WIPHY_TX_POWER_SETTING; type = nla_get_u32(info->attrs[idx]); if (!info->attrs[NL80211_ATTR_WIPHY_TX_POWER_LEVEL] && (type != NL80211_TX_POWER_AUTOMATIC)) return -EINVAL; if (type != NL80211_TX_POWER_AUTOMATIC) { idx = NL80211_ATTR_WIPHY_TX_POWER_LEVEL; mbm = nla_get_u32(info->attrs[idx]); } result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) return result; } if (info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX] && info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]) { u32 tx_ant, rx_ant; if ((!rdev->wiphy.available_antennas_tx && !rdev->wiphy.available_antennas_rx) || !rdev->ops->set_antenna) return -EOPNOTSUPP; tx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_TX]); rx_ant = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_ANTENNA_RX]); /* reject antenna configurations which don't match the * available antenna masks, except for the "all" mask */ if ((~tx_ant && (tx_ant & ~rdev->wiphy.available_antennas_tx)) || (~rx_ant && (rx_ant & ~rdev->wiphy.available_antennas_rx))) return -EINVAL; tx_ant = tx_ant & rdev->wiphy.available_antennas_tx; rx_ant = rx_ant & rdev->wiphy.available_antennas_rx; result = rdev_set_antenna(rdev, tx_ant, rx_ant); if (result) return result; } changed = 0; if 
(info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]) { retry_short = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_RETRY_SHORT]); changed |= WIPHY_PARAM_RETRY_SHORT; } if (info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]) { retry_long = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_RETRY_LONG]); changed |= WIPHY_PARAM_RETRY_LONG; } if (info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]) { frag_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_FRAG_THRESHOLD]); if (frag_threshold < 256) return -EINVAL; if (frag_threshold != (u32) -1) { /* * Fragments (apart from the last one) are required to * have even length. Make the fragmentation code * simpler by stripping LSB should someone try to use * odd threshold value. */ frag_threshold &= ~0x1; } changed |= WIPHY_PARAM_FRAG_THRESHOLD; } if (info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]) { rts_threshold = nla_get_u32( info->attrs[NL80211_ATTR_WIPHY_RTS_THRESHOLD]); changed |= WIPHY_PARAM_RTS_THRESHOLD; } if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) return -EINVAL; coverage_class = nla_get_u8( info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); changed |= WIPHY_PARAM_COVERAGE_CLASS; } if (info->attrs[NL80211_ATTR_WIPHY_DYN_ACK]) { if (!(rdev->wiphy.features & NL80211_FEATURE_ACKTO_ESTIMATION)) return -EOPNOTSUPP; changed |= WIPHY_PARAM_DYN_ACK; } if (info->attrs[NL80211_ATTR_TXQ_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) return -EOPNOTSUPP; txq_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_LIMIT]); changed |= WIPHY_PARAM_TXQ_LIMIT; } if (info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) return -EOPNOTSUPP; txq_memory_limit = nla_get_u32( info->attrs[NL80211_ATTR_TXQ_MEMORY_LIMIT]); changed |= WIPHY_PARAM_TXQ_MEMORY_LIMIT; } if (info->attrs[NL80211_ATTR_TXQ_QUANTUM]) { if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_TXQS)) return -EOPNOTSUPP; txq_quantum = nla_get_u32( 
info->attrs[NL80211_ATTR_TXQ_QUANTUM]); changed |= WIPHY_PARAM_TXQ_QUANTUM; } if (changed) { u8 old_retry_short, old_retry_long; u32 old_frag_threshold, old_rts_threshold; u8 old_coverage_class; u32 old_txq_limit, old_txq_memory_limit, old_txq_quantum; if (!rdev->ops->set_wiphy_params) return -EOPNOTSUPP; old_retry_short = rdev->wiphy.retry_short; old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; old_coverage_class = rdev->wiphy.coverage_class; old_txq_limit = rdev->wiphy.txq_limit; old_txq_memory_limit = rdev->wiphy.txq_memory_limit; old_txq_quantum = rdev->wiphy.txq_quantum; if (changed & WIPHY_PARAM_RETRY_SHORT) rdev->wiphy.retry_short = retry_short; if (changed & WIPHY_PARAM_RETRY_LONG) rdev->wiphy.retry_long = retry_long; if (changed & WIPHY_PARAM_FRAG_THRESHOLD) rdev->wiphy.frag_threshold = frag_threshold; if (changed & WIPHY_PARAM_RTS_THRESHOLD) rdev->wiphy.rts_threshold = rts_threshold; if (changed & WIPHY_PARAM_COVERAGE_CLASS) rdev->wiphy.coverage_class = coverage_class; if (changed & WIPHY_PARAM_TXQ_LIMIT) rdev->wiphy.txq_limit = txq_limit; if (changed & WIPHY_PARAM_TXQ_MEMORY_LIMIT) rdev->wiphy.txq_memory_limit = txq_memory_limit; if (changed & WIPHY_PARAM_TXQ_QUANTUM) rdev->wiphy.txq_quantum = txq_quantum; result = rdev_set_wiphy_params(rdev, changed); if (result) { rdev->wiphy.retry_short = old_retry_short; rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; rdev->wiphy.coverage_class = old_coverage_class; rdev->wiphy.txq_limit = old_txq_limit; rdev->wiphy.txq_memory_limit = old_txq_memory_limit; rdev->wiphy.txq_quantum = old_txq_quantum; return result; } } return 0; } int nl80211_send_chandef(struct sk_buff *msg, const struct cfg80211_chan_def *chandef) { if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, 
chandef->chan->center_freq)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, chandef->chan->freq_offset)) return -ENOBUFS; switch (chandef->width) { case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_40: if (nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, cfg80211_get_chandef_type(chandef))) return -ENOBUFS; break; default: break; } if (nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, chandef->width)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ1, chandef->center_freq1)) return -ENOBUFS; if (chandef->center_freq2 && nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2)) return -ENOBUFS; if (chandef->punctured && nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, chandef->punctured)) return -ENOBUFS; return 0; } EXPORT_SYMBOL(nl80211_send_chandef); static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_commands cmd) { struct net_device *dev = wdev->netdev; void *hdr; lockdep_assert_wiphy(&rdev->wiphy); WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && cmd != NL80211_CMD_DEL_INTERFACE && cmd != NL80211_CMD_SET_INTERFACE); hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; if (dev && (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_string(msg, NL80211_ATTR_IFNAME, dev->name))) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFTYPE, wdev->iftype) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev_address(wdev)) || nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->devlist_generation ^ (cfg80211_rdev_list_generation << 2)) || nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr) || nla_put_u32(msg, NL80211_ATTR_VIF_RADIO_MASK, wdev->radio_mask)) goto nla_put_failure; if (rdev->ops->get_channel && !wdev->valid_links) { 
struct cfg80211_chan_def chandef = {}; int ret; ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret == 0 && nl80211_send_chandef(msg, &chandef)) goto nla_put_failure; } if (rdev->ops->get_tx_power && !wdev->valid_links) { int dbm, ret; ret = rdev_get_tx_power(rdev, wdev, 0, &dbm); if (ret == 0 && nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) goto nla_put_failure; } switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (wdev->u.ap.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, wdev->u.ap.ssid)) goto nla_put_failure; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->u.client.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len, wdev->u.client.ssid)) goto nla_put_failure; break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.ssid_len && nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len, wdev->u.ibss.ssid)) goto nla_put_failure; break; default: /* nothing */ break; } if (rdev->ops->get_txq_stats) { struct cfg80211_txq_stats txqstats = {}; int ret = rdev_get_txq_stats(rdev, wdev, &txqstats); if (ret == 0 && !nl80211_put_txq_stats(msg, &txqstats, NL80211_ATTR_TXQ_STATS)) goto nla_put_failure; } if (wdev->valid_links) { unsigned int link_id; struct nlattr *links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!links) goto nla_put_failure; for_each_valid_link(wdev, link_id) { struct nlattr *link = nla_nest_start(msg, link_id + 1); struct cfg80211_chan_def chandef = {}; int ret; if (!link) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) goto nla_put_failure; if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, wdev->links[link_id].addr)) goto nla_put_failure; ret = rdev_get_channel(rdev, wdev, link_id, &chandef); if (ret == 0 && nl80211_send_chandef(msg, &chandef)) goto nla_put_failure; if (rdev->ops->get_tx_power) { int dbm, ret; ret = rdev_get_tx_power(rdev, wdev, link_id, &dbm); if (ret == 0 && nla_put_u32(msg, 
NL80211_ATTR_WIPHY_TX_POWER_LEVEL, DBM_TO_MBM(dbm))) goto nla_put_failure; } nla_nest_end(msg, link); } nla_nest_end(msg, links); } genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; }

/*
 * Netlink dump callback listing wireless interfaces.
 *
 * Resume state is kept in cb->args[]:
 *   args[0] - number of wiphys (in this netns) already fully handled
 *   args[1] - number of interfaces already sent for the current wiphy
 *   args[2] - 0 until the request has been parsed; afterwards -1 for
 *             "no wiphy filter" or (wiphy index + 1) when filtering
 *
 * Returns skb->len, or the error from parsing the request on the first
 * invocation.
 */
static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
{
	int wp_idx = 0;
	int if_idx = 0;
	int wp_start = cb->args[0];
	int if_start = cb->args[1];
	int filter_wiphy = -1;
	struct cfg80211_registered_device *rdev;
	struct wireless_dev *wdev;
	int ret;

	rtnl_lock();
	if (!cb->args[2]) {
		/* first invocation: parse the request to find a wiphy filter */
		struct nl80211_dump_wiphy_state state = {
			.filter_wiphy = -1,
		};

		ret = nl80211_dump_wiphy_parse(skb, cb, &state);
		if (ret)
			goto out_unlock;

		filter_wiphy = state.filter_wiphy;

		/*
		 * if filtering, set cb->args[2] to +1 since 0 is the default
		 * value needed to determine that parsing is necessary.
		 */
		if (filter_wiphy >= 0)
			cb->args[2] = filter_wiphy + 1;
		else
			cb->args[2] = -1;
	} else if (cb->args[2] > 0) {
		/* later invocation: recover the filter stored above */
		filter_wiphy = cb->args[2] - 1;
	}

	for_each_rdev(rdev) {
		if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
			continue;
		/* skip wiphys that earlier invocations already completed */
		if (wp_idx < wp_start) {
			wp_idx++;
			continue;
		}

		if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx)
			continue;

		if_idx = 0;

		guard(wiphy)(&rdev->wiphy);

		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
			/* skip interfaces of this wiphy that were already sent */
			if (if_idx < if_start) {
				if_idx++;
				continue;
			}

			/* stop on failure; the indices below record where to resume */
			if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid,
					       cb->nlh->nlmsg_seq, NLM_F_MULTI,
					       rdev, wdev,
					       NL80211_CMD_NEW_INTERFACE) < 0)
				goto out;

			if_idx++;
		}

		/* the interface offset only applies to the wiphy we resumed in */
		if_start = 0;
		wp_idx++;
	}
 out:
	cb->args[0] = wp_idx;
	cb->args[1] = if_idx;

	ret = skb->len;
 out_unlock:
	rtnl_unlock();

	return ret;
}

/*
 * Single-interface query: reply with one NL80211_CMD_NEW_INTERFACE message
 * for the wdev in info->user_ptr[1].
 */
static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) { struct sk_buff *msg; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 
0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { [NL80211_MNTR_FLAG_FCSFAIL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_PLCPFAIL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_CONTROL] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_OTHER_BSS] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_COOK_FRAMES] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_ACTIVE] = { .type = NLA_FLAG }, [NL80211_MNTR_FLAG_SKIP_TX] = { .type = NLA_FLAG }, }; static int parse_monitor_flags(struct nlattr *nla, u32 *mntrflags) { struct nlattr *flags[NL80211_MNTR_FLAG_MAX + 1]; int flag; *mntrflags = 0; if (!nla) return -EINVAL; if (nla_parse_nested_deprecated(flags, NL80211_MNTR_FLAG_MAX, nla, mntr_flags_policy, NULL)) return -EINVAL; for (flag = 1; flag <= NL80211_MNTR_FLAG_MAX; flag++) if (flags[flag]) *mntrflags |= (1<<flag); *mntrflags |= MONITOR_FLAG_CHANGED; return 0; } static int nl80211_parse_mon_options(struct cfg80211_registered_device *rdev, enum nl80211_iftype type, struct genl_info *info, struct vif_params *params) { bool change = false; int err; if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { if (type != NL80211_IFTYPE_MONITOR) return -EINVAL; err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], &params->flags); if (err) return err; change = true; } if (params->flags & MONITOR_FLAG_ACTIVE && !(rdev->wiphy.features & NL80211_FEATURE_ACTIVE_MONITOR)) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]) { const u8 *mumimo_groups; u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; if (type != NL80211_IFTYPE_MONITOR) return -EINVAL; if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) return -EOPNOTSUPP; mumimo_groups = nla_data(info->attrs[NL80211_ATTR_MU_MIMO_GROUP_DATA]); /* bits 0 and 63 are reserved and must be zero */ if ((mumimo_groups[0] & BIT(0)) || (mumimo_groups[VHT_MUMIMO_GROUPS_DATA_LEN - 1] & BIT(7))) return -EINVAL; params->vht_mumimo_groups = 
mumimo_groups; change = true; } if (info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]) { u32 cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; if (type != NL80211_IFTYPE_MONITOR) return -EINVAL; if (!wiphy_ext_feature_isset(&rdev->wiphy, cap_flag)) return -EOPNOTSUPP; params->vht_mumimo_follow_addr = nla_data(info->attrs[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR]); change = true; } return change ? 1 : 0; } static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev, struct net_device *netdev, u8 use_4addr, enum nl80211_iftype iftype) { if (!use_4addr) { if (netdev && netif_is_bridge_port(netdev)) return -EBUSY; return 0; } switch (iftype) { case NL80211_IFTYPE_AP_VLAN: if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_AP) return 0; break; case NL80211_IFTYPE_STATION: if (rdev->wiphy.flags & WIPHY_FLAG_4ADDR_STATION) return 0; break; default: break; } return -EOPNOTSUPP; } static int nl80211_parse_vif_radio_mask(struct genl_info *info, u32 *radio_mask) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *attr = info->attrs[NL80211_ATTR_VIF_RADIO_MASK]; u32 mask, allowed; if (!attr) { *radio_mask = 0; return 0; } allowed = BIT(rdev->wiphy.n_radio) - 1; mask = nla_get_u32(attr); if (mask & ~allowed) return -EINVAL; if (!mask) mask = allowed; *radio_mask = mask; return 1; } static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; int err; enum nl80211_iftype otype, ntype; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; u32 radio_mask = 0; bool change = false; memset(&params, 0, sizeof(params)); otype = ntype = dev->ieee80211_ptr->iftype; if (info->attrs[NL80211_ATTR_IFTYPE]) { ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); if (otype != ntype) change = true; } if (info->attrs[NL80211_ATTR_MESH_ID]) { if (ntype != NL80211_IFTYPE_MESH_POINT) return -EINVAL; if (otype != 
NL80211_IFTYPE_MESH_POINT) return -EINVAL; if (netif_running(dev)) return -EBUSY; wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); } if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); change = true; err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype); if (err) return err; } else { params.use_4addr = -1; } err = nl80211_parse_mon_options(rdev, ntype, info, &params); if (err < 0) return err; if (err > 0) change = true; err = nl80211_parse_vif_radio_mask(info, &radio_mask); if (err < 0) return err; if (err && netif_running(dev)) return -EBUSY; if (change) err = cfg80211_change_iface(rdev, dev, ntype, &params); else err = 0; if (!err && params.use_4addr != -1) dev->ieee80211_ptr->use_4addr = params.use_4addr; if (radio_mask) wdev->radio_mask = radio_mask; if (change && !err) nl80211_notify_iface(rdev, wdev, NL80211_CMD_SET_INTERFACE); return err; } static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; struct wireless_dev *wdev; struct sk_buff *msg; u32 radio_mask; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; memset(&params, 0, sizeof(params)); if (!info->attrs[NL80211_ATTR_IFNAME]) return -EINVAL; if (info->attrs[NL80211_ATTR_IFTYPE]) type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); if (!rdev->ops->add_virtual_intf) return -EOPNOTSUPP; if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN || rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], ETH_ALEN); if (!is_valid_ether_addr(params.macaddr)) return -EADDRNOTAVAIL; } if (info->attrs[NL80211_ATTR_4ADDR]) { params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); err = 
nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); if (err) return err; } if (!cfg80211_iftype_allowed(&rdev->wiphy, type, params.use_4addr, 0)) return -EOPNOTSUPP; err = nl80211_parse_mon_options(rdev, type, info, &params); if (err < 0) return err; err = nl80211_parse_vif_radio_mask(info, &radio_mask); if (err < 0) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; wdev = rdev_add_virtual_intf(rdev, nla_data(info->attrs[NL80211_ATTR_IFNAME]), NET_NAME_USER, type, &params); if (WARN_ON(!wdev)) { nlmsg_free(msg); return -EPROTO; } else if (IS_ERR(wdev)) { nlmsg_free(msg); return PTR_ERR(wdev); } if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->owner_nlportid = info->snd_portid; switch (type) { case NL80211_IFTYPE_MESH_POINT: if (!info->attrs[NL80211_ATTR_MESH_ID]) break; wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); memcpy(wdev->u.mesh.id, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), wdev->u.mesh.id_up_len); break; case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: /* * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ cfg80211_init_wdev(wdev); cfg80211_register_wdev(rdev, wdev); break; default: break; } if (radio_mask) wdev->radio_mask = radio_mask; if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev, NL80211_CMD_NEW_INTERFACE) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; /* to avoid failing a new interface creation due to pending removal */ cfg80211_destroy_ifaces(rdev); guard(wiphy)(&rdev->wiphy); return _nl80211_new_interface(skb, info); } static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; if 
(!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; /* * We hold RTNL, so this is safe, without RTNL opencount cannot * reach 0, and thus the rdev cannot be deleted. * * We need to do it for the dev_close(), since that will call * the netdev notifiers, and we need to acquire the mutex there * but don't know if we get there from here or from some other * place (e.g. "ip link set ... down"). */ mutex_unlock(&rdev->wiphy.mtx); /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it * to check if it needs to do dev_put(). Otherwise it crashes * since the wdev has been freed, unlike with a netdev where * we need the dev_put() for the netdev to really be freed. */ if (!wdev->netdev) info->user_ptr[1] = NULL; else dev_close(wdev->netdev); mutex_lock(&rdev->wiphy.mtx); return cfg80211_remove_virtual_intf(rdev, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u16 noack_map; if (!info->attrs[NL80211_ATTR_NOACK_MAP]) return -EINVAL; if (!rdev->ops->set_noack_map) return -EOPNOTSUPP; noack_map = nla_get_u16(info->attrs[NL80211_ATTR_NOACK_MAP]); return rdev_set_noack_map(rdev, dev, noack_map); } static int nl80211_validate_key_link_id(struct genl_info *info, struct wireless_dev *wdev, int link_id, bool pairwise) { if (pairwise) { if (link_id != -1) { GENL_SET_ERR_MSG(info, "link ID not allowed for pairwise key"); return -EINVAL; } return 0; } if (wdev->valid_links) { if (link_id == -1) { GENL_SET_ERR_MSG(info, "link ID must for MLO group key"); return -EINVAL; } if (!(wdev->valid_links & BIT(link_id))) { GENL_SET_ERR_MSG(info, "invalid link ID for MLO group key"); return -EINVAL; } } else if (link_id != -1) { GENL_SET_ERR_MSG(info, "link ID not allowed for non-MLO group key"); return -EINVAL; } return 0; } struct get_key_cookie { struct sk_buff *msg; int error; int 
idx; }; static void get_key_callback(void *c, struct key_params *params) { struct nlattr *key; struct get_key_cookie *cookie = c; if ((params->seq && nla_put(cookie->msg, NL80211_ATTR_KEY_SEQ, params->seq_len, params->seq)) || (params->cipher && nla_put_u32(cookie->msg, NL80211_ATTR_KEY_CIPHER, params->cipher))) goto nla_put_failure; key = nla_nest_start_noflag(cookie->msg, NL80211_ATTR_KEY); if (!key) goto nla_put_failure; if ((params->seq && nla_put(cookie->msg, NL80211_KEY_SEQ, params->seq_len, params->seq)) || (params->cipher && nla_put_u32(cookie->msg, NL80211_KEY_CIPHER, params->cipher))) goto nla_put_failure; if (nla_put_u8(cookie->msg, NL80211_KEY_IDX, cookie->idx)) goto nla_put_failure; nla_nest_end(cookie->msg, key); return; nla_put_failure: cookie->error = 1; } static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; u8 key_idx = 0; const u8 *mac_addr = NULL; bool pairwise; struct get_key_cookie cookie = { .error = 0, }; void *hdr; struct sk_buff *msg; bool bigtk_support = false; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION)) bigtk_support = true; if ((wdev->iftype == NL80211_IFTYPE_STATION || wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) bigtk_support = true; if (info->attrs[NL80211_ATTR_KEY_IDX]) { key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); if (key_idx >= 6 && key_idx <= 7 && !bigtk_support) { GENL_SET_ERR_MSG(info, "BIGTK not supported"); return -EINVAL; } } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); pairwise = !!mac_addr; if (info->attrs[NL80211_ATTR_KEY_TYPE]) { u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); if (kt != 
NL80211_KEYTYPE_GROUP && kt != NL80211_KEYTYPE_PAIRWISE) return -EINVAL; pairwise = kt == NL80211_KEYTYPE_PAIRWISE; } if (!rdev->ops->get_key) return -EOPNOTSUPP; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) return -ENOENT; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (!hdr) goto nla_put_failure; cookie.msg = msg; cookie.idx = key_idx; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u8(msg, NL80211_ATTR_KEY_IDX, key_idx)) goto nla_put_failure; if (mac_addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) goto nla_put_failure; err = nl80211_validate_key_link_id(info, wdev, link_id, pairwise); if (err) goto free_msg; err = rdev_get_key(rdev, dev, link_id, key_idx, pairwise, mac_addr, &cookie, get_key_callback); if (err) goto free_msg; if (cookie.error) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct key_parse key; int err; struct net_device *dev = info->user_ptr[1]; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) return err; if (key.idx < 0) return -EINVAL; /* Only support setting default key and * Extended Key ID action NL80211_KEY_SET_TX. 
*/ if (!key.def && !key.defmgmt && !key.defbeacon && !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; if (key.def) { if (!rdev->ops->set_default_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) return err; err = rdev_set_default_key(rdev, dev, link_id, key.idx, key.def_uni, key.def_multi); if (err) return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_key = key.idx; #endif return 0; } else if (key.defmgmt) { if (key.def_uni || !key.def_multi) return -EINVAL; if (!rdev->ops->set_default_mgmt_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) return err; err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); if (err) return err; #ifdef CONFIG_CFG80211_WEXT wdev->wext.default_mgmt_key = key.idx; #endif return 0; } else if (key.defbeacon) { if (key.def_uni || !key.def_multi) return -EINVAL; if (!rdev->ops->set_default_beacon_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (err) return err; err = nl80211_validate_key_link_id(info, wdev, link_id, false); if (err) return err; return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { u8 *mac_addr = NULL; if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!mac_addr || key.idx < 0 || key.idx > 1) return -EINVAL; err = nl80211_validate_key_link_id(info, wdev, link_id, true); if (err) return err; return rdev_add_key(rdev, dev, link_id, key.idx, NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); } return -EINVAL; } static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; struct key_parse key; const u8 *mac_addr = 
NULL; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) return err; if (!key.p.key) { GENL_SET_ERR_MSG(info, "no key"); return -EINVAL; } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (key.type == -1) { if (mac_addr) key.type = NL80211_KEYTYPE_PAIRWISE; else key.type = NL80211_KEYTYPE_GROUP; } /* for now */ if (key.type != NL80211_KEYTYPE_PAIRWISE && key.type != NL80211_KEYTYPE_GROUP) { GENL_SET_ERR_MSG(info, "key type not pairwise or group"); return -EINVAL; } if (key.type == NL80211_KEYTYPE_GROUP && info->attrs[NL80211_ATTR_VLAN_ID]) key.p.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]); if (!rdev->ops->add_key) return -EOPNOTSUPP; if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr)) { GENL_SET_ERR_MSG(info, "key setting validation failed"); return -EINVAL; } err = nl80211_key_allowed(wdev); if (err) GENL_SET_ERR_MSG(info, "key not allowed"); if (!err) err = nl80211_validate_key_link_id(info, wdev, link_id, key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) { err = rdev_add_key(rdev, dev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); if (err) GENL_SET_ERR_MSG(info, "key addition failed"); } return err; } static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; u8 *mac_addr = NULL; struct key_parse key; int link_id = nl80211_link_id_or_invalid(info->attrs); struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) return err; if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); if (key.type == -1) { if (mac_addr) key.type = NL80211_KEYTYPE_PAIRWISE; else key.type = NL80211_KEYTYPE_GROUP; } /* for now */ if (key.type != 
NL80211_KEYTYPE_PAIRWISE && key.type != NL80211_KEYTYPE_GROUP) return -EINVAL; if (!cfg80211_valid_key_idx(rdev, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE)) return -EINVAL; if (!rdev->ops->del_key) return -EOPNOTSUPP; err = nl80211_key_allowed(wdev); if (key.type == NL80211_KEYTYPE_GROUP && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; if (!err) err = nl80211_validate_key_link_id(info, wdev, link_id, key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) err = rdev_del_key(rdev, dev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr); #ifdef CONFIG_CFG80211_WEXT if (!err) { if (key.idx == wdev->wext.default_key) wdev->wext.default_key = -1; else if (key.idx == wdev->wext.default_mgmt_key) wdev->wext.default_mgmt_key = -1; } #endif return err; } /* This function returns an error or the number of nested attributes */ static int validate_acl_mac_addrs(struct nlattr *nl_attr) { struct nlattr *attr; int n_entries = 0, tmp; nla_for_each_nested(attr, nl_attr, tmp) { if (nla_len(attr) != ETH_ALEN) return -EINVAL; n_entries++; } return n_entries; } /* * This function parses ACL information and allocates memory for ACL data. * On successful return, the calling function is responsible to free the * ACL buffer returned by this function. 
*/ static struct cfg80211_acl_data *parse_acl_data(struct wiphy *wiphy, struct genl_info *info) { enum nl80211_acl_policy acl_policy; struct nlattr *attr; struct cfg80211_acl_data *acl; int i = 0, n_entries, tmp; if (!wiphy->max_acl_mac_addrs) return ERR_PTR(-EOPNOTSUPP); if (!info->attrs[NL80211_ATTR_ACL_POLICY]) return ERR_PTR(-EINVAL); acl_policy = nla_get_u32(info->attrs[NL80211_ATTR_ACL_POLICY]); if (acl_policy != NL80211_ACL_POLICY_ACCEPT_UNLESS_LISTED && acl_policy != NL80211_ACL_POLICY_DENY_UNLESS_LISTED) return ERR_PTR(-EINVAL); if (!info->attrs[NL80211_ATTR_MAC_ADDRS]) return ERR_PTR(-EINVAL); n_entries = validate_acl_mac_addrs(info->attrs[NL80211_ATTR_MAC_ADDRS]); if (n_entries < 0) return ERR_PTR(n_entries); if (n_entries > wiphy->max_acl_mac_addrs) return ERR_PTR(-EOPNOTSUPP); acl = kzalloc(struct_size(acl, mac_addrs, n_entries), GFP_KERNEL); if (!acl) return ERR_PTR(-ENOMEM); acl->n_acl_entries = n_entries; nla_for_each_nested(attr, info->attrs[NL80211_ATTR_MAC_ADDRS], tmp) { memcpy(acl->mac_addrs[i].addr, nla_data(attr), ETH_ALEN); i++; } acl->acl_policy = acl_policy; return acl; } static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_acl_data *acl; int err; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; if (!dev->ieee80211_ptr->links[0].ap.beacon_interval) return -EINVAL; acl = parse_acl_data(&rdev->wiphy, info); if (IS_ERR(acl)) return PTR_ERR(acl); err = rdev_set_mac_acl(rdev, dev, acl); kfree(acl); return err; } static u32 rateset_to_mask(struct ieee80211_supported_band *sband, u8 *rates, u8 rates_len) { u8 i; u32 mask = 0; for (i = 0; i < rates_len; i++) { int rate = (rates[i] & 0x7f) * 5; int ridx; for (ridx = 0; ridx < sband->n_bitrates; ridx++) { struct ieee80211_rate *srate = &sband->bitrates[ridx]; if (rate == 
srate->bitrate) { mask |= 1 << ridx; break; } } if (ridx == sband->n_bitrates) return 0; /* rate not found */ } return mask; } static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, u8 *rates, u8 rates_len, u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) { u8 i; memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); for (i = 0; i < rates_len; i++) { int ridx, rbit; ridx = rates[i] / 8; rbit = BIT(rates[i] % 8); /* check validity */ if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) return false; /* check availability */ ridx = array_index_nospec(ridx, IEEE80211_HT_MCS_MASK_LEN); if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) mcs[ridx] |= rbit; else return false; } return true; } static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) { u16 mcs_mask = 0; switch (vht_mcs_map) { case IEEE80211_VHT_MCS_NOT_SUPPORTED: break; case IEEE80211_VHT_MCS_SUPPORT_0_7: mcs_mask = 0x00FF; break; case IEEE80211_VHT_MCS_SUPPORT_0_8: mcs_mask = 0x01FF; break; case IEEE80211_VHT_MCS_SUPPORT_0_9: mcs_mask = 0x03FF; break; default: break; } return mcs_mask; } static void vht_build_mcs_mask(u16 vht_mcs_map, u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) { u8 nss; for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); vht_mcs_map >>= 2; } } static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, struct nl80211_txrate_vht *txrate, u16 mcs[NL80211_VHT_NSS_MAX]) { u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; u8 i; if (!sband->vht_cap.vht_supported) return false; memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); /* Build vht_mcs_mask from VHT capabilities */ vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) mcs[i] = txrate->mcs[i]; else return false; } return true; } static u16 he_mcs_map_to_mcs_mask(u8 he_mcs_map) { switch (he_mcs_map) { case IEEE80211_HE_MCS_NOT_SUPPORTED: return 
0; case IEEE80211_HE_MCS_SUPPORT_0_7: return 0x00FF; case IEEE80211_HE_MCS_SUPPORT_0_9: return 0x03FF; case IEEE80211_HE_MCS_SUPPORT_0_11: return 0xFFF; default: break; } return 0; } static void he_build_mcs_mask(u16 he_mcs_map, u16 he_mcs_mask[NL80211_HE_NSS_MAX]) { u8 nss; for (nss = 0; nss < NL80211_HE_NSS_MAX; nss++) { he_mcs_mask[nss] = he_mcs_map_to_mcs_mask(he_mcs_map & 0x03); he_mcs_map >>= 2; } } static u16 he_get_txmcsmap(struct genl_info *info, unsigned int link_id, const struct ieee80211_sta_he_cap *he_cap) { struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def *chandef; __le16 tx_mcs; chandef = wdev_chandef(wdev, link_id); if (!chandef) { /* * This is probably broken, but we never maintained * a chandef in these cases, so it always was. */ return le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80); } switch (chandef->width) { case NL80211_CHAN_WIDTH_80P80: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80; break; case NL80211_CHAN_WIDTH_160: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_160; break; default: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80; break; } return le16_to_cpu(tx_mcs); } static bool he_set_mcs_mask(struct genl_info *info, struct wireless_dev *wdev, struct ieee80211_supported_band *sband, struct nl80211_txrate_he *txrate, u16 mcs[NL80211_HE_NSS_MAX], unsigned int link_id) { const struct ieee80211_sta_he_cap *he_cap; u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {}; u16 tx_mcs_map = 0; u8 i; he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype); if (!he_cap) return false; memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX); tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); /* Build he_mcs_mask from HE capabilities */ he_build_mcs_mask(tx_mcs_map, tx_mcs_mask); for (i = 0; i < NL80211_HE_NSS_MAX; i++) { if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) mcs[i] = txrate->mcs[i]; else return false; } return true; } static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, struct 
nlattr *attrs[], enum nl80211_attrs attr, struct cfg80211_bitrate_mask *mask, struct net_device *dev, bool default_all_enabled, unsigned int link_id) { struct nlattr *tb[NL80211_TXRATE_MAX + 1]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = dev->ieee80211_ptr; int rem, i; struct nlattr *tx_rates; struct ieee80211_supported_band *sband; u16 vht_tx_mcs_map, he_tx_mcs_map; memset(mask, 0, sizeof(*mask)); /* Default to all rates enabled */ for (i = 0; i < NUM_NL80211_BANDS; i++) { const struct ieee80211_sta_he_cap *he_cap; if (!default_all_enabled) break; sband = rdev->wiphy.bands[i]; if (!sband) continue; mask->control[i].legacy = (1 << sband->n_bitrates) - 1; memcpy(mask->control[i].ht_mcs, sband->ht_cap.mcs.rx_mask, sizeof(mask->control[i].ht_mcs)); if (sband->vht_cap.vht_supported) { vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); } he_cap = ieee80211_get_he_iftype_cap(sband, wdev->iftype); if (!he_cap) continue; he_tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs); mask->control[i].he_gi = 0xFF; mask->control[i].he_ltf = 0xFF; } /* if no rates are given set it back to the defaults */ if (!attrs[attr]) goto out; /* The nested attribute uses enum nl80211_band as the index. This maps * directly to the enum nl80211_band values used in cfg80211. 
*/ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); nla_for_each_nested(tx_rates, attrs[attr], rem) { enum nl80211_band band = nla_type(tx_rates); int err; if (band < 0 || band >= NUM_NL80211_BANDS) return -EINVAL; sband = rdev->wiphy.bands[band]; if (sband == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, NL80211_TXRATE_MAX, tx_rates, nl80211_txattr_policy, info->extack); if (err) return err; if (tb[NL80211_TXRATE_LEGACY]) { mask->control[band].legacy = rateset_to_mask( sband, nla_data(tb[NL80211_TXRATE_LEGACY]), nla_len(tb[NL80211_TXRATE_LEGACY])); if ((mask->control[band].legacy == 0) && nla_len(tb[NL80211_TXRATE_LEGACY])) return -EINVAL; } if (tb[NL80211_TXRATE_HT]) { if (!ht_rateset_to_mask( sband, nla_data(tb[NL80211_TXRATE_HT]), nla_len(tb[NL80211_TXRATE_HT]), mask->control[band].ht_mcs)) return -EINVAL; } if (tb[NL80211_TXRATE_VHT]) { if (!vht_set_mcs_mask( sband, nla_data(tb[NL80211_TXRATE_VHT]), mask->control[band].vht_mcs)) return -EINVAL; } if (tb[NL80211_TXRATE_GI]) { mask->control[band].gi = nla_get_u8(tb[NL80211_TXRATE_GI]); if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI) return -EINVAL; } if (tb[NL80211_TXRATE_HE] && !he_set_mcs_mask(info, wdev, sband, nla_data(tb[NL80211_TXRATE_HE]), mask->control[band].he_mcs, link_id)) return -EINVAL; if (tb[NL80211_TXRATE_HE_GI]) mask->control[band].he_gi = nla_get_u8(tb[NL80211_TXRATE_HE_GI]); if (tb[NL80211_TXRATE_HE_LTF]) mask->control[band].he_ltf = nla_get_u8(tb[NL80211_TXRATE_HE_LTF]); if (mask->control[band].legacy == 0) { /* don't allow empty legacy rates if HT, VHT or HE * are not even supported. 
*/ if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || rdev->wiphy.bands[band]->vht_cap.vht_supported || ieee80211_get_he_iftype_cap(sband, wdev->iftype))) return -EINVAL; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) if (mask->control[band].ht_mcs[i]) goto out; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) if (mask->control[band].vht_mcs[i]) goto out; for (i = 0; i < NL80211_HE_NSS_MAX; i++) if (mask->control[band].he_mcs[i]) goto out; /* legacy and mcs rates may not be both empty */ return -EINVAL; } } out: return 0; } static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, enum nl80211_band band, struct cfg80211_bitrate_mask *beacon_rate) { u32 count_ht, count_vht, count_he, i; u32 rate = beacon_rate->control[band].legacy; /* Allow only one rate */ if (hweight32(rate) > 1) return -EINVAL; count_ht = 0; for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) { return -EINVAL; } else if (beacon_rate->control[band].ht_mcs[i]) { count_ht++; if (count_ht > 1) return -EINVAL; } if (count_ht && rate) return -EINVAL; } count_vht = 0; for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) { return -EINVAL; } else if (beacon_rate->control[band].vht_mcs[i]) { count_vht++; if (count_vht > 1) return -EINVAL; } if (count_vht && rate) return -EINVAL; } count_he = 0; for (i = 0; i < NL80211_HE_NSS_MAX; i++) { if (hweight16(beacon_rate->control[band].he_mcs[i]) > 1) { return -EINVAL; } else if (beacon_rate->control[band].he_mcs[i]) { count_he++; if (count_he > 1) return -EINVAL; } if (count_he && rate) return -EINVAL; } if ((count_ht && count_vht && count_he) || (!rate && !count_ht && !count_vht && !count_he)) return -EINVAL; if (rate && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) return -EINVAL; if (count_ht && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_HT)) return -EINVAL; if (count_vht && 
!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_VHT)) return -EINVAL; if (count_he && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_RATE_HE)) return -EINVAL; return 0; } static int nl80211_parse_mbssid_config(struct wiphy *wiphy, struct net_device *dev, struct nlattr *attrs, struct cfg80211_mbssid_config *config, u8 num_elems) { struct nlattr *tb[NL80211_MBSSID_CONFIG_ATTR_MAX + 1]; if (!wiphy->mbssid_max_interfaces) return -EOPNOTSUPP; if (nla_parse_nested(tb, NL80211_MBSSID_CONFIG_ATTR_MAX, attrs, NULL, NULL) || !tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]) return -EINVAL; config->ema = nla_get_flag(tb[NL80211_MBSSID_CONFIG_ATTR_EMA]); if (config->ema) { if (!wiphy->ema_max_profile_periodicity) return -EOPNOTSUPP; if (num_elems > wiphy->ema_max_profile_periodicity) return -EINVAL; } config->index = nla_get_u8(tb[NL80211_MBSSID_CONFIG_ATTR_INDEX]); if (config->index >= wiphy->mbssid_max_interfaces || (!config->index && !num_elems)) return -EINVAL; if (tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]) { u32 tx_ifindex = nla_get_u32(tb[NL80211_MBSSID_CONFIG_ATTR_TX_IFINDEX]); if ((!config->index && tx_ifindex != dev->ifindex) || (config->index && tx_ifindex == dev->ifindex)) return -EINVAL; if (tx_ifindex != dev->ifindex) { struct net_device *tx_netdev = dev_get_by_index(wiphy_net(wiphy), tx_ifindex); if (!tx_netdev || !tx_netdev->ieee80211_ptr || tx_netdev->ieee80211_ptr->wiphy != wiphy || tx_netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { dev_put(tx_netdev); return -EINVAL; } config->tx_wdev = tx_netdev->ieee80211_ptr; } else { config->tx_wdev = dev->ieee80211_ptr; } } else if (!config->index) { config->tx_wdev = dev->ieee80211_ptr; } else { return -EINVAL; } return 0; } static struct cfg80211_mbssid_elems * nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs) { struct nlattr *nl_elems; struct cfg80211_mbssid_elems *elems; int rem_elems; u8 i = 0, num_elems = 0; if (!wiphy->mbssid_max_interfaces) return 
ERR_PTR(-EINVAL); nla_for_each_nested(nl_elems, attrs, rem_elems) { if (num_elems >= 255) return ERR_PTR(-EINVAL); num_elems++; } elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); if (!elems) return ERR_PTR(-ENOMEM); elems->cnt = num_elems; nla_for_each_nested(nl_elems, attrs, rem_elems) { elems->elem[i].data = nla_data(nl_elems); elems->elem[i].len = nla_len(nl_elems); i++; } return elems; } static struct cfg80211_rnr_elems * nl80211_parse_rnr_elems(struct wiphy *wiphy, struct nlattr *attrs, struct netlink_ext_ack *extack) { struct nlattr *nl_elems; struct cfg80211_rnr_elems *elems; int rem_elems; u8 i = 0, num_elems = 0; nla_for_each_nested(nl_elems, attrs, rem_elems) { int ret; ret = validate_ie_attr(nl_elems, extack); if (ret) return ERR_PTR(ret); num_elems++; } elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL); if (!elems) return ERR_PTR(-ENOMEM); elems->cnt = num_elems; nla_for_each_nested(nl_elems, attrs, rem_elems) { elems->elem[i].data = nla_data(nl_elems); elems->elem[i].len = nla_len(nl_elems); i++; } return elems; } static int nl80211_parse_he_bss_color(struct nlattr *attrs, struct cfg80211_he_bss_color *he_bss_color) { struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1]; int err; err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs, he_bss_color_policy, NULL); if (err) return err; if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]) return -EINVAL; he_bss_color->color = nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); he_bss_color->enabled = !nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); he_bss_color->partial = nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); return 0; } static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct cfg80211_beacon_data *bcn, struct netlink_ext_ack *extack) { bool haveinfo = false; int err; memset(bcn, 0, sizeof(*bcn)); bcn->link_id = nl80211_link_id(attrs); if (attrs[NL80211_ATTR_BEACON_HEAD]) { bcn->head = 
nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); if (!bcn->head_len) return -EINVAL; haveinfo = true; } if (attrs[NL80211_ATTR_BEACON_TAIL]) { bcn->tail = nla_data(attrs[NL80211_ATTR_BEACON_TAIL]); bcn->tail_len = nla_len(attrs[NL80211_ATTR_BEACON_TAIL]); haveinfo = true; } if (!haveinfo) return -EINVAL; if (attrs[NL80211_ATTR_IE]) { bcn->beacon_ies = nla_data(attrs[NL80211_ATTR_IE]); bcn->beacon_ies_len = nla_len(attrs[NL80211_ATTR_IE]); } if (attrs[NL80211_ATTR_IE_PROBE_RESP]) { bcn->proberesp_ies = nla_data(attrs[NL80211_ATTR_IE_PROBE_RESP]); bcn->proberesp_ies_len = nla_len(attrs[NL80211_ATTR_IE_PROBE_RESP]); } if (attrs[NL80211_ATTR_IE_ASSOC_RESP]) { bcn->assocresp_ies = nla_data(attrs[NL80211_ATTR_IE_ASSOC_RESP]); bcn->assocresp_ies_len = nla_len(attrs[NL80211_ATTR_IE_ASSOC_RESP]); } if (attrs[NL80211_ATTR_PROBE_RESP]) { bcn->probe_resp = nla_data(attrs[NL80211_ATTR_PROBE_RESP]); bcn->probe_resp_len = nla_len(attrs[NL80211_ATTR_PROBE_RESP]); } if (attrs[NL80211_ATTR_FTM_RESPONDER]) { struct nlattr *tb[NL80211_FTM_RESP_ATTR_MAX + 1]; err = nla_parse_nested_deprecated(tb, NL80211_FTM_RESP_ATTR_MAX, attrs[NL80211_ATTR_FTM_RESPONDER], NULL, NULL); if (err) return err; if (tb[NL80211_FTM_RESP_ATTR_ENABLED] && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_ENABLE_FTM_RESPONDER)) bcn->ftm_responder = 1; else return -EOPNOTSUPP; if (tb[NL80211_FTM_RESP_ATTR_LCI]) { bcn->lci = nla_data(tb[NL80211_FTM_RESP_ATTR_LCI]); bcn->lci_len = nla_len(tb[NL80211_FTM_RESP_ATTR_LCI]); } if (tb[NL80211_FTM_RESP_ATTR_CIVICLOC]) { bcn->civicloc = nla_data(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]); bcn->civicloc_len = nla_len(tb[NL80211_FTM_RESP_ATTR_CIVICLOC]); } } else { bcn->ftm_responder = -1; } if (attrs[NL80211_ATTR_HE_BSS_COLOR]) { err = nl80211_parse_he_bss_color(attrs[NL80211_ATTR_HE_BSS_COLOR], &bcn->he_bss_color); if (err) return err; bcn->he_bss_color_valid = true; } if (attrs[NL80211_ATTR_MBSSID_ELEMS]) { struct 
cfg80211_mbssid_elems *mbssid =
			nl80211_parse_mbssid_elems(&rdev->wiphy,
						   attrs[NL80211_ATTR_MBSSID_ELEMS]);

		if (IS_ERR(mbssid))
			return PTR_ERR(mbssid);

		bcn->mbssid_ies = mbssid;

		/* EMA RNR elements are only looked at when MBSSID elements exist */
		if (bcn->mbssid_ies && attrs[NL80211_ATTR_EMA_RNR_ELEMS]) {
			struct cfg80211_rnr_elems *rnr =
				nl80211_parse_rnr_elems(&rdev->wiphy,
							attrs[NL80211_ATTR_EMA_RNR_ELEMS],
							extack);

			if (IS_ERR(rnr))
				return PTR_ERR(rnr);

			/* an RNR list with fewer entries than the MBSSID list is rejected */
			if (rnr && rnr->cnt < bcn->mbssid_ies->cnt)
				return -EINVAL;

			bcn->rnr_ies = rnr;
		}
	}

	return 0;
}

/*
 * Parse the nested HE OBSS PD attribute @attrs into @he_obss_pd.
 *
 * The nested attributes are validated against he_obss_pd_policy.
 * NL80211_HE_OBSS_PD_ATTR_SR_CTRL is mandatory; the offsets and the two
 * bitmaps are written only when the corresponding attribute is present, so
 * absent ones keep whatever @he_obss_pd already contained.
 *
 * Returns 0 on success (with he_obss_pd->enable set to true), the error from
 * nla_parse_nested(), or -EINVAL when SR_CTRL is missing or min_offset is
 * greater than max_offset.
 */
static int nl80211_parse_he_obss_pd(struct nlattr *attrs,
				    struct ieee80211_he_obss_pd *he_obss_pd)
{
	struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1];
	int err;

	err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs,
			       he_obss_pd_policy, NULL);
	if (err)
		return err;

	if (!tb[NL80211_HE_OBSS_PD_ATTR_SR_CTRL])
		return -EINVAL;

	he_obss_pd->sr_ctrl = nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_SR_CTRL]);

	if (tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET])
		he_obss_pd->min_offset =
			nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]);
	if (tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET])
		he_obss_pd->max_offset =
			nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]);
	if (tb[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET])
		he_obss_pd->non_srg_max_offset =
			nla_get_u8(tb[NL80211_HE_OBSS_PD_ATTR_NON_SRG_MAX_OFFSET]);

	if (he_obss_pd->min_offset > he_obss_pd->max_offset)
		return -EINVAL;

	/*
	 * NOTE(review): both copies read sizeof(destination) bytes from the
	 * attribute payload; presumably he_obss_pd_policy pins the payload
	 * length of these attributes -- confirm against the policy table.
	 */
	if (tb[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP])
		memcpy(he_obss_pd->bss_color_bitmap,
		       nla_data(tb[NL80211_HE_OBSS_PD_ATTR_BSS_COLOR_BITMAP]),
		       sizeof(he_obss_pd->bss_color_bitmap));

	if (tb[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP])
		memcpy(he_obss_pd->partial_bssid_bitmap,
		       nla_data(tb[NL80211_HE_OBSS_PD_ATTR_PARTIAL_BSSID_BITMAP]),
		       sizeof(he_obss_pd->partial_bssid_bitmap));

	he_obss_pd->enable = true;

	return 0;
}

/*
 * Parse the nested FILS discovery attribute @attrs into @fd.
 *
 * Fails with -EINVAL unless the wiphy advertises
 * NL80211_EXT_FEATURE_FILS_DISCOVERY. The nested attributes are parsed
 * without a policy at this level.
 *
 * If none of INT_MIN, INT_MAX and TMPL is present, only fd->update is set and
 * the remaining fields of @fd are left untouched. If some but not all of
 * them are present, -EINVAL is returned. Otherwise the intervals are read and
 * fd->tmpl / fd->tmpl_len are set to reference the attribute payload (the
 * template is not copied).
 */
static int nl80211_parse_fils_discovery(struct cfg80211_registered_device *rdev,
					struct nlattr *attrs,
					struct cfg80211_fils_discovery *fd)
{
	struct nlattr *tb[NL80211_FILS_DISCOVERY_ATTR_MAX + 1];
	int ret;

	if (!wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_FILS_DISCOVERY))
		return -EINVAL;

	ret = nla_parse_nested(tb, NL80211_FILS_DISCOVERY_ATTR_MAX, attrs,
			       NULL, NULL);
	if (ret)
		return ret;

	/* empty nest: flag an update without supplying any new values */
	if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] &&
	    !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] &&
	    !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]) {
		fd->update = true;
		return 0;
	}

	/* otherwise all three attributes must be given together */
	if (!tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN] ||
	    !tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX] ||
	    !tb[NL80211_FILS_DISCOVERY_ATTR_TMPL])
		return -EINVAL;

	fd->tmpl_len = nla_len(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]);
	fd->tmpl = nla_data(tb[NL80211_FILS_DISCOVERY_ATTR_TMPL]);
	fd->min_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MIN]);
	fd->max_interval = nla_get_u32(tb[NL80211_FILS_DISCOVERY_ATTR_INT_MAX]);
	fd->update = true;
	return 0;
}

/*
 * Parse the nested unsolicited broadcast probe response attribute @attrs
 * into @presp.
 *
 * Fails with -EINVAL unless the wiphy advertises
 * NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP. The nested attributes are
 * parsed without a policy at this level.
 *
 * If neither INT nor TMPL is present, only presp->update is set. If exactly
 * one of them is present, -EINVAL is returned. Otherwise the interval is read
 * and presp->tmpl / presp->tmpl_len are set to reference the attribute
 * payload (the template is not copied).
 */
static int
nl80211_parse_unsol_bcast_probe_resp(struct cfg80211_registered_device *rdev,
				     struct nlattr *attrs,
				     struct cfg80211_unsol_bcast_probe_resp *presp)
{
	struct nlattr *tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX + 1];
	int ret;

	if (!wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_UNSOL_BCAST_PROBE_RESP))
		return -EINVAL;

	ret = nla_parse_nested(tb, NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_MAX,
			       attrs, NULL, NULL);
	if (ret)
		return ret;

	/* empty nest: flag an update without supplying any new values */
	if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] &&
	    !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]) {
		presp->update = true;
		return 0;
	}

	/* otherwise both attributes must be given together */
	if (!tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT] ||
	    !tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL])
		return -EINVAL;

	presp->tmpl = nla_data(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]);
	presp->tmpl_len = nla_len(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_TMPL]);
	presp->interval = nla_get_u32(tb[NL80211_UNSOL_BCAST_PROBE_RESP_ATTR_INT]);
	presp->update = true;
	return 0;
}

/*
 * Scan the payload of a rates element for BSS membership selector values and
 * set the matching ht/vht/he/sae_h2e "required" flag in @params.
 *
 * A NULL @rates (element not found) is ignored. Flags are only ever set
 * here, never cleared.
 */
static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
					    const struct element *rates)
{
	int i;

	if (!rates)
		return;

	for (i = 0; i < rates->datalen; i++) {
		if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
			params->ht_required = true;
		if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY)
			params->vht_required = true;
		if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY)
			params->he_required = true;
		if (rates->data[i] == BSS_MEMBERSHIP_SELECTOR_SAE_H2E)
			params->sae_h2e_required = true;
	}
}

/*
 * Since the nl80211 API didn't include, from the beginning, attributes about
 * HT/VHT requirements/capabilities, we parse them out of the IEs for the
 * benefit of drivers that rebuild IEs in the firmware.
 *
 * All elements are looked up in the beacon tail (params->beacon.tail). The
 * capability/operation pointers stored in @params point into that buffer;
 * nothing is copied.
 *
 * Returns 0 on success, or -EINVAL when an EHT capability or EHT operation
 * element is present but empty or fails its size check. HT/VHT/HE elements
 * that are too short are silently ignored instead.
 */
static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params)
{
	const struct cfg80211_beacon_data *bcn = &params->beacon;
	size_t ies_len = bcn->tail_len;
	const u8 *ies = bcn->tail;
	const struct element *rates;
	const struct element *cap;

	/* membership selectors may appear in either rates element */
	rates = cfg80211_find_elem(WLAN_EID_SUPP_RATES, ies, ies_len);
	nl80211_check_ap_rate_selectors(params, rates);

	rates = cfg80211_find_elem(WLAN_EID_EXT_SUPP_RATES, ies, ies_len);
	nl80211_check_ap_rate_selectors(params, rates);

	cap = cfg80211_find_elem(WLAN_EID_HT_CAPABILITY, ies, ies_len);
	if (cap && cap->datalen >= sizeof(*params->ht_cap))
		params->ht_cap = (void *)cap->data;
	cap = cfg80211_find_elem(WLAN_EID_VHT_CAPABILITY, ies, ies_len);
	if (cap && cap->datalen >= sizeof(*params->vht_cap))
		params->vht_cap = (void *)cap->data;

	/*
	 * For the extension elements below the payload is taken from
	 * data + 1 and one extra byte is required in datalen; presumably
	 * data[0] holds the element ID extension -- confirm against
	 * cfg80211_find_ext_elem().
	 */
	cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_CAPABILITY, ies, ies_len);
	if (cap && cap->datalen >= sizeof(*params->he_cap) + 1)
		params->he_cap = (void *)(cap->data + 1);
	cap = cfg80211_find_ext_elem(WLAN_EID_EXT_HE_OPERATION, ies, ies_len);
	if (cap && cap->datalen >= sizeof(*params->he_oper) + 1)
		params->he_oper = (void *)(cap->data + 1);
	cap = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_CAPABILITY, ies, ies_len);
	if (cap) {
		if (!cap->datalen)
			return -EINVAL;
		params->eht_cap = (void *)(cap->data + 1);
		/*
		 * NOTE(review): params->he_cap can still be NULL here when no
		 * (long enough) HE capability element was found above --
		 * confirm ieee80211_eht_capa_size_ok() copes with that.
		 */
		if (!ieee80211_eht_capa_size_ok((const u8 *)params->he_cap,
						(const u8 *)params->eht_cap,
						cap->datalen - 1, true))
			return -EINVAL;
	}
	cap = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION, ies, ies_len);
	if (cap) {
		if (!cap->datalen)
			return -EINVAL;
		params->eht_oper = (void *)(cap->data + 1);
		if (!ieee80211_eht_oper_size_ok((const u8 *)params->eht_oper,
						cap->datalen - 1))
			return -EINVAL;
	}
	return 0;
}

/*
 * Borrow a channel definition from another interface of the same wiphy.
 *
 * Walks the wiphy's wdev list and copies the preset_chandef of the first AP
 * or P2P-GO interface that has a channel set into params->chandef.
 *
 * Returns true if a channel definition was copied, false otherwise.
 */
static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev,
				   struct cfg80211_ap_settings *params)
{
	struct wireless_dev *wdev;

	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
		if (wdev->iftype != NL80211_IFTYPE_AP &&
		    wdev->iftype != NL80211_IFTYPE_P2P_GO)
			continue;

		if (!wdev->u.ap.preset_chandef.chan)
			continue;

		params->chandef = wdev->u.ap.preset_chandef;
		return true;
	}

	return false;
}

/*
 * Decide whether @auth_type may be used with command @cmd on this device.
 *
 * Values above NL80211_AUTHTYPE_MAX are always rejected. For the commands
 * handled below, SAE and FILS types are accepted only when the matching
 * wiphy feature / extended feature is advertised, and some FILS variants are
 * rejected unconditionally.
 */
static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
				    enum nl80211_auth_type auth_type,
				    enum nl80211_commands cmd)
{
	if (auth_type > NL80211_AUTHTYPE_MAX)
		return false;

	switch (cmd) {
	case NL80211_CMD_AUTHENTICATE:
		if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
		    auth_type == NL80211_AUTHTYPE_SAE)
			return false;
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_FILS_STA) &&
		    (auth_type == NL80211_AUTHTYPE_FILS_SK ||
		     auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
		     auth_type == NL80211_AUTHTYPE_FILS_PK))
			return false;
		return true;
	case NL80211_CMD_CONNECT:
		/* SAE needs either the SAE feature or SAE offload */
		if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SAE_OFFLOAD) &&
		    auth_type == NL80211_AUTHTYPE_SAE)
			return false;

		/* FILS with SK PFS or PK not supported yet */
		if (auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
		    auth_type == NL80211_AUTHTYPE_FILS_PK)
			return false;
		if (!wiphy_ext_feature_isset(
			    &rdev->wiphy,
			    NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
		    auth_type == NL80211_AUTHTYPE_FILS_SK)
			return false;
		return true;
	case NL80211_CMD_START_AP:
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SAE_OFFLOAD_AP) &&
		    auth_type == NL80211_AUTHTYPE_SAE)
			return false;
		/* FILS not supported yet */
		if (auth_type == NL80211_AUTHTYPE_FILS_SK ||
		    auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
		    auth_type == NL80211_AUTHTYPE_FILS_PK)
			return
false;
		return true;
	default:
		/* any other command: nothing is considered valid */
		return false;
	}
}

/*
 * Multicast an NL80211_CMD_START_AP notification for @wdev on the MLME
 * multicast group of the wiphy's network namespace.
 *
 * The message carries the wiphy index, ifindex and wdev id, the SSID when
 * one is recorded in wdev->u.ap, and the link ID when the wdev has valid
 * links. There is no return value: if the message cannot be allocated or
 * filled it is freed and nothing is sent.
 */
static void nl80211_send_ap_started(struct wireless_dev *wdev,
				    unsigned int link_id)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_START_AP);
	if (!hdr)
		goto out;

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    (wdev->u.ap.ssid_len &&
	     nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len,
		     wdev->u.ap.ssid)) ||
	    (wdev->valid_links &&
	     nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)))
		goto out;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(wiphy), msg, 0,
				NL80211_MCGRP_MLME, GFP_KERNEL);
	return;
out:
	nlmsg_free(msg);
}

/*
 * Check the HE/EHT elements found in the beacon against the channel flags.
 *
 * Returns -EOPNOTSUPP when HE capability/operation is configured on a channel
 * flagged IEEE80211_CHAN_NO_HE, or EHT capability/operation on a channel
 * flagged IEEE80211_CHAN_NO_EHT; 0 otherwise. params->chandef.chan is
 * dereferenced unconditionally, so it must be set by the caller.
 */
static int nl80211_validate_ap_phy_operation(struct cfg80211_ap_settings *params)
{
	struct ieee80211_channel *channel = params->chandef.chan;

	if ((params->he_cap || params->he_oper) &&
	    (channel->flags & IEEE80211_CHAN_NO_HE))
		return -EOPNOTSUPP;

	if ((params->eht_cap || params->eht_oper) &&
	    (channel->flags & IEEE80211_CHAN_NO_EHT))
		return -EOPNOTSUPP;

	return 0;
}

/*
 * Handler that builds a struct cfg80211_ap_settings from the request
 * attributes and passes it to the driver through rdev_start_ap().
 *
 * Only AP and P2P-GO interfaces are accepted, the driver must implement
 * start_ap, and the link must not have CAC started (-EBUSY) or already have
 * a beacon interval recorded (-EALREADY). Beacon interval, DTIM period and
 * beacon head are mandatory; everything else is optional and validated as it
 * is parsed.
 *
 * On success the beacon interval, channel definition and SSID are recorded
 * in @wdev and an NL80211_CMD_START_AP notification is sent. The settings
 * structure and the ACL/MBSSID/RNR allocations hanging off it are freed on
 * every path that reaches the "out" label, success included.
 *
 * Returns 0 on success or a negative errno.
 */
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_beaconing_check_config beacon_check = {};
	unsigned int link_id = nl80211_link_id(info->attrs);
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_ap_settings *params;
	int err;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
		return -EOPNOTSUPP;

	if (!rdev->ops->start_ap)
		return -EOPNOTSUPP;

	if (wdev->links[link_id].cac_started)
		return -EBUSY;

	/* a non-zero beacon interval is recorded below once the AP started */
	if (wdev->links[link_id].ap.beacon_interval)
		return -EALREADY;

	/* these are required for START_AP */
	if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
	    !info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
	    !info->attrs[NL80211_ATTR_BEACON_HEAD])
		return -EINVAL;

	/* any SMPS mode other than "off" is refused */
	if (info->attrs[NL80211_ATTR_SMPS_MODE] &&
	    nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]) != NL80211_SMPS_OFF)
		return -EOPNOTSUPP;

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (!params)
		return -ENOMEM;

	err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon,
				   info->extack);
	if (err)
		goto out;

	params->beacon_interval =
		nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
	params->dtim_period =
		nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);

	err = cfg80211_validate_beacon_int(rdev, dev->ieee80211_ptr->iftype,
					   params->beacon_interval);
	if (err)
		goto out;

	/*
	 * In theory, some of these attributes should be required here
	 * but since they were not used when the command was originally
	 * added, keep them optional for old user space programs to let
	 * them continue to work with drivers that do not need the
	 * additional information -- drivers must check!
	 */
	if (info->attrs[NL80211_ATTR_SSID]) {
		/* points into the request; copied to wdev only on success */
		params->ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
		params->ssid_len =
			nla_len(info->attrs[NL80211_ATTR_SSID]);
		if (params->ssid_len == 0) {
			err = -EINVAL;
			goto out;
		}

		if (wdev->u.ap.ssid_len &&
		    (wdev->u.ap.ssid_len != params->ssid_len ||
		     memcmp(wdev->u.ap.ssid, params->ssid, params->ssid_len))) {
			/* require identical SSID for MLO */
			err = -EINVAL;
			goto out;
		}
	} else if (wdev->valid_links) {
		/* require SSID for MLO */
		err = -EINVAL;
		goto out;
	}

	if (info->attrs[NL80211_ATTR_HIDDEN_SSID])
		params->hidden_ssid = nla_get_u32(
			info->attrs[NL80211_ATTR_HIDDEN_SSID]);

	params->privacy = !!info->attrs[NL80211_ATTR_PRIVACY];

	if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
		params->auth_type = nla_get_u32(
			info->attrs[NL80211_ATTR_AUTH_TYPE]);
		if (!nl80211_valid_auth_type(rdev, params->auth_type,
					     NL80211_CMD_START_AP)) {
			err = -EINVAL;
			goto out;
		}
	} else
		params->auth_type = NL80211_AUTHTYPE_AUTOMATIC;

	err = nl80211_crypto_settings(rdev, info, &params->crypto,
				      NL80211_MAX_NR_CIPHER_SUITES);
	if (err)
		goto out;

	if (info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]) {
		if (!(rdev->wiphy.features & NL80211_FEATURE_INACTIVITY_TIMER)) {
			err = -EOPNOTSUPP;
			goto out;
		}
		params->inactivity_timeout = nla_get_u16(
			info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]);
	}

	/* the two P2P power-save attributes are only valid on a P2P GO */
	if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) {
		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
			err = -EINVAL;
			goto out;
		}
		params->p2p_ctwindow =
			nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
		if (params->p2p_ctwindow != 0 &&
		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) {
			err = -EINVAL;
			goto out;
		}
	}

	if (info->attrs[NL80211_ATTR_P2P_OPPPS]) {
		u8 tmp;

		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
			err = -EINVAL;
			goto out;
		}
		tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
		params->p2p_opp_ps = tmp;
		if (params->p2p_opp_ps != 0 &&
		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) {
			err = -EINVAL;
			goto out;
		}
	}

	/*
	 * Channel selection, in order of preference: explicit frequency in
	 * the request, this wdev's preset chandef, then the preset chandef
	 * of any other AP/GO interface on the wiphy. With valid links only
	 * the explicit form is accepted.
	 */
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
		err = nl80211_parse_chandef(rdev, info, &params->chandef);
		if (err)
			goto out;
	} else if (wdev->valid_links) {
		/* with MLD need to specify the channel configuration */
		err = -EINVAL;
		goto out;
	} else if (wdev->u.ap.preset_chandef.chan) {
		params->chandef = wdev->u.ap.preset_chandef;
	} else if (!nl80211_get_ap_channel(rdev, params)) {
		err = -EINVAL;
		goto out;
	}

	/* the regulatory power type is derived from the beacon tail */
	beacon_check.iftype = wdev->iftype;
	beacon_check.relax = true;
	beacon_check.reg_power =
		cfg80211_get_6ghz_power_type(params->beacon.tail,
					     params->beacon.tail_len);
	if (!cfg80211_reg_check_beaconing(&rdev->wiphy, &params->chandef,
					  &beacon_check)) {
		err = -EINVAL;
		goto out;
	}

	if (info->attrs[NL80211_ATTR_TX_RATES]) {
		err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
						    NL80211_ATTR_TX_RATES,
						    &params->beacon_rate,
						    dev, false, link_id);
		if (err)
			goto out;

		err = validate_beacon_tx_rate(rdev, params->chandef.chan->band,
					      &params->beacon_rate);
		if (err)
			goto out;
	}

	/* PBSS is refused when the wiphy has no 60 GHz band */
	params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
	if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
		err = -EOPNOTSUPP;
		goto out;
	}

	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
		params->acl = parse_acl_data(&rdev->wiphy, info);
		if (IS_ERR(params->acl)) {
			err = PTR_ERR(params->acl);
			/* so that the kfree() at "out" sees NULL, not an ERR_PTR */
			params->acl = NULL;
			goto out;
		}
	}

	params->twt_responder =
		    nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]);

	if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) {
		err = nl80211_parse_he_obss_pd(
					info->attrs[NL80211_ATTR_HE_OBSS_PD],
					&params->he_obss_pd);
		if (err)
			goto out;
	}

	if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) {
		err = nl80211_parse_fils_discovery(rdev,
						   info->attrs[NL80211_ATTR_FILS_DISCOVERY],
						   &params->fils_discovery);
		if (err)
			goto out;
	}

	if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) {
		err = nl80211_parse_unsol_bcast_probe_resp(
			rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP],
			&params->unsol_bcast_probe_resp);
		if (err)
			goto out;
	}

	if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) {
		err = nl80211_parse_mbssid_config(&rdev->wiphy, dev,
						  info->attrs[NL80211_ATTR_MBSSID_CONFIG],
						  &params->mbssid_config,
						  params->beacon.mbssid_ies ?
							params->beacon.mbssid_ies->cnt :
							0);
		if (err)
			goto out;
	}

	/* RNR elements from the beacon are only accepted together with EMA */
	if (!params->mbssid_config.ema && params->beacon.rnr_ies) {
		err = -EINVAL;
		goto out;
	}

	err = nl80211_calculate_ap_params(params);
	if (err)
		goto out;

	err = nl80211_validate_ap_phy_operation(params);
	if (err)
		goto out;

	/* an explicit flags attribute takes precedence over the legacy flag */
	if (info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS])
		params->flags = nla_get_u32(
			info->attrs[NL80211_ATTR_AP_SETTINGS_FLAGS]);
	else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])
		params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT;

	/* a different socket may not take over an already owned connection */
	if (wdev->conn_owner_nlportid &&
	    info->attrs[NL80211_ATTR_SOCKET_OWNER] &&
	    wdev->conn_owner_nlportid != info->snd_portid) {
		err = -EINVAL;
		goto out;
	}

	/* FIXME: validate MLO/link-id against driver capabilities */

	err = rdev_start_ap(rdev, dev, params);
	if (!err) {
		wdev->links[link_id].ap.beacon_interval = params->beacon_interval;
		wdev->links[link_id].ap.chandef = params->chandef;
		wdev->u.ap.ssid_len = params->ssid_len;
		/*
		 * NOTE(review): without an SSID attribute params->ssid is NULL
		 * and ssid_len is 0, making this a zero-length copy from NULL.
		 */
		memcpy(wdev->u.ap.ssid, params->ssid,
		       params->ssid_len);

		if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
			wdev->conn_owner_nlportid = info->snd_portid;

		nl80211_send_ap_started(wdev, link_id);
	}
out:
	kfree(params->acl);
	kfree(params->beacon.mbssid_ies);
	/*
	 * Drop the reference on the transmitting interface's netdev, but only
	 * when it is a different netdev from @dev; presumably that reference
	 * was taken by nl80211_parse_mbssid_config() -- confirm there.
	 */
	if (params->mbssid_config.tx_wdev &&
	    params->mbssid_config.tx_wdev->netdev &&
	    params->mbssid_config.tx_wdev->netdev != dev)
		dev_put(params->mbssid_config.tx_wdev->netdev);
	kfree(params->beacon.rnr_ies);
	kfree(params);

	return err;
}

/*
 * Handler that parses new beacon data from the request and hands it to the
 * driver through rdev_change_beacon().
 *
 * Only AP and P2P-GO interfaces are accepted, the driver must implement
 * change_beacon, and the link must already have a beacon interval recorded
 * (-EINVAL otherwise). Returns 0 on success or a negative errno.
 */
static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_beaconing_check_config beacon_check = {};
	unsigned int link_id = nl80211_link_id(info->attrs);
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_ap_update *params;
	struct nlattr *attr;
	int err;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
		return -EOPNOTSUPP;

	if (!rdev->ops->change_beacon)
		return -EOPNOTSUPP;

	/* no beacon interval recorded for this link: nothing to update */
	if (!wdev->links[link_id].ap.beacon_interval)
		return -EINVAL;

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (!params)
		return -ENOMEM;

	err = nl80211_parse_beacon(rdev, info->attrs, &params->beacon,
				   info->extack);
	if (err)
		goto out;

	/* recheck beaconing is permitted with possibly changed power type */
	beacon_check.iftype = wdev->iftype;
	beacon_check.relax = true;
	beacon_check.reg_power =
		cfg80211_get_6ghz_power_type(params->beacon.tail,
					     params->beacon.tail_len);
	if (!cfg80211_reg_check_beaconing(&rdev->wiphy,
					  &wdev->links[link_id].ap.chandef,
					  &beacon_check)) {
		err = -EINVAL;
		goto out;
	}

	attr = info->attrs[NL80211_ATTR_FILS_DISCOVERY];
	if (attr) {
		err = nl80211_parse_fils_discovery(rdev, attr,
						   &params->fils_discovery);
		if (err)
			goto out;
	}

	attr = info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP];
	if (attr) {
		err = nl80211_parse_unsol_bcast_probe_resp(rdev, attr,
							   &params->unsol_bcast_probe_resp);
		if (err)
			goto out;
	}

	err = rdev_change_beacon(rdev, dev, params);

out:
	/* reached on success as well: the parsed data is always released */
	kfree(params->beacon.mbssid_ies);
	kfree(params->beacon.rnr_ies);
	kfree(params);
	return err;
}

/*
 * Handler that stops the AP on the link named in the request by delegating
 * to cfg80211_stop_ap(); its return value is passed through unchanged.
 */
static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	unsigned int link_id = nl80211_link_id(info->attrs);
	struct net_device *dev = info->user_ptr[1];

	return cfg80211_stop_ap(rdev, dev, link_id, false);
}

/*
 * Policy for the nested station flags attribute: each listed flag is a
 * payload-less NLA_FLAG. Used by parse_station_flags() for the old
 * NL80211_ATTR_STA_FLAGS attribute.
 */
static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
	[NL80211_STA_FLAG_AUTHORIZED] = { .type = NLA_FLAG },
	[NL80211_STA_FLAG_SHORT_PREAMBLE] = { .type = NLA_FLAG },
	[NL80211_STA_FLAG_WME] = { .type = NLA_FLAG },
	[NL80211_STA_FLAG_MFP] = { .type = NLA_FLAG },
	[NL80211_STA_FLAG_AUTHENTICATED] = { .type = NLA_FLAG },
	[NL80211_STA_FLAG_TDLS_PEER] = { .type = NLA_FLAG },
};

/*
 * Fill params->sta_flags_mask / params->sta_flags_set from the request.
 *
 * NL80211_ATTR_STA_FLAGS2 (a struct nl80211_sta_flag_update carrying mask and
 * set) is preferred; if present the old attribute is not looked at. Otherwise
 * the old nested NL80211_ATTR_STA_FLAGS is parsed, with the mask chosen from
 * @iftype and the set bits taken from the flags present in the nest.
 *
 * Returns 0 on success (also when neither attribute is present) or -EINVAL
 * when the invalid flag bit is used, the nest fails to parse, @iftype has no
 * mask defined, or the old attribute carries a flag above
 * NL80211_STA_FLAG_MAX_OLD_API.
 */
static int parse_station_flags(struct genl_info *info,
			       enum nl80211_iftype iftype,
			       struct station_parameters *params)
{
	struct nlattr *flags[NL80211_STA_FLAG_MAX + 1];
	struct nlattr *nla;
	int flag;

	/*
	 * Try parsing the new attribute first so userspace
	 * can specify both for older kernels.
	 */
	nla = info->attrs[NL80211_ATTR_STA_FLAGS2];
	if (nla) {
		struct nl80211_sta_flag_update *sta_flags;

		sta_flags = nla_data(nla);
		params->sta_flags_mask = sta_flags->mask;
		params->sta_flags_set = sta_flags->set;
		/* bits outside the mask are dropped from the set */
		params->sta_flags_set &= params->sta_flags_mask;
		if ((params->sta_flags_mask |
		     params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID))
			return -EINVAL;
		return 0;
	}

	/* if present, parse the old attribute */

	nla = info->attrs[NL80211_ATTR_STA_FLAGS];
	if (!nla)
		return 0;

	if (nla_parse_nested_deprecated(flags, NL80211_STA_FLAG_MAX, nla, sta_flags_policy, info->extack))
		return -EINVAL;

	/*
	 * Only allow certain flags for interface types so that
	 * other attributes are silently ignored. Remember that
	 * this is backward compatibility code with old userspace
	 * and shouldn't be hit in other cases anyway.
	 */
	switch (iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_P2P_GO:
		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) |
					 BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
					 BIT(NL80211_STA_FLAG_WME) |
					 BIT(NL80211_STA_FLAG_MFP);
		break;
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_STATION:
		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHORIZED) |
					 BIT(NL80211_STA_FLAG_TDLS_PEER);
		break;
	case NL80211_IFTYPE_MESH_POINT:
		params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
					 BIT(NL80211_STA_FLAG_MFP) |
					 BIT(NL80211_STA_FLAG_AUTHORIZED);
		break;
	default:
		return -EINVAL;
	}

	/*
	 * Every flag present in the nest is added to the set, regardless of
	 * the mask chosen above; the loop starts at 1, so index 0 is never
	 * examined.
	 */
	for (flag = 1; flag <= NL80211_STA_FLAG_MAX; flag++) {
		if (flags[flag]) {
			params->sta_flags_set |= (1<<flag);

			/* no longer support new API additions in old API */
			if (flag > NL80211_STA_FLAG_MAX_OLD_API)
				return -EINVAL;
		}
	}

	return 0;
}

/*
 * Append a nested rate-info attribute of type @attr describing @info to
 * @msg.
 *
 * Returns true on success and false when the nest cannot be started or an
 * attribute does not fit. On failure the partially written nest is not
 * cancelled here.
 */
bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, int attr)
{
	struct nlattr *rate;
	u32 bitrate;
	u16 bitrate_compat;
	enum nl80211_rate_info
rate_flg;

	rate = nla_nest_start_noflag(msg, attr);
	if (!rate)
		return false;

	/* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */
	bitrate = cfg80211_calculate_bitrate(info);
	/* report 16-bit bitrate only if we can */
	bitrate_compat = bitrate < (1UL << 16) ? bitrate : 0;
	/* a zero bitrate is not reported in either form */
	if (bitrate > 0 &&
	    nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate))
		return false;
	if (bitrate_compat > 0 &&
	    nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat))
		return false;

	/*
	 * Map the bandwidth to its width flag attribute. 20 MHz and the
	 * HE/EHT RU bandwidths have no flag (rate_flg = 0); an unknown
	 * bandwidth warns and is then treated like 20 MHz.
	 */
	switch (info->bw) {
	case RATE_INFO_BW_1:
		rate_flg = NL80211_RATE_INFO_1_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_2:
		rate_flg = NL80211_RATE_INFO_2_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_4:
		rate_flg = NL80211_RATE_INFO_4_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_5:
		rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_8:
		rate_flg = NL80211_RATE_INFO_8_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_10:
		rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_16:
		rate_flg = NL80211_RATE_INFO_16_MHZ_WIDTH;
		break;
	default:
		WARN_ON(1);
		fallthrough;
	case RATE_INFO_BW_20:
		rate_flg = 0;
		break;
	case RATE_INFO_BW_40:
		rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_80:
		rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_160:
		rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_HE_RU:
		rate_flg = 0;
		WARN_ON(!(info->flags & RATE_INFO_FLAGS_HE_MCS));
		break;
	case RATE_INFO_BW_320:
		rate_flg = NL80211_RATE_INFO_320_MHZ_WIDTH;
		break;
	case RATE_INFO_BW_EHT_RU:
		rate_flg = 0;
		WARN_ON(!(info->flags & RATE_INFO_FLAGS_EHT_MCS));
		break;
	}

	if (rate_flg && nla_put_flag(msg, rate_flg))
		return false;

	/*
	 * The MCS flags are tested in a fixed order and only the first match
	 * is reported; a rate with none of them set gets no MCS attributes.
	 */
	if (info->flags & RATE_INFO_FLAGS_MCS) {
		if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs))
			return false;
		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
			return false;
	} else if (info->flags & RATE_INFO_FLAGS_VHT_MCS) {
		if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_MCS, info->mcs))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss))
			return false;
		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
			return false;
	} else if (info->flags & RATE_INFO_FLAGS_HE_MCS) {
		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_MCS, info->mcs))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_NSS, info->nss))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_GI, info->he_gi))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_HE_DCM, info->he_dcm))
			return false;
		/* the RU allocation is only reported for RU bandwidth */
		if (info->bw == RATE_INFO_BW_HE_RU &&
		    nla_put_u8(msg, NL80211_RATE_INFO_HE_RU_ALLOC,
			       info->he_ru_alloc))
			return false;
	} else if (info->flags & RATE_INFO_FLAGS_S1G_MCS) {
		if (nla_put_u8(msg, NL80211_RATE_INFO_S1G_MCS, info->mcs))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_S1G_NSS, info->nss))
			return false;
		if (info->flags & RATE_INFO_FLAGS_SHORT_GI &&
		    nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))
			return false;
	} else if (info->flags & RATE_INFO_FLAGS_EHT_MCS) {
		if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_MCS, info->mcs))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_NSS, info->nss))
			return false;
		if (nla_put_u8(msg, NL80211_RATE_INFO_EHT_GI, info->eht_gi))
			return false;
		/* the RU allocation is only reported for RU bandwidth */
		if (info->bw == RATE_INFO_BW_EHT_RU &&
		    nla_put_u8(msg, NL80211_RATE_INFO_EHT_RU_ALLOC,
			       info->eht_ru_alloc))
			return false;
	}

	nla_nest_end(msg, rate);
	return true;
}

/*
 * Append a nested attribute of type @id holding one u8 per chain selected in
 * @mask, each using the chain index as its attribute type and signal[index]
 * as its value.
 *
 * An empty @mask emits nothing and counts as success. Returns false when the
 * nest cannot be started or a value does not fit; the partially written nest
 * is not cancelled here.
 */
static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal,
			       int id)
{
	void *attr;
	int i = 0;

	if (!mask)
		return true;

	attr = nla_nest_start_noflag(msg, id);
	if (!attr)
		return false;

	for (i = 0; i < IEEE80211_MAX_CHAINS; i++) {
		if (!(mask & BIT(i)))
			continue;

		if (nla_put_u8(msg, i, signal[i]))
			return false;
	}

	nla_nest_end(msg, attr);

	return true;
}

/*
 * Serialise one station's information into @msg as a @cmd message.
 *
 * The message carries the ifindex, MAC address and generation, a nested
 * NL80211_ATTR_STA_INFO with the fields flagged in sinfo->filled (plus
 * per-TID statistics when sinfo->pertid is set), and after the nest the
 * association request/response IEs and MLO parameters when available.
 *
 * cfg80211_sinfo_release_content(@sinfo) is called on every exit path, so
 * the caller must not release it again afterwards.
 *
 * Returns 0 on success, -1 when the message header cannot be added, or
 * -EMSGSIZE (after cancelling the message) when an attribute does not fit.
 */
static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
				u32 seq, int flags,
				struct cfg80211_registered_device *rdev,
				struct net_device *dev,
				const u8 *mac_addr, struct station_info *sinfo)
{
	void *hdr;
	struct nlattr *sinfoattr, *bss_param;

	hdr = nl80211hdr_put(msg, portid, seq,
flags, cmd);
	if (!hdr) {
		cfg80211_sinfo_release_content(sinfo);
		return -1;
	}

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) ||
	    nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation))
		goto nla_put_failure;

	sinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_STA_INFO);
	if (!sinfoattr)
		goto nla_put_failure;

/*
 * PUT_SINFO emits NL80211_STA_INFO_<attr> from sinfo-><memb> with
 * nla_put_<type>() when the matching bit is set in sinfo->filled. The
 * BUILD_BUG_ON refuses types as wide as u64; those go through PUT_SINFO_U64,
 * which uses nla_put_u64_64bit() with NL80211_STA_INFO_PAD. Both jump to
 * nla_put_failure when the attribute does not fit.
 */
#define PUT_SINFO(attr, memb, type) do {				\
	BUILD_BUG_ON(sizeof(type) == sizeof(u64));			\
	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&	\
	    nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr,		\
			     sinfo->memb))				\
		goto nla_put_failure;					\
	} while (0)
#define PUT_SINFO_U64(attr, memb)	do {				\
	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_ ## attr) &&	\
	    nla_put_u64_64bit(msg, NL80211_STA_INFO_ ## attr,		\
			      sinfo->memb, NL80211_STA_INFO_PAD))	\
		goto nla_put_failure;					\
	} while (0)

	PUT_SINFO(CONNECTED_TIME, connected_time, u32);
	PUT_SINFO(INACTIVE_TIME, inactive_time, u32);
	PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at);

	/*
	 * The 32-bit byte counters are emitted when either the 32-bit or the
	 * 64-bit variant is flagged as filled; the value is truncated to u32.
	 */
	if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) |
			     BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) &&
	    nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES,
			(u32)sinfo->rx_bytes))
		goto nla_put_failure;

	if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES) |
			     BIT_ULL(NL80211_STA_INFO_TX_BYTES64)) &&
	    nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES,
			(u32)sinfo->tx_bytes))
		goto nla_put_failure;

	PUT_SINFO_U64(RX_BYTES64, rx_bytes);
	PUT_SINFO_U64(TX_BYTES64, tx_bytes);
	PUT_SINFO(LLID, llid, u16);
	PUT_SINFO(PLID, plid, u16);
	PUT_SINFO(PLINK_STATE, plink_state, u8);
	PUT_SINFO_U64(RX_DURATION, rx_duration);
	PUT_SINFO_U64(TX_DURATION, tx_duration);

	if (wiphy_ext_feature_isset(&rdev->wiphy,
				    NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
		PUT_SINFO(AIRTIME_WEIGHT, airtime_weight, u16);

	/* signal values are only reported for the MBM signal type */
	switch (rdev->wiphy.signal_type) {
	case CFG80211_SIGNAL_TYPE_MBM:
		PUT_SINFO(SIGNAL, signal, u8);
		PUT_SINFO(SIGNAL_AVG, signal_avg, u8);
		break;
	default:
		break;
	}
	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL)) {
		if (!nl80211_put_signal(msg, sinfo->chains,
					sinfo->chain_signal,
					NL80211_STA_INFO_CHAIN_SIGNAL))
			goto nla_put_failure;
	}
	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) {
		if (!nl80211_put_signal(msg, sinfo->chains,
					sinfo->chain_signal_avg,
					NL80211_STA_INFO_CHAIN_SIGNAL_AVG))
			goto nla_put_failure;
	}
	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) {
		if (!nl80211_put_sta_rate(msg, &sinfo->txrate,
					  NL80211_STA_INFO_TX_BITRATE))
			goto nla_put_failure;
	}
	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) {
		if (!nl80211_put_sta_rate(msg, &sinfo->rxrate,
					  NL80211_STA_INFO_RX_BITRATE))
			goto nla_put_failure;
	}

	PUT_SINFO(RX_PACKETS, rx_packets, u32);
	PUT_SINFO(TX_PACKETS, tx_packets, u32);
	PUT_SINFO(TX_RETRIES, tx_retries, u32);
	PUT_SINFO(TX_FAILED, tx_failed, u32);
	PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32);
	PUT_SINFO(AIRTIME_LINK_METRIC, airtime_link_metric, u32);
	PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32);
	PUT_SINFO(LOCAL_PM, local_pm, u32);
	PUT_SINFO(PEER_PM, peer_pm, u32);
	PUT_SINFO(NONPEER_PM, nonpeer_pm, u32);
	PUT_SINFO(CONNECTED_TO_GATE, connected_to_gate, u8);
	PUT_SINFO(CONNECTED_TO_AS, connected_to_as, u8);

	if (sinfo->filled & BIT_ULL(NL80211_STA_INFO_BSS_PARAM)) {
		bss_param = nla_nest_start_noflag(msg,
						  NL80211_STA_INFO_BSS_PARAM);
		if (!bss_param)
			goto nla_put_failure;

		/* the three flags are optional, DTIM period and beacon interval are not */
		if (((sinfo->bss_param.flags & BSS_PARAM_FLAGS_CTS_PROT) &&
		     nla_put_flag(msg, NL80211_STA_BSS_PARAM_CTS_PROT)) ||
		    ((sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_PREAMBLE) &&
		     nla_put_flag(msg, NL80211_STA_BSS_PARAM_SHORT_PREAMBLE)) ||
		    ((sinfo->bss_param.flags & BSS_PARAM_FLAGS_SHORT_SLOT_TIME) &&
		     nla_put_flag(msg, NL80211_STA_BSS_PARAM_SHORT_SLOT_TIME)) ||
		    nla_put_u8(msg, NL80211_STA_BSS_PARAM_DTIM_PERIOD,
			       sinfo->bss_param.dtim_period) ||
		    nla_put_u16(msg, NL80211_STA_BSS_PARAM_BEACON_INTERVAL,
				sinfo->bss_param.beacon_interval))
			goto nla_put_failure;

		nla_nest_end(msg, bss_param);
	}
	if ((sinfo->filled & BIT_ULL(NL80211_STA_INFO_STA_FLAGS)) &&
	    nla_put(msg,
		    NL80211_STA_INFO_STA_FLAGS,
		    sizeof(struct nl80211_sta_flag_update),
		    &sinfo->sta_flags))
		goto nla_put_failure;

	PUT_SINFO_U64(T_OFFSET, t_offset);
	PUT_SINFO_U64(RX_DROP_MISC, rx_dropped_misc);
	PUT_SINFO_U64(BEACON_RX, rx_beacon);
	PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8);
	PUT_SINFO(RX_MPDUS, rx_mpdu_count, u32);
	PUT_SINFO(FCS_ERROR_COUNT, fcs_err_count, u32);
	if (wiphy_ext_feature_isset(&rdev->wiphy,
				    NL80211_EXT_FEATURE_ACK_SIGNAL_SUPPORT)) {
		PUT_SINFO(ACK_SIGNAL, ack_signal, u8);
		PUT_SINFO(ACK_SIGNAL_AVG, avg_ack_signal, s8);
	}

#undef PUT_SINFO
#undef PUT_SINFO_U64

	if (sinfo->pertid) {
		struct nlattr *tidsattr;
		int tid;

		tidsattr = nla_nest_start_noflag(msg,
						 NL80211_STA_INFO_TID_STATS);
		if (!tidsattr)
			goto nla_put_failure;

		/*
		 * IEEE80211_NUM_TIDS + 1 entries are visited; entries with
		 * nothing filled are skipped, the others are nested under
		 * attribute type tid + 1.
		 */
		for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) {
			struct cfg80211_tid_stats *tidstats;
			struct nlattr *tidattr;

			tidstats = &sinfo->pertid[tid];

			if (!tidstats->filled)
				continue;

			tidattr = nla_nest_start_noflag(msg, tid + 1);
			if (!tidattr)
				goto nla_put_failure;

/* emit a 64-bit per-TID counter when flagged in tidstats->filled */
#define PUT_TIDVAL_U64(attr, memb) do {					\
	if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) &&	\
	    nla_put_u64_64bit(msg, NL80211_TID_STATS_ ## attr,		\
			      tidstats->memb, NL80211_TID_STATS_PAD))	\
		goto nla_put_failure;					\
	} while (0)

			PUT_TIDVAL_U64(RX_MSDU, rx_msdu);
			PUT_TIDVAL_U64(TX_MSDU, tx_msdu);
			PUT_TIDVAL_U64(TX_MSDU_RETRIES, tx_msdu_retries);
			PUT_TIDVAL_U64(TX_MSDU_FAILED, tx_msdu_failed);

#undef PUT_TIDVAL_U64
			if ((tidstats->filled &
			     BIT(NL80211_TID_STATS_TXQ_STATS)) &&
			    !nl80211_put_txq_stats(msg, &tidstats->txq_stats,
						   NL80211_TID_STATS_TXQ_STATS))
				goto nla_put_failure;

			nla_nest_end(msg, tidattr);
		}

		nla_nest_end(msg, tidsattr);
	}

	nla_nest_end(msg, sinfoattr);

	/* the remaining attributes sit next to, not inside, the STA_INFO nest */
	if (sinfo->assoc_req_ies_len &&
	    nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len,
		    sinfo->assoc_req_ies))
		goto nla_put_failure;

	if (sinfo->assoc_resp_ies_len &&
	    nla_put(msg, NL80211_ATTR_RESP_IE, sinfo->assoc_resp_ies_len,
		    sinfo->assoc_resp_ies))
		goto nla_put_failure;

	if (sinfo->mlo_params_valid) {
		if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID,
			       sinfo->assoc_link_id))
			goto nla_put_failure;

		/* an all-zero MLD address is not reported */
		if (!is_zero_ether_addr(sinfo->mld_addr) &&
		    nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN,
			    sinfo->mld_addr))
			goto nla_put_failure;
	}

	cfg80211_sinfo_release_content(sinfo);
	genlmsg_end(msg, hdr);
	return 0;

 nla_put_failure:
	cfg80211_sinfo_release_content(sinfo);
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Dump callback that emits one NL80211_CMD_NEW_STATION message per station
 * of the selected wdev.
 *
 * The index of the next station to report is kept in cb->args[2] between
 * calls. Stations are fetched one at a time with rdev_dump_station() until
 * it returns -ENOENT or a message no longer fits into @skb.
 *
 * Returns skb->len once the loop ends for either of those reasons, or a
 * negative errno when preparing the dump fails, the wdev has no netdev, the
 * driver lacks dump_station, or the driver reports another error.
 */
static int nl80211_dump_station(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	struct station_info sinfo;
	struct cfg80211_registered_device *rdev;
	struct wireless_dev *wdev;
	u8 mac_addr[ETH_ALEN];
	int sta_idx = cb->args[2];
	int err;

	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL);
	if (err)
		return err;
	/* nl80211_prepare_wdev_dump acquired it in the successful case */
	__acquire(&rdev->wiphy.mtx);

	if (!wdev->netdev) {
		err = -EINVAL;
		goto out_err;
	}

	if (!rdev->ops->dump_station) {
		err = -EOPNOTSUPP;
		goto out_err;
	}

	while (1) {
		/* start every iteration from a clean station_info */
		memset(&sinfo, 0, sizeof(sinfo));
		err = rdev_dump_station(rdev, wdev->netdev, sta_idx,
					mac_addr, &sinfo);
		if (err == -ENOENT)
			break;
		if (err)
			goto out_err;

		/* on failure stop here; sta_idx is left pointing at this station */
		if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION,
				NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, NLM_F_MULTI,
				rdev, wdev->netdev, mac_addr,
				&sinfo) < 0)
			goto out;

		sta_idx++;
	}

 out:
	cb->args[2] = sta_idx;
	err = skb->len;
 out_err:
	wiphy_unlock(&rdev->wiphy);

	return err;
}

/*
 * Handler that looks up the station named by NL80211_ATTR_MAC and replies
 * with a single NL80211_CMD_NEW_STATION message.
 *
 * Returns -EINVAL without a MAC attribute, -EOPNOTSUPP when the driver lacks
 * get_station, the driver's error when the lookup fails, -ENOMEM when the
 * reply cannot be allocated, -ENOBUFS when it cannot be filled, and otherwise
 * the result of genlmsg_reply().
 */
static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct station_info sinfo;
	struct sk_buff *msg;
	u8 *mac_addr = NULL;
	int err;

	memset(&sinfo, 0, sizeof(sinfo));

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);

	if (!rdev->ops->get_station)
		return -EOPNOTSUPP;

	err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
	if (err)
		return err;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		/* the driver filled sinfo above, so release it here */
		cfg80211_sinfo_release_content(&sinfo);
		return
			-ENOMEM;
	}

	/* nl80211_send_station() releases the sinfo content on every path */
	if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION,
				 info->snd_portid, info->snd_seq, 0,
				 rdev, dev, mac_addr, &sinfo) < 0) {
		nlmsg_free(msg);
		return -ENOBUFS;
	}

	return genlmsg_reply(msg, info);
}

int cfg80211_check_station_change(struct wiphy *wiphy,
				  struct station_parameters *params,
				  enum cfg80211_station_type statype)
{
	if (params->listen_interval != -1 &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC)
		return -EINVAL;

	if (params->support_p2p_ps != -1 &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC)
		return -EINVAL;

	if (params->aid &&
	    !(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
	    statype != CFG80211_STA_AP_CLIENT_UNASSOC)
		return -EINVAL;

	/* When you run into this, adjust the code below for the new flag */
	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8);

	switch (statype) {
	case CFG80211_STA_MESH_PEER_KERNEL:
	case CFG80211_STA_MESH_PEER_USER:
		/*
		 * No ignoring the TDLS flag here -- the userspace mesh
		 * code doesn't have the bug of including TDLS in the
		 * mask everywhere.
		 */
		if (params->sta_flags_mask &
				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
				  BIT(NL80211_STA_FLAG_MFP) |
				  BIT(NL80211_STA_FLAG_AUTHORIZED)))
			return -EINVAL;
		break;
	case CFG80211_STA_TDLS_PEER_SETUP:
	case CFG80211_STA_TDLS_PEER_ACTIVE:
		if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
			return -EINVAL;
		/* ignore since it can't change */
		params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
		break;
	default:
		/* disallow mesh-specific things */
		if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION)
			return -EINVAL;
		if (params->local_pm)
			return -EINVAL;
		if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE)
			return -EINVAL;
	}

	if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
	    statype != CFG80211_STA_TDLS_PEER_ACTIVE) {
		/* TDLS can't be set, ... */
		if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
			return -EINVAL;
		/*
		 * ... but don't bother the driver with it.
This works around * a hostapd/wpa_supplicant issue -- it always includes the * TLDS_PEER flag in the mask even for AP mode. */ params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER); } if (statype != CFG80211_STA_TDLS_PEER_SETUP && statype != CFG80211_STA_AP_CLIENT_UNASSOC) { /* reject other things that can't change */ if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) return -EINVAL; if (params->sta_modify_mask & STATION_PARAM_APPLY_CAPABILITY) return -EINVAL; if (params->link_sta_params.supported_rates) return -EINVAL; if (params->ext_capab || params->link_sta_params.ht_capa || params->link_sta_params.vht_capa || params->link_sta_params.he_capa || params->link_sta_params.eht_capa) return -EINVAL; if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) return -EINVAL; } if (statype != CFG80211_STA_AP_CLIENT && statype != CFG80211_STA_AP_CLIENT_UNASSOC) { if (params->vlan) return -EINVAL; } switch (statype) { case CFG80211_STA_AP_MLME_CLIENT: /* Use this only for authorizing/unauthorizing a station */ if (!(params->sta_flags_mask & BIT(NL80211_STA_FLAG_AUTHORIZED))) return -EOPNOTSUPP; break; case CFG80211_STA_AP_CLIENT: case CFG80211_STA_AP_CLIENT_UNASSOC: /* accept only the listed bits */ if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_SPP_AMSDU))) return -EINVAL; /* but authenticated/associated only if driver handles it */ if (!(wiphy->features & NL80211_FEATURE_FULL_AP_CLIENT_STATE) && params->sta_flags_mask & (BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED))) return -EINVAL; break; case CFG80211_STA_IBSS: case CFG80211_STA_AP_STA: /* reject any changes other than AUTHORIZED */ if (params->sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED)) return -EINVAL; break; case CFG80211_STA_TDLS_PEER_SETUP: /* reject any changes 
other than AUTHORIZED or WME */ if (params->sta_flags_mask & ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_WME))) return -EINVAL; /* force (at least) rates when authorizing */ if (params->sta_flags_set & BIT(NL80211_STA_FLAG_AUTHORIZED) && !params->link_sta_params.supported_rates) return -EINVAL; break; case CFG80211_STA_TDLS_PEER_ACTIVE: /* reject any changes */ return -EINVAL; case CFG80211_STA_MESH_PEER_KERNEL: if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) return -EINVAL; break; case CFG80211_STA_MESH_PEER_USER: if (params->plink_action != NL80211_PLINK_ACTION_NO_ACTION && params->plink_action != NL80211_PLINK_ACTION_BLOCK) return -EINVAL; break; } /* * Older kernel versions ignored this attribute entirely, so don't * reject attempts to update it but mark it as unused instead so the * driver won't look at the data. */ if (statype != CFG80211_STA_AP_CLIENT_UNASSOC && statype != CFG80211_STA_TDLS_PEER_SETUP) params->link_sta_params.opmode_notif_used = false; return 0; } EXPORT_SYMBOL(cfg80211_check_station_change); /* * Get vlan interface making sure it is running and on the right wiphy. 
*/
static struct net_device *get_vlan(struct genl_info *info,
				   struct cfg80211_registered_device *rdev)
{
	struct nlattr *vlanattr = info->attrs[NL80211_ATTR_STA_VLAN];
	struct net_device *v;
	int ret;

	/* no NL80211_ATTR_STA_VLAN given: nothing to look up, not an error */
	if (!vlanattr)
		return NULL;

	v = dev_get_by_index(genl_info_net(info), nla_get_u32(vlanattr));
	if (!v)
		return ERR_PTR(-ENODEV);

	/* must be a wireless interface that belongs to this wiphy */
	if (!v->ieee80211_ptr || v->ieee80211_ptr->wiphy != &rdev->wiphy) {
		ret = -EINVAL;
		goto error;
	}

	/* only AP_VLAN, AP and P2P_GO interfaces are accepted */
	if (v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
	    v->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) {
		ret = -EINVAL;
		goto error;
	}

	if (!netif_running(v)) {
		ret = -ENETDOWN;
		goto error;
	}

	/* success: the caller is responsible for the dev_put() */
	return v;
error:
	/* pairs with the dev_get_by_index() above */
	dev_put(v);
	return ERR_PTR(ret);
}

/*
 * Parse the nested NL80211_ATTR_STA_WME attribute, if present, into
 * params->uapsd_queues and params->max_sp and mark the U-APSD settings as
 * to-be-applied in params->sta_modify_mask.
 *
 * Returns 0 when the attribute is absent or valid, the nested-parse error,
 * or -EINVAL when a value has bits outside the corresponding QoS-info mask.
 */
static int nl80211_parse_sta_wme(struct genl_info *info,
				 struct station_parameters *params)
{
	struct nlattr *tb[NL80211_STA_WME_MAX + 1];
	struct nlattr *nla;
	int err;

	/* parse WME attributes if present */
	if (!info->attrs[NL80211_ATTR_STA_WME])
		return 0;

	nla = info->attrs[NL80211_ATTR_STA_WME];
	err = nla_parse_nested_deprecated(tb, NL80211_STA_WME_MAX, nla,
					  nl80211_sta_wme_policy,
					  info->extack);
	if (err)
		return err;

	if (tb[NL80211_STA_WME_UAPSD_QUEUES])
		params->uapsd_queues = nla_get_u8(
			tb[NL80211_STA_WME_UAPSD_QUEUES]);
	/* checked even when the nested attribute did not set the value */
	if (params->uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
		return -EINVAL;

	if (tb[NL80211_STA_WME_MAX_SP])
		params->max_sp = nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);

	if (params->max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
		return -EINVAL;

	params->sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;

	return 0;
}

/*
 * Copy pointers to (and lengths of) the supported-channels and
 * supported-operating-classes attribute payloads into @params.
 *
 * Returns -EINVAL if the supported-channels payload has an odd length,
 * 0 otherwise.
 */
static int nl80211_parse_sta_channel_info(struct genl_info *info,
				      struct station_parameters *params)
{
	if (info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]) {
		params->supported_channels =
		     nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]);
		params->supported_channels_len =
		     nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_CHANNELS]);
		/*
		 * Need to include at least one (first channel, number of
		 * channels) tuple for each subband (checked in policy),
		 * and must have proper tuples for the rest of the data as well.
		 */
		if (params->supported_channels_len % 2)
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]) {
		params->supported_oper_classes =
		 nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]);
		params->supported_oper_classes_len =
		  nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_OPER_CLASSES]);
	}
	return 0;
}

/*
 * Collect the attributes used when updating a TDLS peer: peer AID, HT, VHT,
 * HE and EHT capabilities, followed by channel info and WME parameters.
 *
 * The EHT capability attribute is only looked at when an HE capability
 * attribute is present as well.  Returns -EINVAL when
 * ieee80211_eht_capa_size_ok() rejects the EHT capability length, otherwise
 * the result of the channel-info and WME parsers.
 */
static int nl80211_set_station_tdls(struct genl_info *info,
				    struct station_parameters *params)
{
	int err;
	/* Dummy STA entry gets updated once the peer capabilities are known */
	if (info->attrs[NL80211_ATTR_PEER_AID])
		params->aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
		params->link_sta_params.ht_capa =
			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
		params->link_sta_params.vht_capa =
			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);
	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
		params->link_sta_params.he_capa =
			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
		params->link_sta_params.he_capa_len =
			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);

		if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) {
			params->link_sta_params.eht_capa =
				nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
			params->link_sta_params.eht_capa_len =
				nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);

			if (!ieee80211_eht_capa_size_ok((const u8 *)params->link_sta_params.he_capa,
							(const u8 *)params->link_sta_params.eht_capa,
							params->link_sta_params.eht_capa_len,
							false))
				return -EINVAL;
		}
	}

	err = nl80211_parse_sta_channel_info(info, params);
	if (err)
		return err;

	return nl80211_parse_sta_wme(info, params);
}

/*
 * Parse the optional per-station TX power setting into @txpwr and report
 * through @txpwr_set whether NL80211_ATTR_STA_TX_POWER_SETTING was present.
 *
 * Returns -EOPNOTSUPP when the attribute is given but the device lacks the
 * set_tx_power op or the NL80211_EXT_FEATURE_STA_TX_PWR feature, -EINVAL
 * when a limited setting comes without NL80211_ATTR_STA_TX_POWER, else 0.
 */
static int nl80211_parse_sta_txpower_setting(struct genl_info *info,
					     struct sta_txpwr *txpwr,
					     bool *txpwr_set)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	int idx;

	if (info->attrs[NL80211_ATTR_STA_TX_POWER_SETTING]) {
		if (!rdev->ops->set_tx_power ||
!wiphy_ext_feature_isset(&rdev->wiphy,
					 NL80211_EXT_FEATURE_STA_TX_PWR))
			return -EOPNOTSUPP;

		idx = NL80211_ATTR_STA_TX_POWER_SETTING;
		txpwr->type = nla_get_u8(info->attrs[idx]);

		if (txpwr->type == NL80211_TX_POWER_LIMITED) {
			idx = NL80211_ATTR_STA_TX_POWER;

			if (info->attrs[idx])
				txpwr->power = nla_get_s16(info->attrs[idx]);
			else
				return -EINVAL;
		}

		*txpwr_set = true;
	} else {
		*txpwr_set = false;
	}

	return 0;
}

/*
 * Handler for changing an existing station.
 *
 * Gathers the netlink attributes into a zeroed struct station_parameters and
 * passes it to the driver's change_station op.  listen_interval and
 * support_p2p_ps are set to -1 when their attribute is absent.  When
 * NL80211_ATTR_MLD_ADDR is present, the station is addressed by that MLD
 * address and NL80211_ATTR_MAC is used as the link address; otherwise
 * NL80211_ATTR_MAC addresses the station.
 *
 * Returns -EOPNOTSUPP when the op, a required feature or the interface type
 * is unsupported, -EINVAL or a parser error for bad attributes, an error
 * from get_vlan(), or the driver's result.  A vlan reference obtained from
 * get_vlan() is dropped before returning.
 */
static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct station_parameters params;
	u8 *mac_addr;
	int err;

	memset(&params, 0, sizeof(params));

	if (!rdev->ops->change_station)
		return -EOPNOTSUPP;

	/*
	 * AID and listen_interval properties can be set only for unassociated
	 * station. Include these parameters here and will check them in
	 * cfg80211_check_station_change().
	 */
	if (info->attrs[NL80211_ATTR_STA_AID])
		params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);

	if (info->attrs[NL80211_ATTR_VLAN_ID])
		params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]);

	if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
		params.listen_interval =
		     nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);
	else
		params.listen_interval = -1;

	if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS])
		params.support_p2p_ps =
			nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
	else
		params.support_p2p_ps = -1;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	params.link_sta_params.link_id =
		nl80211_link_id_or_invalid(info->attrs);

	if (info->attrs[NL80211_ATTR_MLD_ADDR]) {
		/* If MLD_ADDR attribute is set then this is an MLD station
		 * and the MLD_ADDR attribute holds the MLD address and the
		 * MAC attribute holds the LINK address.
		 * In that case, the link_id is also expected to be valid.
		 */
		if (params.link_sta_params.link_id < 0)
			return -EINVAL;

		mac_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
		params.link_sta_params.mld_mac = mac_addr;
		params.link_sta_params.link_mac =
			nla_data(info->attrs[NL80211_ATTR_MAC]);
		if (!is_valid_ether_addr(params.link_sta_params.link_mac))
			return -EINVAL;
	} else {
		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
	}

	if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) {
		params.link_sta_params.supported_rates =
			nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
		params.link_sta_params.supported_rates_len =
			nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
	}

	if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) {
		params.capability =
			nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]);
		params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY;
	}

	if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) {
		params.ext_capab =
			nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
		params.ext_capab_len =
			nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
	}

	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
		return -EINVAL;

	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
		params.plink_action =
			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);

	if (info->attrs[NL80211_ATTR_STA_PLINK_STATE]) {
		params.plink_state =
			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
		/* the mesh peer AID is only read together with a plink state */
		if (info->attrs[NL80211_ATTR_MESH_PEER_AID])
			params.peer_aid = nla_get_u16(
				info->attrs[NL80211_ATTR_MESH_PEER_AID]);
		params.sta_modify_mask |= STATION_PARAM_APPLY_PLINK_STATE;
	}

	if (info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE])
		params.local_pm = nla_get_u32(
			info->attrs[NL80211_ATTR_LOCAL_MESH_POWER_MODE]);

	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
		params.link_sta_params.opmode_notif_used = true;
		params.link_sta_params.opmode_notif =
			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
	}

	if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
		params.link_sta_params.he_6ghz_capa =
			nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);

	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
		params.airtime_weight =
			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);

	/* a non-zero airtime weight needs airtime fairness support */
	if (params.airtime_weight &&
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
		return -EOPNOTSUPP;

	err = nl80211_parse_sta_txpower_setting(info,
						&params.link_sta_params.txpwr,
						&params.link_sta_params.txpwr_set);
	if (err)
		return err;

	/* Include parameters for TDLS peer (will check later) */
	err = nl80211_set_station_tdls(info, &params);
	if (err)
		return err;

	/* from here on a vlan reference may be held: exit via out_put_vlan */
	params.vlan = get_vlan(info, rdev);
	if (IS_ERR(params.vlan))
		return PTR_ERR(params.vlan);

	switch (dev->ieee80211_ptr->iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_P2P_GO:
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_ADHOC:
	case NL80211_IFTYPE_MESH_POINT:
		break;
	default:
		err = -EOPNOTSUPP;
		goto out_put_vlan;
	}

	/* driver will call cfg80211_check_station_change() */
	err = rdev_change_station(rdev, dev, mac_addr, &params);

out_put_vlan:
	dev_put(params.vlan);

	return err;
}

/*
 * Handler for adding a new station.
 *
 * Requires the MAC, listen-interval and supported-rates attributes plus
 * either NL80211_ATTR_STA_AID or NL80211_ATTR_PEER_AID.  The remaining
 * attributes are collected into a zeroed struct station_parameters, checked
 * against the interface type, and the result is passed to the driver's
 * add_station op.
 */
static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	int err;
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct station_parameters params;
	u8 *mac_addr = NULL;
	u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
			 BIT(NL80211_STA_FLAG_ASSOCIATED);

	memset(&params, 0, sizeof(params));

	if (!rdev->ops->add_station)
		return -EOPNOTSUPP;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_STA_AID] &&
	    !info->attrs[NL80211_ATTR_PEER_AID])
		return -EINVAL;

	params.link_sta_params.link_id =
		nl80211_link_id_or_invalid(info->attrs);

	if (info->attrs[NL80211_ATTR_MLD_ADDR]) {
		mac_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
params.link_sta_params.mld_mac = mac_addr;
		params.link_sta_params.link_mac =
			nla_data(info->attrs[NL80211_ATTR_MAC]);
		if (!is_valid_ether_addr(params.link_sta_params.link_mac))
			return -EINVAL;
	} else {
		mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
	}

	params.link_sta_params.supported_rates =
		nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
	params.link_sta_params.supported_rates_len =
		nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]);
	params.listen_interval =
		nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]);

	if (info->attrs[NL80211_ATTR_VLAN_ID])
		params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]);

	if (info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]) {
		params.support_p2p_ps =
			nla_get_u8(info->attrs[NL80211_ATTR_STA_SUPPORT_P2P_PS]);
	} else {
		/*
		 * if not specified, assume it's supported for P2P GO interface,
		 * and is NOT supported for AP interface
		 */
		params.support_p2p_ps =
			dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO;
	}

	/* NL80211_ATTR_PEER_AID takes precedence over NL80211_ATTR_STA_AID */
	if (info->attrs[NL80211_ATTR_PEER_AID])
		params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]);
	else
		params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]);

	if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) {
		params.capability =
			nla_get_u16(info->attrs[NL80211_ATTR_STA_CAPABILITY]);
		params.sta_modify_mask |= STATION_PARAM_APPLY_CAPABILITY;
	}

	if (info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]) {
		params.ext_capab =
			nla_data(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
		params.ext_capab_len =
			nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]);
	}

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY])
		params.link_sta_params.ht_capa =
			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);

	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY])
		params.link_sta_params.vht_capa =
			nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]);

	if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) {
		params.link_sta_params.he_capa =
			nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]);
		params.link_sta_params.he_capa_len =
			nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]);

		/* EHT capabilities are only looked at together with HE */
		if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) {
			params.link_sta_params.eht_capa =
				nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);
			params.link_sta_params.eht_capa_len =
				nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]);

			if (!ieee80211_eht_capa_size_ok((const u8 *)params.link_sta_params.he_capa,
							(const u8 *)params.link_sta_params.eht_capa,
							params.link_sta_params.eht_capa_len,
							false))
				return -EINVAL;
		}
	}

	if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
		params.link_sta_params.he_6ghz_capa =
			nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);

	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
		params.link_sta_params.opmode_notif_used = true;
		params.link_sta_params.opmode_notif =
			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
	}

	if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION])
		params.plink_action =
			nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);

	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
		params.airtime_weight =
			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);

	/* a non-zero airtime weight needs airtime fairness support */
	if (params.airtime_weight &&
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
		return -EOPNOTSUPP;

	err = nl80211_parse_sta_txpower_setting(info,
						&params.link_sta_params.txpwr,
						&params.link_sta_params.txpwr_set);
	if (err)
		return err;

	err = nl80211_parse_sta_channel_info(info, &params);
	if (err)
		return err;

	err = nl80211_parse_sta_wme(info, &params);
	if (err)
		return err;

	if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params))
		return -EINVAL;

	/* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT
	 * as userspace might just pass through the capabilities from the IEs
	 * directly, rather than enforcing this restriction and returning an
	 * error in this case.
	 */
	if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME))) {
		params.link_sta_params.ht_capa = NULL;
		params.link_sta_params.vht_capa = NULL;

		/* HE and EHT require WME */
		if (params.link_sta_params.he_capa_len ||
		    params.link_sta_params.he_6ghz_capa ||
		    params.link_sta_params.eht_capa_len)
			return -EINVAL;
	}

	/* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */
	if (params.link_sta_params.he_6ghz_capa &&
	    (params.link_sta_params.ht_capa || params.link_sta_params.vht_capa))
		return -EINVAL;

	/* When you run into this, adjust the code below for the new flag */
	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8);

	switch (dev->ieee80211_ptr->iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_P2P_GO:
		/* ignore WME attributes if iface/sta is not capable */
		if (!(rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) ||
		    !(params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)))
			params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;

		/* TDLS peers cannot be added */
		if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) ||
		    info->attrs[NL80211_ATTR_PEER_AID])
			return -EINVAL;
		/* but don't bother the driver with it */
		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);

		/* allow authenticated/associated only if driver handles it */
		if (!(rdev->wiphy.features &
				NL80211_FEATURE_FULL_AP_CLIENT_STATE) &&
		    params.sta_flags_mask & auth_assoc)
			return -EINVAL;

		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT) &&
		    params.sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU))
			return -EINVAL;

		/* Older userspace, or userspace wanting to be compatible with
		 * !NL80211_FEATURE_FULL_AP_CLIENT_STATE, will not set the auth
		 * and assoc flags in the mask, but assumes the station will be
		 * added as associated anyway since this was the required driver
		 * behaviour before NL80211_FEATURE_FULL_AP_CLIENT_STATE was
		 * introduced.
		 * In order to not bother drivers with this quirk in the API
		 * set the flags in both the mask and set for new stations in
		 * this case.
		 */
		if (!(params.sta_flags_mask & auth_assoc)) {
			params.sta_flags_mask |= auth_assoc;
			params.sta_flags_set |= auth_assoc;
		}

		/* must be last in here for error handling */
		params.vlan = get_vlan(info, rdev);
		if (IS_ERR(params.vlan))
			return PTR_ERR(params.vlan);
		break;
	case NL80211_IFTYPE_MESH_POINT:
		/* ignore uAPSD data */
		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;

		/* associated is disallowed */
		if (params.sta_flags_mask & BIT(NL80211_STA_FLAG_ASSOCIATED))
			return -EINVAL;
		/* TDLS peers cannot be added */
		if ((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) ||
		    info->attrs[NL80211_ATTR_PEER_AID])
			return -EINVAL;
		break;
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		/* ignore uAPSD data */
		params.sta_modify_mask &= ~STATION_PARAM_APPLY_UAPSD;

		/* these are disallowed */
		if (params.sta_flags_mask &
				(BIT(NL80211_STA_FLAG_ASSOCIATED) |
				 BIT(NL80211_STA_FLAG_AUTHENTICATED)))
			return -EINVAL;
		/* Only TDLS peers can be added */
		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
			return -EINVAL;
		/* Can only add if TDLS ... */
		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS))
			return -EOPNOTSUPP;
		/* ... with external setup is supported */
		if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
			return -EOPNOTSUPP;
		/*
		 * Older wpa_supplicant versions always mark the TDLS peer
		 * as authorized, but it shouldn't yet be.
		 */
		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED);
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* be aware of params.vlan when changing code here */

	/*
	 * With links configured on the interface a valid link ID naming one
	 * of them is required; without links no link ID may be given.
	 */
	if (wdev->valid_links) {
		if (params.link_sta_params.link_id < 0) {
			err = -EINVAL;
			goto out;
		}
		if (!(wdev->valid_links & BIT(params.link_sta_params.link_id))) {
			err = -ENOLINK;
			goto out;
		}
	} else {
		if (params.link_sta_params.link_id >= 0) {
			err = -EINVAL;
			goto out;
		}
	}
	err = rdev_add_station(rdev, dev, mac_addr, &params);
out:
	/* params.vlan is only set by the AP/AP_VLAN/P2P_GO case above */
	dev_put(params.vlan);
	return err;
}

/*
 * Handler for removing a station.
 *
 * Accepted on AP, AP_VLAN, MESH_POINT and P2P_GO interfaces, and on ADHOC
 * interfaces when the wiphy has NL80211_EXT_FEATURE_DEL_IBSS_STA.  The
 * management frame subtype defaults to deauthentication and the reason code
 * to WLAN_REASON_PREV_AUTH_NOT_VALID when the attributes are absent.
 * NL80211_ATTR_MAC is optional; params.mac stays NULL without it.
 */
static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct station_del_parameters params;
	int link_id = nl80211_link_id_or_invalid(info->attrs);

	memset(&params, 0, sizeof(params));

	if (info->attrs[NL80211_ATTR_MAC])
		params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]);

	switch (wdev->iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_MESH_POINT:
	case NL80211_IFTYPE_P2P_GO:
		/* always accept these */
		break;
	case NL80211_IFTYPE_ADHOC:
		/* conditionally accept */
		if (wiphy_ext_feature_isset(&rdev->wiphy,
					    NL80211_EXT_FEATURE_DEL_IBSS_STA))
			break;
		return -EINVAL;
	default:
		return -EINVAL;
	}

	if (!rdev->ops->del_station)
		return -EOPNOTSUPP;

	if (info->attrs[NL80211_ATTR_MGMT_SUBTYPE]) {
		params.subtype =
			nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]);
		/* only disassociation and deauthentication are accepted */
		if (params.subtype != IEEE80211_STYPE_DISASSOC >> 4 &&
		    params.subtype != IEEE80211_STYPE_DEAUTH >> 4)
			return -EINVAL;
	} else {
		/* Default to Deauthentication frame */
		params.subtype = IEEE80211_STYPE_DEAUTH >> 4;
	}

	if (info->attrs[NL80211_ATTR_REASON_CODE]) {
		params.reason_code =
			nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
		if (params.reason_code == 0)
			return -EINVAL; /* 0 is reserved */
	} else {
		/* Default to reason code 2 */
		params.reason_code = WLAN_REASON_PREV_AUTH_NOT_VALID;
	}

	/* Link ID not
expected in case of non-ML operation */ if (!wdev->valid_links && link_id != -1) return -EINVAL; /* If given, a valid link ID should be passed during MLO */ if (wdev->valid_links && link_id >= 0 && !(wdev->valid_links & BIT(link_id))) return -EINVAL; params.link_id = link_id; return rdev_del_station(rdev, dev, &params); } static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { void *hdr; struct nlattr *pinfoattr; hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_MPATH); if (!hdr) return -1; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, dst) || nla_put(msg, NL80211_ATTR_MPATH_NEXT_HOP, ETH_ALEN, next_hop) || nla_put_u32(msg, NL80211_ATTR_GENERATION, pinfo->generation)) goto nla_put_failure; pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MPATH_INFO); if (!pinfoattr) goto nla_put_failure; if ((pinfo->filled & MPATH_INFO_FRAME_QLEN) && nla_put_u32(msg, NL80211_MPATH_INFO_FRAME_QLEN, pinfo->frame_qlen)) goto nla_put_failure; if (((pinfo->filled & MPATH_INFO_SN) && nla_put_u32(msg, NL80211_MPATH_INFO_SN, pinfo->sn)) || ((pinfo->filled & MPATH_INFO_METRIC) && nla_put_u32(msg, NL80211_MPATH_INFO_METRIC, pinfo->metric)) || ((pinfo->filled & MPATH_INFO_EXPTIME) && nla_put_u32(msg, NL80211_MPATH_INFO_EXPTIME, pinfo->exptime)) || ((pinfo->filled & MPATH_INFO_FLAGS) && nla_put_u8(msg, NL80211_MPATH_INFO_FLAGS, pinfo->flags)) || ((pinfo->filled & MPATH_INFO_DISCOVERY_TIMEOUT) && nla_put_u32(msg, NL80211_MPATH_INFO_DISCOVERY_TIMEOUT, pinfo->discovery_timeout)) || ((pinfo->filled & MPATH_INFO_DISCOVERY_RETRIES) && nla_put_u8(msg, NL80211_MPATH_INFO_DISCOVERY_RETRIES, pinfo->discovery_retries)) || ((pinfo->filled & MPATH_INFO_HOP_COUNT) && nla_put_u8(msg, NL80211_MPATH_INFO_HOP_COUNT, pinfo->hop_count)) || ((pinfo->filled & MPATH_INFO_PATH_CHANGE) && nla_put_u32(msg, NL80211_MPATH_INFO_PATH_CHANGE, 
pinfo->path_change_count))) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_dump_mpath(struct sk_buff *skb, struct netlink_callback *cb) { struct mpath_info pinfo; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 next_hop[ETH_ALEN]; int path_idx = cb->args[2]; int err; err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL); if (err) return err; /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); if (!rdev->ops->dump_mpath) { err = -EOPNOTSUPP; goto out_err; } if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EOPNOTSUPP; goto out_err; } while (1) { err = rdev_dump_mpath(rdev, wdev->netdev, path_idx, dst, next_hop, &pinfo); if (err == -ENOENT) break; if (err) goto out_err; if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, wdev->netdev, dst, next_hop, &pinfo) < 0) goto out; path_idx++; } out: cb->args[2] = path_idx; err = skb->len; out_err: wiphy_unlock(&rdev->wiphy); return err; } static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; struct mpath_info pinfo; struct sk_buff *msg; u8 *dst = NULL; u8 next_hop[ETH_ALEN]; memset(&pinfo, 0, sizeof(pinfo)); if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; dst = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!rdev->ops->get_mpath) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; err = rdev_get_mpath(rdev, dev, dst, next_hop, &pinfo); if (err) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, next_hop, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } 
static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u8 *dst = NULL; u8 *next_hop = NULL; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; if (!info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]) return -EINVAL; dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); if (!rdev->ops->change_mpath) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; return rdev_change_mpath(rdev, dev, dst, next_hop); } static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u8 *dst = NULL; u8 *next_hop = NULL; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; if (!info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]) return -EINVAL; dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); if (!rdev->ops->add_mpath) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; return rdev_add_mpath(rdev, dev, dst, next_hop); } static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u8 *dst = NULL; if (info->attrs[NL80211_ATTR_MAC]) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!rdev->ops->del_mpath) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; return rdev_del_mpath(rdev, dev, dst); } static int nl80211_get_mpp(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; struct net_device *dev = info->user_ptr[1]; struct mpath_info pinfo; struct sk_buff *msg; u8 *dst = NULL; u8 mpp[ETH_ALEN]; memset(&pinfo, 0, sizeof(pinfo)); 
if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; dst = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!rdev->ops->get_mpp) return -EOPNOTSUPP; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; err = rdev_get_mpp(rdev, dev, dst, mpp, &pinfo); if (err) return err; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, mpp, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; } return genlmsg_reply(msg, info); } static int nl80211_dump_mpp(struct sk_buff *skb, struct netlink_callback *cb) { struct mpath_info pinfo; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; u8 dst[ETH_ALEN]; u8 mpp[ETH_ALEN]; int path_idx = cb->args[2]; int err; err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, NULL); if (err) return err; /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); if (!rdev->ops->dump_mpp) { err = -EOPNOTSUPP; goto out_err; } if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) { err = -EOPNOTSUPP; goto out_err; } while (1) { err = rdev_dump_mpp(rdev, wdev->netdev, path_idx, dst, mpp, &pinfo); if (err == -ENOENT) break; if (err) goto out_err; if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, wdev->netdev, dst, mpp, &pinfo) < 0) goto out; path_idx++; } out: cb->args[2] = path_idx; err = skb->len; out_err: wiphy_unlock(&rdev->wiphy); return err; } static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct bss_parameters params; memset(&params, 0, sizeof(params)); params.link_id = nl80211_link_id_or_invalid(info->attrs); /* default to not changing parameters */ params.use_cts_prot = -1; params.use_short_preamble = -1; params.use_short_slot_time = -1; params.ap_isolate = -1; params.ht_opmode = -1; params.p2p_ctwindow = -1; 
params.p2p_opp_ps = -1;

	if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
		params.use_cts_prot =
		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_CTS_PROT]);
	if (info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE])
		params.use_short_preamble =
		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]);
	if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME])
		params.use_short_slot_time =
		    nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]);
	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
		params.basic_rates =
			nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		params.basic_rates_len =
			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
	}
	if (info->attrs[NL80211_ATTR_AP_ISOLATE])
		params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
	if (info->attrs[NL80211_ATTR_BSS_HT_OPMODE])
		params.ht_opmode =
			nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]);

	if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) {
		/* the CT window is only accepted on P2P GO interfaces */
		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
			return -EINVAL;
		params.p2p_ctwindow =
			nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]);
		/* a non-zero window needs NL80211_FEATURE_P2P_GO_CTWIN */
		if (params.p2p_ctwindow != 0 &&
		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN))
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_P2P_OPPPS]) {
		u8 tmp;

		/* opportunistic PS is only accepted on P2P GO interfaces */
		if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
			return -EINVAL;
		tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]);
		params.p2p_opp_ps = tmp;
		/* enabling it needs NL80211_FEATURE_P2P_GO_OPPPS */
		if (params.p2p_opp_ps &&
		    !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS))
			return -EINVAL;
	}

	if (!rdev->ops->change_bss)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
		return -EOPNOTSUPP;

	return rdev_change_bss(rdev, dev, &params);
}

/*
 * Handler for regulatory hints coming from userspace.
 *
 * The hint type defaults to NL80211_USER_REG_HINT_USER when
 * NL80211_ATTR_USER_REG_HINT_TYPE is absent.  USER and CELL_BASE hints
 * require NL80211_ATTR_REG_ALPHA2 and are forwarded to
 * regulatory_hint_user(); INDOOR hints are forwarded to
 * regulatory_hint_indoor() and always return 0.
 *
 * Returns -EINPROGRESS while cfg80211_regdomain is not set, -EINVAL for an
 * unknown hint type or a missing alpha2 attribute.
 */
static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
{
	char *data = NULL;
	bool is_indoor;
	enum nl80211_user_reg_hint_type user_reg_hint_type;
	u32 owner_nlportid;

	/*
	 * You should only get this when cfg80211 hasn't yet initialized
	 * completely when built-in to the kernel right between the time
	 * window between nl80211_init() and regulatory_init(), if that is
	 * even possible.
	 */
	if (unlikely(!rcu_access_pointer(cfg80211_regdomain)))
		return -EINPROGRESS;

	user_reg_hint_type =
		nla_get_u32_default(info->attrs[NL80211_ATTR_USER_REG_HINT_TYPE],
				    NL80211_USER_REG_HINT_USER);

	switch (user_reg_hint_type) {
	case NL80211_USER_REG_HINT_USER:
	case NL80211_USER_REG_HINT_CELL_BASE:
		if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
			return -EINVAL;

		data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
		return regulatory_hint_user(data, user_reg_hint_type);
	case NL80211_USER_REG_HINT_INDOOR:
		/*
		 * With a socket owner the hint is tied to the sender's port
		 * ID and the indoor state follows the presence of
		 * NL80211_ATTR_REG_INDOOR; without one, indoor is assumed
		 * and no owner is recorded.
		 */
		if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
			owner_nlportid = info->snd_portid;
			is_indoor = !!info->attrs[NL80211_ATTR_REG_INDOOR];
		} else {
			owner_nlportid = 0;
			is_indoor = true;
		}

		regulatory_hint_indoor(is_indoor, owner_nlportid);
		return 0;
	default:
		return -EINVAL;
	}
}

/* Handler that simply forwards to reg_reload_regdb() and returns its result. */
static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info)
{
	return reg_reload_regdb();
}

/*
 * Handler for reading the mesh configuration of a mesh point interface.
 *
 * Replies with an NL80211_CMD_GET_MESH_CONFIG message whose nested
 * NL80211_ATTR_MESH_CONFIG attribute carries the parameters.  The values
 * come from default_mesh_config while no mesh ID is set on the interface,
 * and from the driver's get_mesh_config op otherwise.
 *
 * Returns -EOPNOTSUPP on a non-mesh interface or without the op, the
 * driver's error, -ENOMEM if the reply cannot be allocated, -ENOBUFS if the
 * reply could not be filled, or the result of genlmsg_reply().
 */
static int nl80211_get_mesh_config(struct sk_buff *skb,
				   struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct mesh_config cur_params;
	int err = 0;
	void *hdr;
	struct nlattr *pinfoattr;
	struct sk_buff *msg;

	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	if (!rdev->ops->get_mesh_config)
		return -EOPNOTSUPP;

	/* If not connected, get default parameters */
	if (!wdev->u.mesh.id_len)
		memcpy(&cur_params, &default_mesh_config, sizeof(cur_params));
	else
		err = rdev_get_mesh_config(rdev, dev, &cur_params);

	if (err)
		return err;

	/* Draw up a netlink message to send back */
	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;
	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_GET_MESH_CONFIG);
	if (!hdr)
		goto out;
	pinfoattr = nla_nest_start_noflag(msg, NL80211_ATTR_MESH_CONFIG);
	if (!pinfoattr)
		goto nla_put_failure;
	if (nla_put_u32(msg,
NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u16(msg, NL80211_MESHCONF_RETRY_TIMEOUT, cur_params.dot11MeshRetryTimeout) || nla_put_u16(msg, NL80211_MESHCONF_CONFIRM_TIMEOUT, cur_params.dot11MeshConfirmTimeout) || nla_put_u16(msg, NL80211_MESHCONF_HOLDING_TIMEOUT, cur_params.dot11MeshHoldingTimeout) || nla_put_u16(msg, NL80211_MESHCONF_MAX_PEER_LINKS, cur_params.dot11MeshMaxPeerLinks) || nla_put_u8(msg, NL80211_MESHCONF_MAX_RETRIES, cur_params.dot11MeshMaxRetries) || nla_put_u8(msg, NL80211_MESHCONF_TTL, cur_params.dot11MeshTTL) || nla_put_u8(msg, NL80211_MESHCONF_ELEMENT_TTL, cur_params.element_ttl) || nla_put_u8(msg, NL80211_MESHCONF_AUTO_OPEN_PLINKS, cur_params.auto_open_plinks) || nla_put_u32(msg, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, cur_params.dot11MeshNbrOffsetMaxNeighbor) || nla_put_u8(msg, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, cur_params.dot11MeshHWMPmaxPREQretries) || nla_put_u32(msg, NL80211_MESHCONF_PATH_REFRESH_TIME, cur_params.path_refresh_time) || nla_put_u16(msg, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, cur_params.min_discovery_timeout) || nla_put_u32(msg, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, cur_params.dot11MeshHWMPactivePathTimeout) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, cur_params.dot11MeshHWMPpreqMinInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, cur_params.dot11MeshHWMPperrMinInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, cur_params.dot11MeshHWMPnetDiameterTraversalTime) || nla_put_u8(msg, NL80211_MESHCONF_HWMP_ROOTMODE, cur_params.dot11MeshHWMPRootMode) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_RANN_INTERVAL, cur_params.dot11MeshHWMPRannInterval) || nla_put_u8(msg, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, cur_params.dot11MeshGateAnnouncementProtocol) || nla_put_u8(msg, NL80211_MESHCONF_FORWARDING, cur_params.dot11MeshForwarding) || nla_put_s32(msg, NL80211_MESHCONF_RSSI_THRESHOLD, cur_params.rssi_threshold) || nla_put_u32(msg, NL80211_MESHCONF_HT_OPMODE, 
cur_params.ht_opmode) || nla_put_u32(msg, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, cur_params.dot11MeshHWMPactivePathToRootTimeout) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, cur_params.dot11MeshHWMProotInterval) || nla_put_u16(msg, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, cur_params.dot11MeshHWMPconfirmationInterval) || nla_put_u32(msg, NL80211_MESHCONF_POWER_MODE, cur_params.power_mode) || nla_put_u16(msg, NL80211_MESHCONF_AWAKE_WINDOW, cur_params.dot11MeshAwakeWindowDuration) || nla_put_u32(msg, NL80211_MESHCONF_PLINK_TIMEOUT, cur_params.plink_timeout) || nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_GATE, cur_params.dot11MeshConnectedToMeshGate) || nla_put_u8(msg, NL80211_MESHCONF_NOLEARN, cur_params.dot11MeshNolearn) || nla_put_u8(msg, NL80211_MESHCONF_CONNECTED_TO_AS, cur_params.dot11MeshConnectedToAuthServer)) goto nla_put_failure; nla_nest_end(msg, pinfoattr); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: out: nlmsg_free(msg); return -ENOBUFS; } static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_RETRY_TIMEOUT] = NLA_POLICY_RANGE(NLA_U16, 1, 255), [NL80211_MESHCONF_CONFIRM_TIMEOUT] = NLA_POLICY_RANGE(NLA_U16, 1, 255), [NL80211_MESHCONF_HOLDING_TIMEOUT] = NLA_POLICY_RANGE(NLA_U16, 1, 255), [NL80211_MESHCONF_MAX_PEER_LINKS] = NLA_POLICY_RANGE(NLA_U16, 0, 255), [NL80211_MESHCONF_MAX_RETRIES] = NLA_POLICY_MAX(NLA_U8, 16), [NL80211_MESHCONF_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_MESHCONF_ELEMENT_TTL] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_MESHCONF_AUTO_OPEN_PLINKS] = NLA_POLICY_MAX(NLA_U8, 1), [NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR] = NLA_POLICY_RANGE(NLA_U32, 1, 255), [NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES] = { .type = NLA_U8 }, [NL80211_MESHCONF_PATH_REFRESH_TIME] = { .type = NLA_U32 }, [NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 }, 
[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_HWMP_ROOTMODE] = NLA_POLICY_MAX(NLA_U8, 4), [NL80211_MESHCONF_HWMP_RANN_INTERVAL] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_GATE_ANNOUNCEMENTS] = NLA_POLICY_MAX(NLA_U8, 1), [NL80211_MESHCONF_FORWARDING] = NLA_POLICY_MAX(NLA_U8, 1), [NL80211_MESHCONF_RSSI_THRESHOLD] = NLA_POLICY_RANGE(NLA_S32, -255, 0), [NL80211_MESHCONF_HT_OPMODE] = { .type = NLA_U16 }, [NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_HWMP_ROOT_INTERVAL] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL] = NLA_POLICY_MIN(NLA_U16, 1), [NL80211_MESHCONF_POWER_MODE] = NLA_POLICY_RANGE(NLA_U32, NL80211_MESH_POWER_ACTIVE, NL80211_MESH_POWER_MAX), [NL80211_MESHCONF_AWAKE_WINDOW] = { .type = NLA_U16 }, [NL80211_MESHCONF_PLINK_TIMEOUT] = { .type = NLA_U32 }, [NL80211_MESHCONF_CONNECTED_TO_GATE] = NLA_POLICY_RANGE(NLA_U8, 0, 1), [NL80211_MESHCONF_NOLEARN] = NLA_POLICY_RANGE(NLA_U8, 0, 1), [NL80211_MESHCONF_CONNECTED_TO_AS] = NLA_POLICY_RANGE(NLA_U8, 0, 1), }; static const struct nla_policy nl80211_mesh_setup_params_policy[NL80211_MESH_SETUP_ATTR_MAX+1] = { [NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_AUTH] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_AUTH_PROTOCOL] = { .type = NLA_U8 }, [NL80211_MESH_SETUP_USERSPACE_MPM] = { .type = NLA_FLAG }, [NL80211_MESH_SETUP_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_ie_attr, IEEE80211_MAX_DATA_LEN), [NL80211_MESH_SETUP_USERSPACE_AMPE] = { .type = NLA_FLAG }, }; static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) { struct nlattr 
*tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 mask = 0; u16 ht_opmode; #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, mask, attr, fn) \ do { \ if (tb[attr]) { \ cfg->param = fn(tb[attr]); \ mask |= BIT((attr) - 1); \ } \ } while (0) if (!info->attrs[NL80211_ATTR_MESH_CONFIG]) return -EINVAL; if (nla_parse_nested_deprecated(tb, NL80211_MESHCONF_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_CONFIG], nl80211_meshconf_params_policy, info->extack)) return -EINVAL; /* This makes sure that there aren't more than 32 mesh config * parameters (otherwise our bitfield scheme would not work.) */ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); /* Fill in the params struct */ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshRetryTimeout, mask, NL80211_MESHCONF_RETRY_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConfirmTimeout, mask, NL80211_MESHCONF_CONFIRM_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHoldingTimeout, mask, NL80211_MESHCONF_HOLDING_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxPeerLinks, mask, NL80211_MESHCONF_MAX_PEER_LINKS, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshMaxRetries, mask, NL80211_MESHCONF_MAX_RETRIES, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshTTL, mask, NL80211_MESHCONF_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, element_ttl, mask, NL80211_MESHCONF_ELEMENT_TTL, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, auto_open_plinks, mask, NL80211_MESHCONF_AUTO_OPEN_PLINKS, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNbrOffsetMaxNeighbor, mask, NL80211_MESHCONF_SYNC_OFFSET_MAX_NEIGHBOR, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPmaxPREQretries, mask, NL80211_MESHCONF_HWMP_MAX_PREQ_RETRIES, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, path_refresh_time, mask, NL80211_MESHCONF_PATH_REFRESH_TIME, nla_get_u32); if (mask & BIT(NL80211_MESHCONF_PATH_REFRESH_TIME) && (cfg->path_refresh_time < 1 || cfg->path_refresh_time > 65535)) return 
-EINVAL; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, min_discovery_timeout, mask, NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathTimeout, mask, NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT, nla_get_u32); if (mask & BIT(NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT) && (cfg->dot11MeshHWMPactivePathTimeout < 1 || cfg->dot11MeshHWMPactivePathTimeout > 65535)) return -EINVAL; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval, mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval, mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPnetDiameterTraversalTime, mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRootMode, mask, NL80211_MESHCONF_HWMP_ROOTMODE, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPRannInterval, mask, NL80211_MESHCONF_HWMP_RANN_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshGateAnnouncementProtocol, mask, NL80211_MESHCONF_GATE_ANNOUNCEMENTS, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshForwarding, mask, NL80211_MESHCONF_FORWARDING, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nla_get_s32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToMeshGate, mask, NL80211_MESHCONF_CONNECTED_TO_GATE, nla_get_u8); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshConnectedToAuthServer, mask, NL80211_MESHCONF_CONNECTED_TO_AS, nla_get_u8); /* * Check HT operation mode based on * IEEE 802.11-2016 9.4.2.57 HT Operation element. 
*/ if (tb[NL80211_MESHCONF_HT_OPMODE]) { ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION | IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) return -EINVAL; /* NON_HT_STA bit is reserved, but some programs set it */ ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT; cfg->ht_opmode = ht_opmode; mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1)); } FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, nla_get_u32); if (mask & BIT(NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT) && (cfg->dot11MeshHWMPactivePathToRootTimeout < 1 || cfg->dot11MeshHWMPactivePathToRootTimeout > 65535)) return -EINVAL; FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMProotInterval, mask, NL80211_MESHCONF_HWMP_ROOT_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPconfirmationInterval, mask, NL80211_MESHCONF_HWMP_CONFIRMATION_INTERVAL, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, power_mode, mask, NL80211_MESHCONF_POWER_MODE, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, mask, NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, mask, NL80211_MESHCONF_PLINK_TIMEOUT, nla_get_u32); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshNolearn, mask, NL80211_MESHCONF_NOLEARN, nla_get_u8); if (mask_out) *mask_out = mask; return 0; #undef FILL_IN_MESH_PARAM_IF_SET } static int nl80211_parse_mesh_setup(struct genl_info *info, struct mesh_setup *setup) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NL80211_MESH_SETUP_ATTR_MAX + 1]; if (!info->attrs[NL80211_ATTR_MESH_SETUP]) return -EINVAL; if (nla_parse_nested_deprecated(tb, NL80211_MESH_SETUP_ATTR_MAX, info->attrs[NL80211_ATTR_MESH_SETUP], nl80211_mesh_setup_params_policy, info->extack)) return -EINVAL; if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC]) setup->sync_method = 
(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_SYNC])) ?
			 IEEE80211_SYNC_METHOD_VENDOR :
			 IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET;

	/* Each "vendor" u8 picks between the vendor-specific and the default method. */
	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])
		setup->path_sel_proto =
		(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_PATH_SEL])) ?
		 IEEE80211_PATH_PROTOCOL_VENDOR :
		 IEEE80211_PATH_PROTOCOL_HWMP;

	if (tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])
		setup->path_metric =
		(nla_get_u8(tb[NL80211_MESH_SETUP_ENABLE_VENDOR_METRIC])) ?
		 IEEE80211_PATH_METRIC_VENDOR :
		 IEEE80211_PATH_METRIC_AIRTIME;

	if (tb[NL80211_MESH_SETUP_IE]) {
		struct nlattr *ieattr =
			tb[NL80211_MESH_SETUP_IE];
		/* The IE data is referenced in place inside the request. */
		setup->ie = nla_data(ieattr);
		setup->ie_len = nla_len(ieattr);
	}
	/* Userspace MPM can only be requested if the wiphy advertises it. */
	if (tb[NL80211_MESH_SETUP_USERSPACE_MPM] &&
	    !(rdev->wiphy.features & NL80211_FEATURE_USERSPACE_MPM))
		return -EINVAL;
	setup->user_mpm = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_MPM]);
	setup->is_authenticated = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AUTH]);
	setup->is_secure = nla_get_flag(tb[NL80211_MESH_SETUP_USERSPACE_AMPE]);
	/* USERSPACE_AMPE implies userspace MPM, without the feature check above. */
	if (setup->is_secure)
		setup->user_mpm = true;

	/* An auth protocol is only accepted together with userspace MPM. */
	if (tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]) {
		if (!setup->user_mpm)
			return -EINVAL;
		setup->auth_id =
			nla_get_u8(tb[NL80211_MESH_SETUP_AUTH_PROTOCOL]);
	}

	return 0;
}

/*
 * Apply a partial mesh configuration to a mesh point interface.
 *
 * The request is parsed with nl80211_parse_mesh_config(); the resulting mask
 * and values go to rdev_update_mesh_config().
 *
 * Returns -EOPNOTSUPP for a non-mesh interface or a driver without
 * update_mesh_config, the parse error, -ENOLINK when the interface has no
 * mesh ID set, otherwise the driver's result.
 */
static int nl80211_update_mesh_config(struct sk_buff *skb,
				      struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct mesh_config cfg = {};
	u32 mask;
	int err;

	if (wdev->iftype != NL80211_IFTYPE_MESH_POINT)
		return -EOPNOTSUPP;

	if (!rdev->ops->update_mesh_config)
		return -EOPNOTSUPP;

	err = nl80211_parse_mesh_config(info, &cfg, &mask);
	if (err)
		return err;

	if (!wdev->u.mesh.id_len)
		err = -ENOLINK;

	if (!err)
		err = rdev_update_mesh_config(rdev, dev, mask, &cfg);

	return err;
}

/*
 * Append a regulatory domain to @msg: the alpha2 string, the DFS region when
 * it is non-zero, and one nested entry per rule under NL80211_ATTR_REG_RULES.
 *
 * Returns 0 on success or -EMSGSIZE when any attribute cannot be added.
 */
static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom,
			      struct sk_buff *msg)
{
	struct nlattr *nl_reg_rules;
	unsigned int i;

	if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) ||
	    (regdom->dfs_region &&
	     nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region)))
		goto nla_put_failure;

	nl_reg_rules = nla_nest_start_noflag(msg, NL80211_ATTR_REG_RULES);
	if (!nl_reg_rules)
		goto nla_put_failure;

	for (i = 0; i < regdom->n_reg_rules; i++) {
		struct nlattr *nl_reg_rule;
		const struct ieee80211_reg_rule *reg_rule;
		const struct ieee80211_freq_range *freq_range;
		const struct ieee80211_power_rule *power_rule;
		unsigned int max_bandwidth_khz;

		reg_rule = &regdom->reg_rules[i];
		freq_range = &reg_rule->freq_range;
		power_rule = &reg_rule->power_rule;

		/* Each rule is nested under its index in the rule array. */
		nl_reg_rule = nla_nest_start_noflag(msg, i);
		if (!nl_reg_rule)
			goto nla_put_failure;

		/* A rule with no explicit maximum bandwidth gets a computed one. */
		max_bandwidth_khz = freq_range->max_bandwidth_khz;
		if (!max_bandwidth_khz)
			max_bandwidth_khz = reg_get_max_bandwidth(regdom,
								  reg_rule);

		if (nla_put_u32(msg, NL80211_ATTR_REG_RULE_FLAGS,
				reg_rule->flags) ||
		    nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_START,
				freq_range->start_freq_khz) ||
		    nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_END,
				freq_range->end_freq_khz) ||
		    nla_put_u32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW,
				max_bandwidth_khz) ||
		    nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
				power_rule->max_antenna_gain) ||
		    nla_put_u32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
				power_rule->max_eirp) ||
		    nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME,
				reg_rule->dfs_cac_ms))
			goto nla_put_failure;

		/* The PSD value is only reported for rules flagged NL80211_RRF_PSD. */
		if ((reg_rule->flags & NL80211_RRF_PSD) &&
		    nla_put_s8(msg, NL80211_ATTR_POWER_RULE_PSD,
			       reg_rule->psd))
			goto nla_put_failure;

		nla_nest_end(msg, nl_reg_rule);
	}

	nla_nest_end(msg, nl_reg_rules);
	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/*
 * Reply to a single regulatory-domain query.
 *
 * With NL80211_ATTR_WIPHY the wiphy's own regdom from get_wiphy_regdom() is
 * reported when it has one; otherwise, and when no wiphy is given, the global
 * cfg80211_regdomain is reported.  The regdom is read under rcu_read_lock(),
 * which is taken inside rtnl_lock().
 *
 * Returns -ENOBUFS if the reply cannot be allocated, the lookup error for an
 * unknown wiphy, -EINVAL for a self-managed wiphy without a private regdom,
 * -EMSGSIZE if the reply cannot be filled in, otherwise the result of
 * genlmsg_reply().
 */
static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info)
{
	const struct ieee80211_regdomain *regdom = NULL;
	struct cfg80211_registered_device *rdev;
	struct wiphy *wiphy = NULL;
	struct sk_buff *msg;
	int err = -EMSGSIZE;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOBUFS;

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_GET_REG);
	if (!hdr)
		goto put_failure;

	rtnl_lock();

	if (info->attrs[NL80211_ATTR_WIPHY]) {
		bool self_managed;

		rdev = cfg80211_get_dev_from_info(genl_info_net(info), info);
		if (IS_ERR(rdev)) {
			err = PTR_ERR(rdev);
			goto nla_put_failure;
		}

		wiphy = &rdev->wiphy;
		self_managed = wiphy->regulatory_flags &
			       REGULATORY_WIPHY_SELF_MANAGED;

		rcu_read_lock();

		regdom = get_wiphy_regdom(wiphy);

		/* a self-managed-reg device must have a private regdom */
		if (WARN_ON(!regdom && self_managed)) {
			err = -EINVAL;
			goto nla_put_failure_rcu;
		}

		/* The wiphy index is only reported with a wiphy-private regdom. */
		if (regdom &&
		    nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)))
			goto nla_put_failure_rcu;
	} else {
		/* Both branches leave the RCU read lock held from here on. */
		rcu_read_lock();
	}

	if (!wiphy && reg_last_request_cell_base() &&
	    nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE,
			NL80211_USER_REG_HINT_CELL_BASE))
		goto nla_put_failure_rcu;

	/* Fall back to the global regdom if there is no wiphy-private one. */
	if (!regdom)
		regdom = rcu_dereference(cfg80211_regdomain);

	if (nl80211_put_regdom(regdom, msg))
		goto nla_put_failure_rcu;

	rcu_read_unlock();

	genlmsg_end(msg, hdr);
	rtnl_unlock();
	return genlmsg_reply(msg, info);

	/* The labels undo the acquisitions in reverse order: RCU, RTNL, message. */
nla_put_failure_rcu:
	rcu_read_unlock();
nla_put_failure:
	rtnl_unlock();
put_failure:
	nlmsg_free(msg);
	return err;
}

/*
 * Emit one NL80211_CMD_GET_REG message for @regdom into a dump.
 *
 * @wiphy is NULL for the global regdom; for a wiphy-private regdom the wiphy
 * index is added, plus NL80211_ATTR_WIPHY_SELF_MANAGED_REG when the wiphy is
 * self-managed.
 *
 * Returns 0 on success, -1 when the message header cannot be added, and
 * -EMSGSIZE (after cancelling the partial message) when an attribute does
 * not fit.
 */
static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb,
			       u32 seq, int flags, struct wiphy *wiphy,
			       const struct ieee80211_regdomain *regdom)
{
	void *hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
				   NL80211_CMD_GET_REG);

	if (!hdr)
		return -1;

	genl_dump_check_consistent(cb, hdr);

	if (nl80211_put_regdom(regdom, msg))
		goto nla_put_failure;

	if (!wiphy && reg_last_request_cell_base() &&
	    nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE,
			NL80211_USER_REG_HINT_CELL_BASE))
		goto nla_put_failure;

	if (wiphy &&
	    nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)))
		goto nla_put_failure;

	if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED &&
	    nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
}

/*
 * Dump the global regulatory domain followed by every wiphy-private one.
 *
 * cb->args[2] holds the resume index: 0 means nothing has been sent yet, so
 * the global regdom goes first; wiphy regdoms are numbered from there and the
 * ones already sent in an earlier pass are skipped.  The whole walk runs
 * under rcu_read_lock().
 *
 * Returns skb->len, or the error from sending the global regdom.
 */
static int nl80211_get_reg_dump(struct sk_buff *skb,
				struct netlink_callback *cb)
{
	const struct ieee80211_regdomain *regdom = NULL;
	struct cfg80211_registered_device *rdev;
	int err, reg_idx, start = cb->args[2];

	rcu_read_lock();

	if (cfg80211_regdomain && start == 0) {
		err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq,
					  NLM_F_MULTI, NULL,
					  rcu_dereference(cfg80211_regdomain));
		if (err < 0)
			goto out_err;
	}

	/* the global regdom is idx 0 */
	reg_idx = 1;
	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
		regdom = get_wiphy_regdom(&rdev->wiphy);
		if (!regdom)
			continue;

		/* Skip entries that an earlier pass already delivered. */
		if (++reg_idx <= start)
			continue;

		err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq,
					  NLM_F_MULTI, &rdev->wiphy, regdom);
		if (err < 0) {
			/* Not sent: step back so the next pass retries this entry. */
			reg_idx--;
			break;
		}
	}

	cb->args[2] = reg_idx;
	err = skb->len;
out_err:
	rcu_read_unlock();
	return err;
}

#ifdef CONFIG_CFG80211_CRDA_SUPPORT
/* Validation policy for the attributes of one nested regulatory rule. */
static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
	[NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 },
	[NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 },
	[NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 },
	[NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 },
	[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 },
	[NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 },
	[NL80211_ATTR_DFS_CAC_TIME] = { .type = NLA_U32 },
};

/*
 * Fill @reg_rule from the parsed attributes in @tb.
 *
 * Flags, frequency range start/end, maximum bandwidth and maximum EIRP are
 * mandatory; antenna gain and DFS CAC time are copied only when present.
 *
 * Returns 0 on success or -EINVAL when a mandatory attribute is missing.
 */
static int parse_reg_rule(struct nlattr *tb[],
	struct ieee80211_reg_rule *reg_rule)
{
	struct ieee80211_freq_range *freq_range = &reg_rule->freq_range;
	struct ieee80211_power_rule *power_rule = &reg_rule->power_rule;

	if (!tb[NL80211_ATTR_REG_RULE_FLAGS])
		return -EINVAL;
	if (!tb[NL80211_ATTR_FREQ_RANGE_START])
		return -EINVAL;
	if (!tb[NL80211_ATTR_FREQ_RANGE_END])
		return -EINVAL;
	if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW])
		return -EINVAL;
	if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP])
		return -EINVAL;

	reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]);

	freq_range->start_freq_khz =
		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]);
	freq_range->end_freq_khz =
		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]);
	freq_range->max_bandwidth_khz =
		nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]);

	power_rule->max_eirp =
		nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]);

	if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN])
		power_rule->max_antenna_gain =
			nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]);

	if (tb[NL80211_ATTR_DFS_CAC_TIME])
		reg_rule->dfs_cac_ms =
			nla_get_u32(tb[NL80211_ATTR_DFS_CAC_TIME]);

	return 0;
}

/*
 * Install a regulatory domain supplied by userspace.
 *
 * The rules nested in NL80211_ATTR_REG_RULES are counted first (at most
 * NL80211_MAX_SUPP_REG_RULES are accepted), then a regdomain of that size is
 * allocated, filled rule by rule and passed to set_regdom() under
 * rtnl_lock().
 *
 * Returns -EINVAL for missing attributes, too many rules, a request that
 * reg_is_valid_request() rejects, or a malformed rule; -ENOMEM on allocation
 * failure; otherwise the result of set_regdom().
 */
static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
	struct nlattr *nl_reg_rule;
	char *alpha2;
	int rem_reg_rules, r;
	u32 num_rules = 0, rule_idx = 0;
	enum nl80211_dfs_regions dfs_region = NL80211_DFS_UNSET;
	struct ieee80211_regdomain *rd;

	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_REG_RULES])
		return -EINVAL;

	alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);

	if (info->attrs[NL80211_ATTR_DFS_REGION])
		dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]);

	/* First pass: count the rules so the regdomain can be sized. */
	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
			    rem_reg_rules) {
		num_rules++;
		if (num_rules > NL80211_MAX_SUPP_REG_RULES)
			return -EINVAL;
	}

	rtnl_lock();
	if (!reg_is_valid_request(alpha2)) {
		r = -EINVAL;
		goto out;
	}

	rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL);
	if (!rd) {
		r = -ENOMEM;
		goto out;
	}

	rd->n_reg_rules = num_rules;
	rd->alpha2[0] = alpha2[0];
	rd->alpha2[1] = alpha2[1];

	/*
	 * Disable DFS master mode if the DFS region was
	 * not supported or known on this kernel.
	 */
	if (reg_supported_dfs_region(dfs_region))
		rd->dfs_region = dfs_region;

	/* Second pass: parse each rule into the allocated array. */
	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
			    rem_reg_rules) {
		r = nla_parse_nested_deprecated(tb, NL80211_REG_RULE_ATTR_MAX,
						nl_reg_rule, reg_rule_policy,
						info->extack);
		if (r)
			goto bad_reg;
		r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]);
		if (r)
			goto bad_reg;

		rule_idx++;

		if (rule_idx > NL80211_MAX_SUPP_REG_RULES) {
			r = -EINVAL;
			goto bad_reg;
		}
	}

	r = set_regdom(rd, REGD_SOURCE_CRDA);
	/* set_regdom takes ownership of rd */
	rd = NULL;
	/* After the hand-off rd is NULL, so the kfree() below is a no-op. */
 bad_reg:
	kfree(rd);
 out:
	rtnl_unlock();
	return r;
}
#endif /* CONFIG_CFG80211_CRDA_SUPPORT */

/*
 * Validate a nested list of scan frequencies.
 *
 * Returns the number of entries, or 0 when an entry is not exactly
 * sizeof(u32) long or the same frequency appears twice.
 */
static int validate_scan_freqs(struct nlattr *freqs)
{
	struct nlattr *attr1, *attr2;
	int n_channels = 0, tmp1, tmp2;

	/* Check every length first so the u32 reads below are safe. */
	nla_for_each_nested(attr1, freqs, tmp1)
		if (nla_len(attr1) != sizeof(u32))
			return 0;

	nla_for_each_nested(attr1, freqs, tmp1) {
		n_channels++;
		/*
		 * Some hardware has a limited channel list for
		 * scanning, and it is pretty much nonsensical
		 * to scan for a channel twice, so disallow that
		 * and don't require drivers to check that the
		 * channel list they get isn't longer than what
		 * they can scan, as long as they can scan all
		 * the channels they registered at once.
*/ nla_for_each_nested(attr2, freqs, tmp2) if (attr1 != attr2 && nla_get_u32(attr1) == nla_get_u32(attr2)) return 0; } return n_channels; } static bool is_band_valid(struct wiphy *wiphy, enum nl80211_band b) { return b < NUM_NL80211_BANDS && wiphy->bands[b]; } static int parse_bss_select(struct nlattr *nla, struct wiphy *wiphy, struct cfg80211_bss_selection *bss_select) { struct nlattr *attr[NL80211_BSS_SELECT_ATTR_MAX + 1]; struct nlattr *nest; int err; bool found = false; int i; /* only process one nested attribute */ nest = nla_data(nla); if (!nla_ok(nest, nla_len(nest))) return -EINVAL; err = nla_parse_nested_deprecated(attr, NL80211_BSS_SELECT_ATTR_MAX, nest, nl80211_bss_select_policy, NULL); if (err) return err; /* only one attribute may be given */ for (i = 0; i <= NL80211_BSS_SELECT_ATTR_MAX; i++) { if (attr[i]) { if (found) return -EINVAL; found = true; } } bss_select->behaviour = __NL80211_BSS_SELECT_ATTR_INVALID; if (attr[NL80211_BSS_SELECT_ATTR_RSSI]) bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI; if (attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]) { bss_select->behaviour = NL80211_BSS_SELECT_ATTR_BAND_PREF; bss_select->param.band_pref = nla_get_u32(attr[NL80211_BSS_SELECT_ATTR_BAND_PREF]); if (!is_band_valid(wiphy, bss_select->param.band_pref)) return -EINVAL; } if (attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]) { struct nl80211_bss_select_rssi_adjust *adj_param; adj_param = nla_data(attr[NL80211_BSS_SELECT_ATTR_RSSI_ADJUST]); bss_select->behaviour = NL80211_BSS_SELECT_ATTR_RSSI_ADJUST; bss_select->param.adjust.band = adj_param->band; bss_select->param.adjust.delta = adj_param->delta; if (!is_band_valid(wiphy, bss_select->param.adjust.band)) return -EINVAL; } /* user-space did not provide behaviour attribute */ if (bss_select->behaviour == __NL80211_BSS_SELECT_ATTR_INVALID) return -EINVAL; if (!(wiphy->bss_select_support & BIT(bss_select->behaviour))) return -EINVAL; return 0; } int nl80211_parse_random_mac(struct nlattr **attrs, u8 *mac_addr, u8 
*mac_addr_mask) { int i; if (!attrs[NL80211_ATTR_MAC] && !attrs[NL80211_ATTR_MAC_MASK]) { eth_zero_addr(mac_addr); eth_zero_addr(mac_addr_mask); mac_addr[0] = 0x2; mac_addr_mask[0] = 0x3; return 0; } /* need both or none */ if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_MAC_MASK]) return -EINVAL; memcpy(mac_addr, nla_data(attrs[NL80211_ATTR_MAC]), ETH_ALEN); memcpy(mac_addr_mask, nla_data(attrs[NL80211_ATTR_MAC_MASK]), ETH_ALEN); /* don't allow or configure an mcast address */ if (!is_multicast_ether_addr(mac_addr_mask) || is_multicast_ether_addr(mac_addr)) return -EINVAL; /* * allow users to pass a MAC address that has bits set outside * of the mask, but don't bother drivers with having to deal * with such bits */ for (i = 0; i < ETH_ALEN; i++) mac_addr[i] &= mac_addr_mask[i]; return 0; } static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, struct ieee80211_channel *chan) { unsigned int link_id; bool all_ok = true; lockdep_assert_wiphy(wdev->wiphy); if (!cfg80211_wdev_channel_allowed(wdev, chan)) return false; if (!cfg80211_beaconing_iface_active(wdev)) return true; /* * FIXME: check if we have a free HW resource/link for chan * * This, as well as the FIXME below, requires knowing the link * capabilities of the hardware. */ /* we cannot leave radar channels */ for_each_valid_link(wdev, link_id) { struct cfg80211_chan_def *chandef; chandef = wdev_chandef(wdev, link_id); if (!chandef || !chandef->chan) continue; /* * FIXME: don't require all_ok, but rather check only the * correct HW resource/link onto which 'chan' falls, * as only that link leaves the channel for doing * the off-channel operation. 
*/ if (chandef->chan->flags & IEEE80211_CHAN_RADAR) all_ok = false; } if (all_ok) return true; return regulatory_pre_cac_allowed(wdev->wiphy); } static bool nl80211_check_scan_feat(struct wiphy *wiphy, u32 flags, u32 flag, enum nl80211_ext_feature_index feat) { if (!(flags & flag)) return true; if (wiphy_ext_feature_isset(wiphy, feat)) return true; return false; } static int nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, void *request, struct nlattr **attrs, bool is_sched_scan) { u8 *mac_addr, *mac_addr_mask; u32 *flags; enum nl80211_feature_flags randomness_flag; if (!attrs[NL80211_ATTR_SCAN_FLAGS]) return 0; if (is_sched_scan) { struct cfg80211_sched_scan_request *req = request; randomness_flag = wdev ? NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : NL80211_FEATURE_ND_RANDOM_MAC_ADDR; flags = &req->flags; mac_addr = req->mac_addr; mac_addr_mask = req->mac_addr_mask; } else { struct cfg80211_scan_request *req = request; randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; flags = &req->flags; mac_addr = req->mac_addr; mac_addr_mask = req->mac_addr_mask; } *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); if (((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_LOW_SPAN, NL80211_EXT_FEATURE_LOW_SPAN_SCAN) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_LOW_POWER, NL80211_EXT_FEATURE_LOW_POWER_SCAN) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_HIGH_ACCURACY, NL80211_EXT_FEATURE_HIGH_ACCURACY_SCAN) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME, NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP, NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION, 
NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE, NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_RANDOM_SN, NL80211_EXT_FEATURE_SCAN_RANDOM_SN) || !nl80211_check_scan_feat(wiphy, *flags, NL80211_SCAN_FLAG_MIN_PREQ_CONTENT, NL80211_EXT_FEATURE_SCAN_MIN_PREQ_CONTENT)) return -EOPNOTSUPP; if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { int err; if (!(wiphy->features & randomness_flag) || (wdev && wdev->connected)) return -EOPNOTSUPP; err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); if (err) return err; } return 0; } static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_scan_request *request; struct nlattr *scan_freqs = NULL; bool scan_freqs_khz = false; struct nlattr *attr; struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels, i; size_t ie_len, size; size_t ssids_offset, ie_offset; wiphy = &rdev->wiphy; if (wdev->iftype == NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!rdev->ops->scan) return -EOPNOTSUPP; if (rdev->scan_req || rdev->scan_msg) return -EBUSY; if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) { if (!wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_SCAN_FREQ_KHZ)) return -EOPNOTSUPP; scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]; scan_freqs_khz = true; } else if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]; if (scan_freqs) { n_channels = validate_scan_freqs(scan_freqs); if (!n_channels) return -EINVAL; } else { n_channels = ieee80211_get_num_supported_channels(wiphy); } if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; if (n_ssids > wiphy->max_scan_ssids) return -EINVAL; if (info->attrs[NL80211_ATTR_IE]) 
ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); else ie_len = 0; if (ie_len > wiphy->max_scan_ie_len) return -EINVAL; size = struct_size(request, channels, n_channels); ssids_offset = size; size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); ie_offset = size; size = size_add(size, ie_len); request = kzalloc(size, GFP_KERNEL); if (!request) return -ENOMEM; request->n_channels = n_channels; if (n_ssids) request->ssids = (void *)request + ssids_offset; request->n_ssids = n_ssids; if (ie_len) request->ie = (void *)request + ie_offset; i = 0; if (scan_freqs) { /* user specified, bail out if channel not found */ nla_for_each_nested(attr, scan_freqs, tmp) { struct ieee80211_channel *chan; int freq = nla_get_u32(attr); if (!scan_freqs_khz) freq = MHZ_TO_KHZ(freq); chan = ieee80211_get_channel_khz(wiphy, freq); if (!chan) { err = -EINVAL; goto out_free; } /* ignore disabled channels */ if (chan->flags & IEEE80211_CHAN_DISABLED || !cfg80211_wdev_channel_allowed(wdev, chan)) continue; request->channels[i] = chan; i++; } } else { enum nl80211_band band; /* all channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; for (j = 0; j < wiphy->bands[band]->n_channels; j++) { struct ieee80211_channel *chan; chan = &wiphy->bands[band]->channels[j]; if (chan->flags & IEEE80211_CHAN_DISABLED || !cfg80211_wdev_channel_allowed(wdev, chan)) continue; request->channels[i] = chan; i++; } } } if (!i) { err = -EINVAL; goto out_free; } request->n_channels = i; for (i = 0; i < request->n_channels; i++) { struct ieee80211_channel *chan = request->channels[i]; /* if we can go off-channel to the target channel we're good */ if (cfg80211_off_channel_oper_allowed(wdev, chan)) continue; if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) { err = -EBUSY; goto out_free; } } i = 0; if (n_ssids) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; 
} request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); i++; } } if (info->attrs[NL80211_ATTR_IE]) { request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); memcpy((void *)request->ie, nla_data(info->attrs[NL80211_ATTR_IE]), request->ie_len); } for (i = 0; i < NUM_NL80211_BANDS; i++) if (wiphy->bands[i]) request->rates[i] = (1 << wiphy->bands[i]->n_bitrates) - 1; if (info->attrs[NL80211_ATTR_SCAN_SUPP_RATES]) { nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SUPP_RATES], tmp) { enum nl80211_band band = nla_type(attr); if (band < 0 || band >= NUM_NL80211_BANDS) { err = -EINVAL; goto out_free; } if (!wiphy->bands[band]) continue; err = ieee80211_get_ratemask(wiphy->bands[band], nla_data(attr), nla_len(attr), &request->rates[band]); if (err) goto out_free; } } if (info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]) { request->duration = nla_get_u16(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION]); request->duration_mandatory = nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, false); if (err) goto out_free; request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); /* Initial implementation used NL80211_ATTR_MAC to set the specific * BSSID to scan for. This was problematic because that same attribute * was already used for another purpose (local random MAC address). The * NL80211_ATTR_BSSID attribute was added to fix this. For backwards * compatibility with older userspace components, also use the * NL80211_ATTR_MAC value here if it can be determined to be used for * the specific BSSID use case instead of the random MAC address * (NL80211_ATTR_SCAN_FLAGS is used to enable random MAC address use). 
*/
	if (info->attrs[NL80211_ATTR_BSSID])
		memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]),
		       ETH_ALEN);
	else if (!(request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) &&
		 info->attrs[NL80211_ATTR_MAC])
		memcpy(request->bssid, nla_data(info->attrs[NL80211_ATTR_MAC]),
		       ETH_ALEN);
	else
		eth_broadcast_addr(request->bssid);

	request->tsf_report_link_id = nl80211_link_id_or_invalid(info->attrs);
	request->wdev = wdev;
	request->wiphy = &rdev->wiphy;
	request->scan_start = jiffies;

	rdev->scan_req = request;
	err = cfg80211_scan(rdev);

	if (err)
		goto out_free;

	nl80211_send_scan_start(rdev, wdev);
	dev_hold(wdev->netdev);

	return 0;

out_free:
	rdev->scan_req = NULL;
	kfree(request);

	return err;
}

/*
 * Ask the driver to abort the scan currently tracked in rdev->scan_req.
 *
 * Returns -EOPNOTSUPP if the driver has no abort_scan op, -ENOENT if no
 * scan request is outstanding, and 0 otherwise. 0 is also returned without
 * calling the driver when rdev->scan_msg is set (presumably a scan result
 * notification is already pending - confirm against the scan code).
 */
static int nl80211_abort_scan(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];

	if (!rdev->ops->abort_scan)
		return -EOPNOTSUPP;

	if (rdev->scan_msg)
		return 0;

	if (!rdev->scan_req)
		return -ENOENT;

	rdev_abort_scan(rdev, wdev);
	return 0;
}

/*
 * Fill request->scan_plans[] from the netlink attributes.
 *
 * @wiphy: supplies the max_sched_scan_plan_interval/_iterations limits
 * @n_plans: number of scan_plans[] entries expected to be filled
 * @request: request whose scan_plans[] array is written
 * @attrs: parsed attribute table
 *
 * Without NL80211_ATTR_SCHED_SCAN_PLANS a single plan is derived from
 * NL80211_ATTR_SCHED_SCAN_INTERVAL, which is read unconditionally in that
 * case. Otherwise every nested plan is validated and copied in order.
 *
 * Returns 0 on success, -EINVAL on invalid input, or the error returned by
 * nla_parse_nested_deprecated().
 */
static int
nl80211_parse_sched_scan_plans(struct wiphy *wiphy, int n_plans,
			       struct cfg80211_sched_scan_request *request,
			       struct nlattr **attrs)
{
	int tmp, err, i = 0;
	struct nlattr *attr;

	if (!attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) {
		u32 interval;

		/*
		 * If scan plans are not specified,
		 * %NL80211_ATTR_SCHED_SCAN_INTERVAL will be specified. In this
		 * case one scan plan will be set with the specified scan
		 * interval and infinite number of iterations.
*/
		interval = nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]);
		if (!interval)
			return -EINVAL;

		/* the plan stores the value in MSEC_PER_SEC units, rounded up */
		request->scan_plans[0].interval =
			DIV_ROUND_UP(interval, MSEC_PER_SEC);
		if (!request->scan_plans[0].interval)
			return -EINVAL;

		/* unlike explicit plans below, a too-long interval is clamped */
		if (request->scan_plans[0].interval >
		    wiphy->max_sched_scan_plan_interval)
			request->scan_plans[0].interval =
				wiphy->max_sched_scan_plan_interval;

		return 0;
	}

	nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) {
		struct nlattr *plan[NL80211_SCHED_SCAN_PLAN_MAX + 1];

		/* more nested plans than the caller sized scan_plans[] for */
		if (WARN_ON(i >= n_plans))
			return -EINVAL;

		err = nla_parse_nested_deprecated(plan,
						  NL80211_SCHED_SCAN_PLAN_MAX,
						  attr, nl80211_plan_policy,
						  NULL);
		if (err)
			return err;

		if (!plan[NL80211_SCHED_SCAN_PLAN_INTERVAL])
			return -EINVAL;

		request->scan_plans[i].interval =
			nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_INTERVAL]);
		if (!request->scan_plans[i].interval ||
		    request->scan_plans[i].interval >
		    wiphy->max_sched_scan_plan_interval)
			return -EINVAL;

		if (plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]) {
			request->scan_plans[i].iterations =
				nla_get_u32(plan[NL80211_SCHED_SCAN_PLAN_ITERATIONS]);
			if (!request->scan_plans[i].iterations ||
			    (request->scan_plans[i].iterations >
			     wiphy->max_sched_scan_plan_iterations))
				return -EINVAL;
		} else if (i < n_plans - 1) {
			/*
			 * All scan plans but the last one must specify
			 * a finite number of iterations
			 */
			return -EINVAL;
		}

		i++;
	}

	/*
	 * The last scan plan must not specify the number of
	 * iterations, it is supposed to run infinitely
	 */
	if (request->scan_plans[n_plans - 1].iterations)
		return -EINVAL;

	return 0;
}

/*
 * Build a scheduled-scan request from netlink attributes.
 *
 * Counts the channels, SSIDs, match sets and scan plans, checks them
 * against the wiphy limits and @max_match_sets, then performs a single
 * kzalloc() that holds the request together with its trailing
 * ssids/ie/match_sets/scan_plans storage and fills everything in.
 *
 * Returns the allocated request, or an ERR_PTR() value on failure; the
 * request is freed on every error path taken after the allocation.
 */
static struct cfg80211_sched_scan_request *
nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev,
			 struct nlattr **attrs, int max_match_sets)
{
	struct cfg80211_sched_scan_request *request;
	struct nlattr *attr;
	int err, tmp, n_ssids = 0, n_match_sets = 0, n_channels, i, n_plans = 0;
	enum nl80211_band band;
	size_t ie_len, size;
	struct nlattr *tb[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1];
	s32 default_match_rssi =
NL80211_SCAN_RSSI_THOLD_OFF; if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { n_channels = validate_scan_freqs( attrs[NL80211_ATTR_SCAN_FREQUENCIES]); if (!n_channels) return ERR_PTR(-EINVAL); } else { n_channels = ieee80211_get_num_supported_channels(wiphy); } if (attrs[NL80211_ATTR_SCAN_SSIDS]) nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS], tmp) n_ssids++; if (n_ssids > wiphy->max_sched_scan_ssids) return ERR_PTR(-EINVAL); /* * First, count the number of 'real' matchsets. Due to an issue with * the old implementation, matchsets containing only the RSSI attribute * (NL80211_SCHED_SCAN_MATCH_ATTR_RSSI) are considered as the 'default' * RSSI for all matchsets, rather than their own matchset for reporting * all APs with a strong RSSI. This is needed to be compatible with * older userspace that treated a matchset with only the RSSI as the * global RSSI for all other matchsets - if there are other matchsets. */ if (attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) { nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_MATCH], tmp) { struct nlattr *rssi; err = nla_parse_nested_deprecated(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, attr, nl80211_match_policy, NULL); if (err) return ERR_PTR(err); /* SSID and BSSID are mutually exclusive */ if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] && tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) return ERR_PTR(-EINVAL); /* add other standalone attributes here */ if (tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] || tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]) { n_match_sets++; continue; } rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI]; if (rssi) default_match_rssi = nla_get_s32(rssi); } } /* However, if there's no other matchset, add the RSSI one */ if (!n_match_sets && default_match_rssi != NL80211_SCAN_RSSI_THOLD_OFF) n_match_sets = 1; if (n_match_sets > max_match_sets) return ERR_PTR(-EINVAL); if (attrs[NL80211_ATTR_IE]) ie_len = nla_len(attrs[NL80211_ATTR_IE]); else ie_len = 0; if (ie_len > wiphy->max_sched_scan_ie_len) return ERR_PTR(-EINVAL); if 
(attrs[NL80211_ATTR_SCHED_SCAN_PLANS]) { /* * NL80211_ATTR_SCHED_SCAN_INTERVAL must not be specified since * each scan plan already specifies its own interval */ if (attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) return ERR_PTR(-EINVAL); nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_PLANS], tmp) n_plans++; } else { /* * The scan interval attribute is kept for backward * compatibility. If no scan plans are specified and sched scan * interval is specified, one scan plan will be set with this * scan interval and infinite number of iterations. */ if (!attrs[NL80211_ATTR_SCHED_SCAN_INTERVAL]) return ERR_PTR(-EINVAL); n_plans = 1; } if (!n_plans || n_plans > wiphy->max_sched_scan_plans) return ERR_PTR(-EINVAL); if (!wiphy_ext_feature_isset( wiphy, NL80211_EXT_FEATURE_SCHED_SCAN_RELATIVE_RSSI) && (attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] || attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST])) return ERR_PTR(-EINVAL); size = struct_size(request, channels, n_channels); size = size_add(size, array_size(sizeof(*request->ssids), n_ssids)); size = size_add(size, array_size(sizeof(*request->match_sets), n_match_sets)); size = size_add(size, array_size(sizeof(*request->scan_plans), n_plans)); size = size_add(size, ie_len); request = kzalloc(size, GFP_KERNEL); if (!request) return ERR_PTR(-ENOMEM); request->n_channels = n_channels; if (n_ssids) request->ssids = (void *)request + struct_size(request, channels, n_channels); request->n_ssids = n_ssids; if (ie_len) { if (n_ssids) request->ie = (void *)(request->ssids + n_ssids); else request->ie = (void *)(request->channels + n_channels); } if (n_match_sets) { if (request->ie) request->match_sets = (void *)(request->ie + ie_len); else if (n_ssids) request->match_sets = (void *)(request->ssids + n_ssids); else request->match_sets = (void *)(request->channels + n_channels); } request->n_match_sets = n_match_sets; if (n_match_sets) request->scan_plans = (void *)(request->match_sets + n_match_sets); else if (request->ie) 
request->scan_plans = (void *)(request->ie + ie_len); else if (n_ssids) request->scan_plans = (void *)(request->ssids + n_ssids); else request->scan_plans = (void *)(request->channels + n_channels); request->n_scan_plans = n_plans; i = 0; if (attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { /* user specified, bail out if channel not found */ nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) { struct ieee80211_channel *chan; chan = ieee80211_get_channel(wiphy, nla_get_u32(attr)); if (!chan) { err = -EINVAL; goto out_free; } /* ignore disabled channels */ if (chan->flags & IEEE80211_CHAN_DISABLED) continue; request->channels[i] = chan; i++; } } else { /* all channels */ for (band = 0; band < NUM_NL80211_BANDS; band++) { int j; if (!wiphy->bands[band]) continue; for (j = 0; j < wiphy->bands[band]->n_channels; j++) { struct ieee80211_channel *chan; chan = &wiphy->bands[band]->channels[j]; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; request->channels[i] = chan; i++; } } } if (!i) { err = -EINVAL; goto out_free; } request->n_channels = i; i = 0; if (n_ssids) { nla_for_each_nested(attr, attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) { err = -EINVAL; goto out_free; } request->ssids[i].ssid_len = nla_len(attr); memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); i++; } } i = 0; if (attrs[NL80211_ATTR_SCHED_SCAN_MATCH]) { nla_for_each_nested(attr, attrs[NL80211_ATTR_SCHED_SCAN_MATCH], tmp) { struct nlattr *ssid, *bssid, *rssi; err = nla_parse_nested_deprecated(tb, NL80211_SCHED_SCAN_MATCH_ATTR_MAX, attr, nl80211_match_policy, NULL); if (err) goto out_free; ssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_SSID]; bssid = tb[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID]; if (!ssid && !bssid) { i++; continue; } if (WARN_ON(i >= n_match_sets)) { /* this indicates a programming error, * the loop above should have verified * things properly */ err = -EINVAL; goto out_free; } if (ssid) { memcpy(request->match_sets[i].ssid.ssid, 
nla_data(ssid), nla_len(ssid));
				request->match_sets[i].ssid.ssid_len =
					nla_len(ssid);
			}
			if (bssid)
				memcpy(request->match_sets[i].bssid,
				       nla_data(bssid), ETH_ALEN);

			/* special attribute - old implementation w/a */
			request->match_sets[i].rssi_thold = default_match_rssi;
			rssi = tb[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI];
			if (rssi)
				request->match_sets[i].rssi_thold =
					nla_get_s32(rssi);
			i++;
		}

		/* there was no other matchset, so the RSSI one is alone */
		/*
		 * NOTE(review): the loop above also increments i for entries
		 * that carry neither an SSID nor a BSSID, so i == 0 holds here
		 * only when no nested entry was iterated at all - confirm the
		 * RSSI-only fallback is still reached as intended.
		 */
		if (i == 0 && n_match_sets)
			request->match_sets[0].rssi_thold = default_match_rssi;

		/* lowest threshold over all match sets */
		request->min_rssi_thold = INT_MAX;
		for (i = 0; i < n_match_sets; i++)
			request->min_rssi_thold =
				min(request->match_sets[i].rssi_thold,
				    request->min_rssi_thold);
	} else {
		request->min_rssi_thold = NL80211_SCAN_RSSI_THOLD_OFF;
	}

	if (ie_len) {
		request->ie_len = ie_len;
		memcpy((void *)request->ie,
		       nla_data(attrs[NL80211_ATTR_IE]),
		       request->ie_len);
	}

	err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true);
	if (err)
		goto out_free;

	if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY])
		request->delay =
			nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]);

	if (attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI]) {
		request->relative_rssi = nla_get_s8(
			attrs[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI]);
		request->relative_rssi_set = true;
	}

	/* the RSSI adjustment is only honoured together with relative RSSI */
	if (request->relative_rssi_set &&
	    attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]) {
		struct nl80211_bss_select_rssi_adjust *rssi_adjust;

		rssi_adjust = nla_data(
			attrs[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST]);
		request->rssi_adjust.band = rssi_adjust->band;
		request->rssi_adjust.delta = rssi_adjust->delta;
		if (!is_band_valid(wiphy, request->rssi_adjust.band)) {
			err = -EINVAL;
			goto out_free;
		}
	}

	err = nl80211_parse_sched_scan_plans(wiphy, n_plans, request, attrs);
	if (err)
		goto out_free;

	request->scan_start = jiffies;

	return request;

out_free:
	kfree(request);
	return ERR_PTR(err);
}

/*
 * Start a scheduled scan on the net device found in info->user_ptr[1].
 *
 * Returns 0 on success and -EOPNOTSUPP when the wiphy allows no sched-scan
 * requests or the driver lacks sched_scan_start. Any error from the
 * cfg80211_sched_scan_req_possible() check, from attribute parsing or from
 * the driver is returned as-is; the parsed request is freed if the driver
 * rejects it.
 */
static int nl80211_start_sched_scan(struct sk_buff *skb,
				    struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev
= info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_sched_scan_request *sched_scan_req; bool want_multi; int err; if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_start) return -EOPNOTSUPP; want_multi = info->attrs[NL80211_ATTR_SCHED_SCAN_MULTI]; err = cfg80211_sched_scan_req_possible(rdev, want_multi); if (err) return err; sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, info->attrs, rdev->wiphy.max_match_sets); err = PTR_ERR_OR_ZERO(sched_scan_req); if (err) goto out_err; /* leave request id zero for legacy request * or if driver does not support multi-scheduled scan */ if (want_multi && rdev->wiphy.max_sched_scan_reqs > 1) sched_scan_req->reqid = cfg80211_assign_cookie(rdev); err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; sched_scan_req->dev = dev; sched_scan_req->wiphy = &rdev->wiphy; if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) sched_scan_req->owner_nlportid = info->snd_portid; cfg80211_add_sched_scan_req(rdev, sched_scan_req); nl80211_send_sched_scan(sched_scan_req, NL80211_CMD_START_SCHED_SCAN); return 0; out_free: kfree(sched_scan_req); out_err: return err; } static int nl80211_stop_sched_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_sched_scan_request *req; struct cfg80211_registered_device *rdev = info->user_ptr[0]; u64 cookie; if (!rdev->wiphy.max_sched_scan_reqs || !rdev->ops->sched_scan_stop) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_COOKIE]) { cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); return __cfg80211_stop_sched_scan(rdev, cookie, false); } req = list_first_or_null_rcu(&rdev->sched_scan_req_list, struct cfg80211_sched_scan_request, list); if (!req || req->reqid || (req->owner_nlportid && req->owner_nlportid != info->snd_portid)) return -ENOENT; return cfg80211_stop_sched_scan_req(rdev, req, false); } static int nl80211_start_radar_detection(struct sk_buff *skb, struct genl_info *info) { struct 
cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; int link_id = nl80211_link_id(info->attrs); struct wiphy *wiphy = wdev->wiphy; struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; unsigned int cac_time_ms; int err; flush_delayed_work(&rdev->dfs_update_channels_wk); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_ADHOC: break; default: /* caution - see cfg80211_beaconing_iface_active() below */ return -EINVAL; } guard(wiphy)(wiphy); dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) return err; if (err == 0) return -EINVAL; if (!cfg80211_chandef_dfs_usable(wiphy, &chandef)) return -EINVAL; if (nla_get_flag(info->attrs[NL80211_ATTR_RADAR_BACKGROUND])) return cfg80211_start_background_radar_detection(rdev, wdev, &chandef); if (cfg80211_beaconing_iface_active(wdev)) { /* During MLO other link(s) can beacon, only the current link * can not already beacon */ if (wdev->valid_links && !wdev->links[link_id].ap.beacon_interval) { /* nothing */ } else { return -EBUSY; } } if (wdev->links[link_id].cac_started) return -EBUSY; /* CAC start is offloaded to HW and can't be started manually */ if (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD)) return -EOPNOTSUPP; if (!rdev->ops->start_radar_detection) return -EOPNOTSUPP; cac_time_ms = cfg80211_chandef_dfs_cac_time(&rdev->wiphy, &chandef); if (WARN_ON(!cac_time_ms)) cac_time_ms = IEEE80211_DFS_MIN_CAC_TIME_MS; err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms, link_id); if (err) return err; switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: wdev->links[0].ap.chandef = chandef; break; case 
NL80211_IFTYPE_ADHOC: wdev->u.ibss.chandef = chandef; break; case NL80211_IFTYPE_MESH_POINT: wdev->u.mesh.chandef = chandef; break; default: break; } wdev->links[link_id].cac_started = true; wdev->links[link_id].cac_start_time = jiffies; wdev->links[link_id].cac_time_ms = cac_time_ms; return 0; } static int nl80211_notify_radar_detection(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_chan_def chandef; enum nl80211_dfs_regions dfs_region; int err; dfs_region = reg_get_dfs_region(wiphy); if (dfs_region == NL80211_DFS_UNSET) { GENL_SET_ERR_MSG(info, "DFS Region is not set. Unexpected Radar indication"); return -EINVAL; } err = nl80211_parse_chandef(rdev, info, &chandef); if (err) { GENL_SET_ERR_MSG(info, "Unable to extract chandef info"); return err; } err = cfg80211_chandef_dfs_required(wiphy, &chandef, wdev->iftype); if (err < 0) { GENL_SET_ERR_MSG(info, "chandef is invalid"); return err; } if (err == 0) { GENL_SET_ERR_MSG(info, "Unexpected Radar indication for chandef/iftype"); return -EINVAL; } /* Do not process this notification if radar is already detected * by kernel on this channel, and return success. 
*/
	if (chandef.chan->dfs_state == NL80211_DFS_UNAVAILABLE)
		return 0;

	cfg80211_set_dfs_state(wiphy, &chandef, NL80211_DFS_UNAVAILABLE);

	cfg80211_sched_dfs_chan_update(rdev);

	/* stored for the work item queued below */
	rdev->radar_chandef = chandef;

	/* Propagate this notification to other radios as well */
	queue_work(cfg80211_wq, &rdev->propagate_radar_detect_wk);

	return 0;
}

/*
 * Validate a counter-offsets attribute and hand back a pointer into it.
 *
 * @rdev: supplies wiphy.max_num_csa_counters (0 disables that limit)
 * @data: buffer the offsets index into
 * @datalen: length of @data
 * @first_count: value expected at every offset, or -1 to skip that check
 * @attr: attribute holding an array of u16 offsets; may be NULL
 * @offsets: out, points at the payload of @attr (no copy is made)
 * @n_offsets: out, number of offsets
 *
 * Returns 0 on success, including the case of a NULL @attr, where
 * *n_offsets is 0 and *offsets is left untouched. Returns -EINVAL if the
 * payload is empty, not a multiple of sizeof(u16), has too many entries,
 * or an offset lies outside @data or does not hold @first_count. Note that
 * *n_offsets has already been updated when the later checks fail.
 */
static int nl80211_parse_counter_offsets(struct cfg80211_registered_device *rdev,
					 const u8 *data, size_t datalen,
					 int first_count, struct nlattr *attr,
					 const u16 **offsets, unsigned int *n_offsets)
{
	int i;

	*n_offsets = 0;

	if (!attr)
		return 0;

	if (!nla_len(attr) || (nla_len(attr) % sizeof(u16)))
		return -EINVAL;

	*n_offsets = nla_len(attr) / sizeof(u16);
	if (rdev->wiphy.max_num_csa_counters &&
	    (*n_offsets > rdev->wiphy.max_num_csa_counters))
		return -EINVAL;

	*offsets = nla_data(attr);

	/* sanity checks - counters should fit and be the same */
	for (i = 0; i < *n_offsets; i++) {
		u16 offset = (*offsets)[i];

		if (offset >= datalen)
			return -EINVAL;

		if (first_count != -1 && data[offset] != first_count)
			return -EINVAL;
	}

	return 0;
}

/*
 * Handle a channel-switch request for the link returned by
 * nl80211_link_id().
 *
 * For AP/P2P-GO the request must carry the post-switch beacon plus the
 * nested NL80211_ATTR_CSA_IES with counter offsets; for IBSS and mesh only
 * the target channel and count are parsed here. The parsed settings are
 * passed to rdev_channel_switch().
 *
 * Returns -EOPNOTSUPP if the driver/wiphy lacks channel-switch support or
 * the interface type is not handled, -ENOTCONN if the interface is not
 * operating, -EINVAL/-ENOMEM on bad input or allocation failure, otherwise
 * the result of the driver call.
 */
static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	unsigned int link_id = nl80211_link_id(info->attrs);
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_csa_settings params;
	struct nlattr **csa_attrs = NULL;
	int err;
	bool need_new_beacon = false;
	bool need_handle_dfs_flag = true;
	u32 cs_count;

	if (!rdev->ops->channel_switch ||
	    !(rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH))
		return -EOPNOTSUPP;

	switch (dev->ieee80211_ptr->iftype) {
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_P2P_GO:
		need_new_beacon = true;
		/* For all modes except AP the handle_dfs flag needs to be
		 * supplied to tell the kernel that userspace will handle radar
		 * events when they happen. Otherwise a switch to a channel
		 * requiring DFS will be rejected.
*/ need_handle_dfs_flag = false; /* useless if AP is not running */ if (!wdev->links[link_id].ap.beacon_interval) return -ENOTCONN; break; case NL80211_IFTYPE_ADHOC: if (!wdev->u.ibss.ssid_len) return -ENOTCONN; break; case NL80211_IFTYPE_MESH_POINT: if (!wdev->u.mesh.id_len) return -ENOTCONN; break; default: return -EOPNOTSUPP; } memset(&params, 0, sizeof(params)); params.beacon_csa.ftm_responder = -1; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || !info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]) return -EINVAL; /* only important for AP, IBSS and mesh create IEs internally */ if (need_new_beacon && !info->attrs[NL80211_ATTR_CSA_IES]) return -EINVAL; /* Even though the attribute is u32, the specification says * u8, so let's make sure we don't overflow. */ cs_count = nla_get_u32(info->attrs[NL80211_ATTR_CH_SWITCH_COUNT]); if (cs_count > 255) return -EINVAL; params.count = cs_count; if (!need_new_beacon) goto skip_beacons; err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_after, info->extack); if (err) goto free; csa_attrs = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*csa_attrs), GFP_KERNEL); if (!csa_attrs) { err = -ENOMEM; goto free; } err = nla_parse_nested_deprecated(csa_attrs, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_CSA_IES], nl80211_policy, info->extack); if (err) goto free; err = nl80211_parse_beacon(rdev, csa_attrs, &params.beacon_csa, info->extack); if (err) goto free; if (!csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { err = -EINVAL; goto free; } err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.tail, params.beacon_csa.tail_len, params.count, csa_attrs[NL80211_ATTR_CNTDWN_OFFS_BEACON], &params.counter_offsets_beacon, &params.n_counter_offsets_beacon); if (err) goto free; err = nl80211_parse_counter_offsets(rdev, params.beacon_csa.probe_resp, params.beacon_csa.probe_resp_len, params.count, csa_attrs[NL80211_ATTR_CNTDWN_OFFS_PRESP], &params.counter_offsets_presp, &params.n_counter_offsets_presp); if (err) goto free; skip_beacons: err = 
nl80211_parse_chandef(rdev, info, &params.chandef);
	if (err)
		goto free;

	if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params.chandef,
					   wdev->iftype)) {
		err = -EINVAL;
		goto free;
	}

	err = cfg80211_chandef_dfs_required(wdev->wiphy,
					    &params.chandef,
					    wdev->iftype);
	if (err < 0)
		goto free;

	if (err > 0) {
		params.radar_required = true;
		/* non-AP modes must opt in to handling radar events */
		if (need_handle_dfs_flag &&
		    !nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS])) {
			err = -EINVAL;
			goto free;
		}
	}

	if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
		params.block_tx = true;

	params.link_id = link_id;
	err = rdev_channel_switch(rdev, dev, &params);

free:
	/* reached on success too: release everything parsing allocated */
	kfree(params.beacon_after.mbssid_ies);
	kfree(params.beacon_csa.mbssid_ies);
	kfree(params.beacon_after.rnr_ies);
	kfree(params.beacon_csa.rnr_ies);
	kfree(csa_attrs);
	return err;
}

/*
 * Append one NL80211_CMD_NEW_SCAN_RESULTS message describing @intbss to
 * @msg as part of the dump driven by @cb.
 *
 * The wiphy mutex must be held (see lockdep_assert_wiphy()).
 *
 * Returns 0 on success, -1 if the message header could not be added, or
 * -EMSGSIZE if an attribute could not be added, in which case the partial
 * message is cancelled.
 */
static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
			    u32 seq, int flags,
			    struct cfg80211_registered_device *rdev,
			    struct wireless_dev *wdev,
			    struct cfg80211_internal_bss *intbss)
{
	struct cfg80211_bss *res = &intbss->pub;
	const struct cfg80211_bss_ies *ies;
	unsigned int link_id;
	void *hdr;
	struct nlattr *bss;

	lockdep_assert_wiphy(wdev->wiphy);

	hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags,
			     NL80211_CMD_NEW_SCAN_RESULTS);
	if (!hdr)
		return -1;

	genl_dump_check_consistent(cb, hdr);

	if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation))
		goto nla_put_failure;
	if (wdev->netdev &&
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex))
		goto nla_put_failure;
	if (nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD))
		goto nla_put_failure;

	bss = nla_nest_start_noflag(msg, NL80211_ATTR_BSS);
	if (!bss)
		goto nla_put_failure;
	/* an all-zero BSSID is simply not reported */
	if ((!is_zero_ether_addr(res->bssid) &&
	     nla_put(msg, NL80211_BSS_BSSID, ETH_ALEN, res->bssid)))
		goto nla_put_failure;

	/*
	 * The IE pointers are read under RCU; failures inside this section
	 * must leave through fail_unlock_rcu.
	 */
	rcu_read_lock();
	/* indicate whether we have probe response data or not */
	if (rcu_access_pointer(res->proberesp_ies) &&
	    nla_put_flag(msg, NL80211_BSS_PRESP_DATA))
		goto fail_unlock_rcu;

	/* this
pointer prefers to be pointed to probe response data * but is always valid */ ies = rcu_dereference(res->ies); if (ies) { if (nla_put_u64_64bit(msg, NL80211_BSS_TSF, ies->tsf, NL80211_BSS_PAD)) goto fail_unlock_rcu; if (ies->len && nla_put(msg, NL80211_BSS_INFORMATION_ELEMENTS, ies->len, ies->data)) goto fail_unlock_rcu; } /* and this pointer is always (unless driver didn't know) beacon data */ ies = rcu_dereference(res->beacon_ies); if (ies && ies->from_beacon) { if (nla_put_u64_64bit(msg, NL80211_BSS_BEACON_TSF, ies->tsf, NL80211_BSS_PAD)) goto fail_unlock_rcu; if (ies->len && nla_put(msg, NL80211_BSS_BEACON_IES, ies->len, ies->data)) goto fail_unlock_rcu; } rcu_read_unlock(); if (res->beacon_interval && nla_put_u16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval)) goto nla_put_failure; if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) || nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) || nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET, res->channel->freq_offset) || nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO, jiffies_to_msecs(jiffies - intbss->ts))) goto nla_put_failure; if (intbss->parent_tsf && (nla_put_u64_64bit(msg, NL80211_BSS_PARENT_TSF, intbss->parent_tsf, NL80211_BSS_PAD) || nla_put(msg, NL80211_BSS_PARENT_BSSID, ETH_ALEN, intbss->parent_bssid))) goto nla_put_failure; if (intbss->ts_boottime && nla_put_u64_64bit(msg, NL80211_BSS_LAST_SEEN_BOOTTIME, intbss->ts_boottime, NL80211_BSS_PAD)) goto nla_put_failure; if (!nl80211_put_signal(msg, intbss->pub.chains, intbss->pub.chain_signal, NL80211_BSS_CHAIN_SIGNAL)) goto nla_put_failure; if (intbss->bss_source != BSS_SOURCE_STA_PROFILE) { switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: if (nla_put_u32(msg, NL80211_BSS_SIGNAL_MBM, res->signal)) goto nla_put_failure; break; case CFG80211_SIGNAL_TYPE_UNSPEC: if (nla_put_u8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal)) goto nla_put_failure; break; default: break; } } switch (wdev->iftype) { case 
NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_STATION:
		/* flag the BSS if it is the current BSS of any valid link */
		for_each_valid_link(wdev, link_id) {
			if (intbss == wdev->links[link_id].client.current_bss &&
			    (nla_put_u32(msg, NL80211_BSS_STATUS,
					 NL80211_BSS_STATUS_ASSOCIATED) ||
			     (wdev->valid_links &&
			      (nla_put_u8(msg, NL80211_BSS_MLO_LINK_ID,
					  link_id) ||
			       nla_put(msg, NL80211_BSS_MLD_ADDR, ETH_ALEN,
				       wdev->u.client.connected_addr)))))
				goto nla_put_failure;
		}
		break;
	case NL80211_IFTYPE_ADHOC:
		if (intbss == wdev->u.ibss.current_bss &&
		    nla_put_u32(msg, NL80211_BSS_STATUS,
				NL80211_BSS_STATUS_IBSS_JOINED))
			goto nla_put_failure;
		break;
	default:
		break;
	}

	if (nla_put_u32(msg, NL80211_BSS_USE_FOR, res->use_for))
		goto nla_put_failure;

	if (res->cannot_use_reasons &&
	    nla_put_u64_64bit(msg, NL80211_BSS_CANNOT_USE_REASONS,
			      res->cannot_use_reasons,
			      NL80211_BSS_PAD))
		goto nla_put_failure;

	nla_nest_end(msg, bss);

	genlmsg_end(msg, hdr);
	return 0;

fail_unlock_rcu:
	rcu_read_unlock();
nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Dump callback that emits the entries of rdev->bss_list.
 *
 * cb->args[2] holds the number of list entries already handled by earlier
 * invocations; entries up to that index are skipped. Unless the request
 * carried NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA, entries without
 * NL80211_BSS_USE_FOR_NORMAL are skipped as well.
 *
 * Returns skb->len, or a negative error if the attribute buffer cannot be
 * allocated or nl80211_prepare_wdev_dump() fails.
 */
static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct cfg80211_registered_device *rdev;
	struct cfg80211_internal_bss *scan;
	struct wireless_dev *wdev;
	struct nlattr **attrbuf;
	int start = cb->args[2], idx = 0;
	bool dump_include_use_data;
	int err;

	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
	if (!attrbuf)
		return -ENOMEM;

	err = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
	if (err) {
		kfree(attrbuf);
		return err;
	}
	/* nl80211_prepare_wdev_dump acquired it in the successful case */
	__acquire(&rdev->wiphy.mtx);

	/* only this one flag is needed, so the buffer can go right away */
	dump_include_use_data =
		attrbuf[NL80211_ATTR_BSS_DUMP_INCLUDE_USE_DATA];
	kfree(attrbuf);

	spin_lock_bh(&rdev->bss_lock);

	/*
	 * dump_scan will be called multiple times to break up the scan results
	 * into multiple messages. It is unlikely that any more bss-es will be
	 * expired after the first call, so only call this on the
	 * first dump_scan invocation.
	 */
	if (start == 0)
		cfg80211_bss_expire(rdev);

	cb->seq = rdev->bss_generation;

	list_for_each_entry(scan, &rdev->bss_list, list) {
		if (++idx <= start)
			continue;
		if (!dump_include_use_data &&
		    !(scan->pub.use_for & NL80211_BSS_USE_FOR_NORMAL))
			continue;
		if (nl80211_send_bss(skb, cb,
				cb->nlh->nlmsg_seq, NLM_F_MULTI,
				rdev, wdev, scan) < 0) {
			/* not sent: step back so the next call retries it */
			idx--;
			break;
		}
	}

	spin_unlock_bh(&rdev->bss_lock);

	cb->args[2] = idx;
	wiphy_unlock(&rdev->wiphy);

	return skb->len;
}

/*
 * Append one NL80211_CMD_NEW_SURVEY_RESULTS message for @survey to @msg.
 *
 * Only the fields flagged in survey->filled are emitted. A survey without
 * a channel is silently skipped (return 0, nothing added) unless
 * @allow_radio_stats is set.
 *
 * Returns 0 on success or skip, -ENOMEM if the message header could not be
 * added, or -EMSGSIZE if an attribute could not be added, in which case the
 * partial message is cancelled.
 */
static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq,
			       int flags, struct net_device *dev,
			       bool allow_radio_stats,
			       struct survey_info *survey)
{
	void *hdr;
	struct nlattr *infoattr;

	/* skip radio stats if userspace didn't request them */
	if (!survey->channel && !allow_radio_stats)
		return 0;

	hdr = nl80211hdr_put(msg, portid, seq, flags,
			     NL80211_CMD_NEW_SURVEY_RESULTS);
	if (!hdr)
		return -ENOMEM;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	infoattr = nla_nest_start_noflag(msg, NL80211_ATTR_SURVEY_INFO);
	if (!infoattr)
		goto nla_put_failure;

	if (survey->channel &&
	    nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY,
			survey->channel->center_freq))
		goto nla_put_failure;

	/* the offset is only reported when it is non-zero */
	if (survey->channel && survey->channel->freq_offset &&
	    nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY_OFFSET,
			survey->channel->freq_offset))
		goto nla_put_failure;

	if ((survey->filled & SURVEY_INFO_NOISE_DBM) &&
	    nla_put_u8(msg, NL80211_SURVEY_INFO_NOISE, survey->noise))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_IN_USE) &&
	    nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME,
			survey->time, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_BUSY) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BUSY,
			      survey->time_busy, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) &&
	    nla_put_u64_64bit(msg,
NL80211_SURVEY_INFO_TIME_EXT_BUSY,
			      survey->time_ext_busy, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_RX) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_RX,
			      survey->time_rx, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_TX) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_TX,
			      survey->time_tx, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_SCAN) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN,
			      survey->time_scan, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;
	if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) &&
	    nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX,
			      survey->time_bss_rx, NL80211_SURVEY_INFO_PAD))
		goto nla_put_failure;

	nla_nest_end(msg, infoattr);

	genlmsg_end(msg, hdr);
	return 0;

nla_put_failure:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}

/*
 * Dump callback that emits the driver's survey entries.
 *
 * cb->args[2] holds the index of the next survey entry to request from the
 * driver. Entries are fetched with rdev_dump_survey() until it returns
 * -ENOENT; entries for disabled channels are skipped.
 *
 * Returns skb->len once the dump round is complete or an entry could not
 * be added, -EINVAL if the wdev has no netdev, -EOPNOTSUPP if the driver
 * has no dump_survey op, or another negative error from setup or the
 * driver.
 */
static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct nlattr **attrbuf;
	struct survey_info survey;
	struct cfg80211_registered_device *rdev;
	struct wireless_dev *wdev;
	int survey_idx = cb->args[2];
	int res;
	bool radio_stats;

	attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL);
	if (!attrbuf)
		return -ENOMEM;

	res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev, attrbuf);
	if (res) {
		kfree(attrbuf);
		return res;
	}
	/* nl80211_prepare_wdev_dump acquired it in the successful case */
	__acquire(&rdev->wiphy.mtx);

	/* prepare_wdev_dump parsed the attributes */
	radio_stats = attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS];

	if (!wdev->netdev) {
		res = -EINVAL;
		goto out_err;
	}

	if (!rdev->ops->dump_survey) {
		res = -EOPNOTSUPP;
		goto out_err;
	}

	while (1) {
		res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey);
		/* -ENOENT ends the enumeration; the dump itself succeeded */
		if (res == -ENOENT)
			break;
		if (res)
			goto out_err;

		/* don't send disabled channels, but do send non-channel data */
		if (survey.channel &&
		    survey.channel->flags & IEEE80211_CHAN_DISABLED) {
			survey_idx++;
			continue;
		}

		/* entry not added: keep survey_idx so the next call retries it */
		if (nl80211_send_survey(skb,
				NETLINK_CB(cb->skb).portid,
				cb->nlh->nlmsg_seq, NLM_F_MULTI,
				wdev->netdev, radio_stats, &survey) < 0)
			goto out;
		survey_idx++;
	}

out:
	cb->args[2] = survey_idx;
	res = skb->len;
out_err:
	kfree(attrbuf);
	wiphy_unlock(&rdev->wiphy);
	return res;
}

/*
 * Handle an authentication request: check the mandatory attributes and the
 * optional key, look up the target BSS and pass the request on to
 * cfg80211_mlme_auth().
 *
 * NL80211_ATTR_MAC, _AUTH_TYPE, _SSID and _WIPHY_FREQ are all required;
 * -EINVAL is returned if any of them is missing.
 */
static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct ieee80211_channel *chan;
	const u8 *bssid, *ssid;
	int err, ssid_len;
	enum nl80211_auth_type auth_type;
	struct key_parse key;
	bool local_state_change;
	struct cfg80211_auth_request req = {};
	u32 freq;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_AUTH_TYPE])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_SSID])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
		return -EINVAL;

	err = nl80211_parse_key(info, &key);
	if (err)
		return err;

	if (key.idx >= 0) {
		if (key.type != -1 && key.type != NL80211_KEYTYPE_GROUP)
			return -EINVAL;
		if (!key.p.key || !key.p.key_len)
			return -EINVAL;
		/* only WEP40/WEP104 with the matching key length is accepted */
		if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 ||
		     key.p.key_len != WLAN_KEY_LEN_WEP40) &&
		    (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 ||
		     key.p.key_len != WLAN_KEY_LEN_WEP104))
			return -EINVAL;
		if (key.idx > 3)
			return -EINVAL;
	} else {
		/* a negative index means no key was supplied */
		key.p.key_len = 0;
		key.p.key = NULL;
	}

	if (key.idx >= 0) {
		int i;
		bool ok = false;

		/* the cipher must be one the wiphy advertises */
		for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
			if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
				ok = true;
				break;
			}
		}
		if (!ok)
			return -EINVAL;
	}

	if (!rdev->ops->auth)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
	/* frequency attribute converted by MHZ_TO_KHZ(), optional offset added */
	freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
		freq +=
		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);

	chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
	if (!chan)
return -EINVAL;

	ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	/* Optional attributes are passed on as pointer/length pairs. */
	if (info->attrs[NL80211_ATTR_IE]) {
		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	if (info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]) {
		req.supported_selectors =
			nla_data(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
		req.supported_selectors_len =
			nla_len(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
	}

	auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
	if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_AUTHENTICATE))
		return -EINVAL;

	/* SAE and the FILS auth types are rejected without AUTH_DATA ... */
	if ((auth_type == NL80211_AUTHTYPE_SAE ||
	     auth_type == NL80211_AUTHTYPE_FILS_SK ||
	     auth_type == NL80211_AUTHTYPE_FILS_SK_PFS ||
	     auth_type == NL80211_AUTHTYPE_FILS_PK) &&
	    !info->attrs[NL80211_ATTR_AUTH_DATA])
		return -EINVAL;

	/* ... and AUTH_DATA is rejected for every other auth type. */
	if (info->attrs[NL80211_ATTR_AUTH_DATA]) {
		if (auth_type != NL80211_AUTHTYPE_SAE &&
		    auth_type != NL80211_AUTHTYPE_FILS_SK &&
		    auth_type != NL80211_AUTHTYPE_FILS_SK_PFS &&
		    auth_type != NL80211_AUTHTYPE_FILS_PK)
			return -EINVAL;
		req.auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]);
		req.auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]);
	}

	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];

	/*
	 * Since we no longer track auth state, ignore
	 * requests to only change local state.
	 */
	if (local_state_change)
		return 0;

	req.auth_type = auth_type;
	req.key = key.p.key;
	req.key_len = key.p.key_len;
	req.key_idx = key.idx;
	req.link_id = nl80211_link_id_or_invalid(info->attrs);
	if (req.link_id >= 0) {
		/*
		 * A link ID is only accepted on wiphys flagged as supporting
		 * MLO, and must come with a valid MLD address.
		 */
		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO))
			return -EINVAL;
		if (!info->attrs[NL80211_ATTR_MLD_ADDR])
			return -EINVAL;
		req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
		if (!is_valid_ether_addr(req.ap_mld_addr))
			return -EINVAL;
	}

	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
				   IEEE80211_BSS_TYPE_ESS,
				   IEEE80211_PRIVACY_ANY);
	if (!req.bss)
		return -ENOENT;

	err = cfg80211_mlme_auth(rdev, dev, &req);

	/* The BSS obtained above is put back whether or not auth succeeded. */
	cfg80211_put_bss(&rdev->wiphy, req.bss);

	return err;
}

/*
 * Check the preconditions for using the control port over nl80211.
 *
 * Returns -EINVAL (with an extended error message) when the SOCKET_OWNER
 * attribute is missing, -EOPNOTSUPP when the driver has no tx_control_port
 * op or the wiphy lacks NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211, and
 * 0 otherwise.
 */
static int validate_pae_over_nl80211(struct cfg80211_registered_device *rdev,
				     struct genl_info *info)
{
	if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
		GENL_SET_ERR_MSG(info, "SOCKET_OWNER not set");
		return -EINVAL;
	}

	if (!rdev->ops->tx_control_port ||
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211))
		return -EOPNOTSUPP;

	return 0;
}

/*
 * Fill @settings from the crypto-related attributes of @info.
 *
 * @settings is zeroed first, so attributes that are absent leave their
 * fields at zero (except the control port ethertype, which defaults to
 * ETH_P_PAE). @cipher_limit is the largest number of pairwise cipher
 * suites that is accepted.
 *
 * Returns 0 on success or a negative errno.
 */
static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
				   struct genl_info *info,
				   struct cfg80211_crypto_settings *settings,
				   int cipher_limit)
{
	memset(settings, 0, sizeof(*settings));

	settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
		u16 proto;

		proto = nla_get_u16(
			info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
		settings->control_port_ethertype = cpu_to_be16(proto);
		/* Anything other than ETH_P_PAE needs the wiphy flag. */
		if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
		    proto != ETH_P_PAE)
			return -EINVAL;
		if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
			settings->control_port_no_encrypt = true;
	} else
		settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
		int r = validate_pae_over_nl80211(rdev, info);

		if (r < 0)
			return r;

		settings->control_port_over_nl80211 = true;

		if
(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_PREAUTH])
			settings->control_port_no_preauth = true;
	}

	if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
		void *data;
		int len, i;

		data = nla_data(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]);
		len = nla_len(info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]);
		settings->n_ciphers_pairwise = len / sizeof(u32);

		/* The payload must be a whole number of u32 suite selectors. */
		if (len % sizeof(u32))
			return -EINVAL;

		/* Bound the count before copying into the fixed array. */
		if (settings->n_ciphers_pairwise > cipher_limit)
			return -EINVAL;

		memcpy(settings->ciphers_pairwise, data, len);

		/* Every requested suite must be supported by the wiphy. */
		for (i = 0; i < settings->n_ciphers_pairwise; i++)
			if (!cfg80211_supported_cipher_suite(
					&rdev->wiphy,
					settings->ciphers_pairwise[i]))
				return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]) {
		settings->cipher_group =
			nla_get_u32(info->attrs[NL80211_ATTR_CIPHER_SUITE_GROUP]);
		if (!cfg80211_supported_cipher_suite(&rdev->wiphy,
						     settings->cipher_group))
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_WPA_VERSIONS])
		settings->wpa_versions =
			nla_get_u32(info->attrs[NL80211_ATTR_WPA_VERSIONS]);

	if (info->attrs[NL80211_ATTR_AKM_SUITES]) {
		void *data;
		int len;

		data = nla_data(info->attrs[NL80211_ATTR_AKM_SUITES]);
		len = nla_len(info->attrs[NL80211_ATTR_AKM_SUITES]);
		settings->n_akm_suites = len / sizeof(u32);

		/* Same shape checks as for the pairwise ciphers above. */
		if (len % sizeof(u32))
			return -EINVAL;

		/* The limit for AKM suites comes from the wiphy itself. */
		if (settings->n_akm_suites > rdev->wiphy.max_num_akm_suites)
			return -EINVAL;

		memcpy(settings->akm_suites, data, len);
	}

	if (info->attrs[NL80211_ATTR_PMK]) {
		if (nla_len(info->attrs[NL80211_ATTR_PMK]) != WLAN_PMK_LEN)
			return -EINVAL;
		/* A PMK needs at least one of the two PSK handshake features. */
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_PSK) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_AP_PSK))
			return -EINVAL;
		/* Stored as a pointer into the attribute, not copied. */
		settings->psk = nla_data(info->attrs[NL80211_ATTR_PMK]);
	}

	if (info->attrs[NL80211_ATTR_SAE_PASSWORD]) {
		/* An SAE password needs one of the SAE offload features. */
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SAE_OFFLOAD) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SAE_OFFLOAD_AP))
			return -EINVAL;
		settings->sae_pwd =
nla_data(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
		settings->sae_pwd_len =
			nla_len(info->attrs[NL80211_ATTR_SAE_PASSWORD]);
	}

	/* Falls back to NL80211_SAE_PWE_UNSPECIFIED via the _default getter. */
	settings->sae_pwe =
		nla_get_u8_default(info->attrs[NL80211_ATTR_SAE_PWE],
				   NL80211_SAE_PWE_UNSPECIFIED);

	return 0;
}

/*
 * Look up the BSS described by the MAC and WIPHY_FREQ (plus optional
 * WIPHY_FREQ_OFFSET) attributes in @attrs, for the given SSID.
 *
 * The use_for mask passed to __cfg80211_get_bss() gets
 * NL80211_BSS_USE_FOR_MLD_LINK when @assoc_link_id is non-negative and
 * NL80211_BSS_USE_FOR_NORMAL when @assoc_link_id equals @link_id.
 *
 * Returns the BSS, ERR_PTR(-EINVAL) for missing attributes or an invalid
 * channel, or ERR_PTR(-ENOENT) when no BSS is found. Never returns NULL.
 */
static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
					      const u8 *ssid, int ssid_len,
					      struct nlattr **attrs,
					      int assoc_link_id, int link_id)
{
	struct ieee80211_channel *chan;
	struct cfg80211_bss *bss;
	const u8 *bssid;
	u32 freq, use_for = 0;

	if (!attrs[NL80211_ATTR_MAC] || !attrs[NL80211_ATTR_WIPHY_FREQ])
		return ERR_PTR(-EINVAL);

	bssid = nla_data(attrs[NL80211_ATTR_MAC]);

	freq = MHZ_TO_KHZ(nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]));
	if (attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
		freq += nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);

	chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
	if (!chan)
		return ERR_PTR(-EINVAL);

	if (assoc_link_id >= 0)
		use_for = NL80211_BSS_USE_FOR_MLD_LINK;
	if (assoc_link_id == link_id)
		use_for |= NL80211_BSS_USE_FOR_NORMAL;

	bss = __cfg80211_get_bss(&rdev->wiphy, chan, bssid,
				 ssid, ssid_len,
				 IEEE80211_BSS_TYPE_ESS,
				 IEEE80211_PRIVACY_ANY,
				 use_for);
	if (!bss)
		return ERR_PTR(-ENOENT);

	return bss;
}

/*
 * Parse every nested entry of NL80211_ATTR_MLO_LINKS into @links, indexed
 * by the entry's link ID: the BSS is looked up, optional per-link elements
 * are recorded and the disabled flag is read.
 *
 * On failure the function returns a negative errno immediately; BSS
 * references already stored in @links are not released here.
 */
static int nl80211_process_links(struct cfg80211_registered_device *rdev,
				 struct cfg80211_assoc_link *links,
				 const u8 *ssid, int ssid_len,
				 struct genl_info *info)
{
	unsigned int attrsize = NUM_NL80211_ATTR * sizeof(struct nlattr *);
	/* Declared __free(kfree); no return path below calls kfree() itself. */
	struct nlattr **attrs __free(kfree) = kzalloc(attrsize, GFP_KERNEL);
	struct nlattr *link;
	unsigned int link_id;
	int rem, err;

	if (!attrs)
		return -ENOMEM;

	nla_for_each_nested(link, info->attrs[NL80211_ATTR_MLO_LINKS], rem) {
		/* Start each link from a clean attribute table. */
		memset(attrs, 0, attrsize);

		nla_parse_nested(attrs, NL80211_ATTR_MAX, link, NULL, NULL);

		if (!attrs[NL80211_ATTR_MLO_LINK_ID]) {
			NL_SET_BAD_ATTR(info->extack, link);
			return -EINVAL;
		}

		link_id = nla_get_u8(attrs[NL80211_ATTR_MLO_LINK_ID]);
		/* cannot use the same link ID again */
		if (links[link_id].bss) {
			NL_SET_BAD_ATTR(info->extack,
link);
			return -EINVAL;
		}
		links[link_id].bss =
			nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
					  link_id, link_id);
		if (IS_ERR(links[link_id].bss)) {
			err = PTR_ERR(links[link_id].bss);
			/* Do not leave an error pointer behind in the slot. */
			links[link_id].bss = NULL;
			NL_SET_ERR_MSG_ATTR(info->extack, link,
					    "Error fetching BSS for link");
			return err;
		}

		if (attrs[NL80211_ATTR_IE]) {
			links[link_id].elems = nla_data(attrs[NL80211_ATTR_IE]);
			links[link_id].elems_len =
				nla_len(attrs[NL80211_ATTR_IE]);

			/* Per-link elements may not contain fragment elements ... */
			if (cfg80211_find_elem(WLAN_EID_FRAGMENT,
					       links[link_id].elems,
					       links[link_id].elems_len)) {
				NL_SET_ERR_MSG_ATTR(info->extack,
						    attrs[NL80211_ATTR_IE],
						    "cannot deal with fragmentation");
				return -EINVAL;
			}

			/* ... nor a non-inheritance extension element. */
			if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
						   links[link_id].elems,
						   links[link_id].elems_len)) {
				NL_SET_ERR_MSG_ATTR(info->extack,
						    attrs[NL80211_ATTR_IE],
						    "cannot deal with non-inheritance");
				return -EINVAL;
			}
		}

		links[link_id].disabled =
			nla_get_flag(attrs[NL80211_ATTR_MLO_LINK_DISABLED]);
	}

	return 0;
}

/*
 * Handler for an association request.
 *
 * Builds a struct cfg80211_assoc_request from the attributes in @info,
 * either for a single BSS or, when NL80211_ATTR_MLO_LINKS is present, for
 * a set of links, and passes it to cfg80211_mlme_assoc().
 *
 * Returns 0 on success or a negative errno.
 */
static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_assoc_request req = {};
	const u8 *ap_addr, *ssid;
	unsigned int link_id;
	int err, ssid_len;

	/* If a connection owner port ID is recorded, only that port may proceed. */
	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_SSID])
		return -EINVAL;

	if (!rdev->ops->assoc)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	if (info->attrs[NL80211_ATTR_IE]) {
		req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);

		if (cfg80211_find_ext_elem(WLAN_EID_EXT_NON_INHERITANCE,
					   req.ie, req.ie_len)) {
			NL_SET_ERR_MSG_ATTR(info->extack,
					    info->attrs[NL80211_ATTR_IE],
					    "non-inheritance makes no sense");
			return -EINVAL;
		}
	}

	if (info->attrs[NL80211_ATTR_USE_MFP]) {
		enum nl80211_mfp mfp =
			nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);

		/* Only REQUIRED and NO are accepted here. */
		if (mfp == NL80211_MFP_REQUIRED)
			req.use_mfp = true;
		else if (mfp != NL80211_MFP_NO)
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_PREV_BSSID])
		req.prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);

	if (info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]) {
		req.supported_selectors =
			nla_data(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
		req.supported_selectors_len =
			nla_len(info->attrs[NL80211_ATTR_SUPPORTED_SELECTORS]);
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
		req.flags |= ASSOC_REQ_DISABLE_HT;

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
		memcpy(&req.ht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
		       sizeof(req.ht_capa_mask));

	/* HT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&req.ht_capa,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
		       sizeof(req.ht_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
		req.flags |= ASSOC_REQ_DISABLE_VHT;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
		req.flags |= ASSOC_REQ_DISABLE_HE;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
		req.flags |= ASSOC_REQ_DISABLE_EHT;

	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
		memcpy(&req.vht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
		       sizeof(req.vht_capa_mask));

	/* Same rule for VHT: capabilities require the mask. */
	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&req.vht_capa,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
		       sizeof(req.vht_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
		/*
		 * RRM needs either both the DS_PARAM_SET_IE_IN_PROBES and
		 * QUIET feature bits, or the RRM extended feature.
		 */
		if (!((rdev->wiphy.features &
			NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
		       (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_RRM))
			return -EINVAL;
		req.flags |=
ASSOC_REQ_USE_RRM;
	}

	if (info->attrs[NL80211_ATTR_FILS_KEK]) {
		req.fils_kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]);
		req.fils_kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]);
		/* A FILS KEK is only accepted together with FILS nonces. */
		if (!info->attrs[NL80211_ATTR_FILS_NONCES])
			return -EINVAL;
		req.fils_nonces =
			nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]);
	}

	/* S1G capability and its mask must be supplied as a pair. */
	if (info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK]) {
		if (!info->attrs[NL80211_ATTR_S1G_CAPABILITY])
			return -EINVAL;
		memcpy(&req.s1g_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK]),
		       sizeof(req.s1g_capa_mask));
	}

	if (info->attrs[NL80211_ATTR_S1G_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_S1G_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&req.s1g_capa,
		       nla_data(info->attrs[NL80211_ATTR_S1G_CAPABILITY]),
		       sizeof(req.s1g_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_ASSOC_SPP_AMSDU])) {
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_SPP_AMSDU_SUPPORT)) {
			GENL_SET_ERR_MSG(info, "SPP A-MSDUs not supported");
			return -EINVAL;
		}
		req.flags |= ASSOC_REQ_SPP_AMSDU;
	}

	req.link_id = nl80211_link_id_or_invalid(info->attrs);

	if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
		/*
		 * Multi-link request: a link ID and MLD address are required,
		 * and the top-level MAC / WIPHY_FREQ attributes must not be
		 * given.
		 */
		if (req.link_id < 0)
			return -EINVAL;

		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_MLO))
			return -EINVAL;

		if (info->attrs[NL80211_ATTR_MAC] ||
		    info->attrs[NL80211_ATTR_WIPHY_FREQ] ||
		    !info->attrs[NL80211_ATTR_MLD_ADDR])
			return -EINVAL;

		req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
		ap_addr = req.ap_mld_addr;

		/* From here on, errors go through 'free' to drop link BSSes. */
		err = nl80211_process_links(rdev, req.links, ssid, ssid_len,
					    info);
		if (err)
			goto free;

		/* The link named by req.link_id must be among the parsed links. */
		if (!req.links[req.link_id].bss) {
			err = -EINVAL;
			goto free;
		}

		if (req.links[req.link_id].elems_len) {
			GENL_SET_ERR_MSG(info,
					 "cannot have per-link elems on assoc link");
			err = -EINVAL;
			goto free;
		}

		if (req.links[req.link_id].disabled) {
			GENL_SET_ERR_MSG(info,
					 "cannot have assoc link disabled");
			err = -EINVAL;
			goto free;
		}
	} else {
		/* Single-BSS request: a link ID makes no sense here. */
		if (req.link_id >= 0)
			return -EINVAL;

		req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs,
					    -1, -1);
		if (IS_ERR(req.bss))
return PTR_ERR(req.bss);
		ap_addr = req.bss->bssid;
	}

	/* At most one pairwise cipher suite is accepted for association. */
	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
	if (!err) {
		struct nlattr *link;
		int rem = 0;

		err = cfg80211_mlme_assoc(rdev, dev, &req,
					  info->extack);

		/* On success with SOCKET_OWNER, record the sender and AP address. */
		if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
			dev->ieee80211_ptr->conn_owner_nlportid =
				info->snd_portid;
			memcpy(dev->ieee80211_ptr->disconnect_bssid,
			       ap_addr, ETH_ALEN);
		}

		/* Report error from first problematic link */
		if (info->attrs[NL80211_ATTR_MLO_LINKS]) {
			nla_for_each_nested(link,
					    info->attrs[NL80211_ATTR_MLO_LINKS],
					    rem) {
				struct nlattr *link_id_attr =
					nla_find_nested(link, NL80211_ATTR_MLO_LINK_ID);

				if (!link_id_attr)
					continue;

				link_id = nla_get_u8(link_id_attr);

				/* The assoc link itself is not examined here. */
				if (link_id == req.link_id)
					continue;

				/* Skip links without an error; positive values warn. */
				if (!req.links[link_id].error ||
				    WARN_ON(req.links[link_id].error > 0))
					continue;

				/* A per-link error with overall success is unexpected. */
				WARN_ON(err >= 0);

				NL_SET_BAD_ATTR(info->extack, link);
				err = req.links[link_id].error;
				break;
			}
		}
	}

free:
	/* Put every link BSS and the single-BSS reference on all paths here. */
	for (link_id = 0; link_id < ARRAY_SIZE(req.links); link_id++)
		cfg80211_put_bss(&rdev->wiphy, req.links[link_id].bss);
	cfg80211_put_bss(&rdev->wiphy, req.bss);

	return err;
}

/*
 * Handler for a deauthentication request.
 *
 * Requires the MAC and REASON_CODE attributes; a reason code of 0 is
 * rejected. Optional IEs and the local-state-change flag are forwarded to
 * cfg80211_mlme_deauth().
 */
static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	const u8 *ie = NULL, *bssid;
	int ie_len = 0;
	u16 reason_code;
	bool local_state_change;

	/* If a connection owner port ID is recorded, only that port may proceed. */
	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_REASON_CODE])
		return -EINVAL;

	if (!rdev->ops->deauth)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);

	reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
	if (reason_code == 0) {
		/* Reason Code 0 is reserved */
		return -EINVAL;
	}

	if
(info->attrs[NL80211_ATTR_IE]) {
		ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];

	return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
				    local_state_change);
}

/*
 * Handler for a disassociation request.
 *
 * Performs the same attribute, ownership and interface-type checks as the
 * deauthentication handler, but requires the driver's disassoc op and
 * forwards to cfg80211_mlme_disassoc().
 */
static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	const u8 *ie = NULL, *bssid;
	int ie_len = 0;
	u16 reason_code;
	bool local_state_change;

	/* If a connection owner port ID is recorded, only that port may proceed. */
	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!info->attrs[NL80211_ATTR_REASON_CODE])
		return -EINVAL;

	if (!rdev->ops->disassoc)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);

	reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
	if (reason_code == 0) {
		/* Reason Code 0 is reserved */
		return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_IE]) {
		ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];

	return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
				      local_state_change);
}

/*
 * Translate a multicast rate value into per-band bitrate indices.
 *
 * For every band the wiphy has, the first bitrate entry whose .bitrate
 * equals @rateval is recorded in @mcast_rate[band] as index + 1. Bands
 * without a match are left untouched, so the caller is expected to have
 * zeroed the array.
 *
 * Returns true if at least one band had a matching rate.
 */
static bool
nl80211_parse_mcast_rate(struct cfg80211_registered_device *rdev,
			 int mcast_rate[NUM_NL80211_BANDS],
			 int rateval)
{
	struct wiphy *wiphy = &rdev->wiphy;
	bool found = false;
	int band, i;

	for (band = 0; band < NUM_NL80211_BANDS; band++) {
		struct ieee80211_supported_band *sband;

		sband = wiphy->bands[band];
		if (!sband)
			continue;

		for (i = 0; i < sband->n_bitrates; i++) {
			if (sband->bitrates[i].bitrate == rateval) {
				/* Stored 1-based; 0 is left meaning "no rate". */
				mcast_rate[band] = i + 1;
				found = true;
				break;
			}
		}
	}

	return found;
}

/*
 * Handler for a request to join an IBSS: validates the attributes, fills a
 * struct cfg80211_ibss_params and calls __cfg80211_join_ibss().
 */
static int nl80211_join_ibss(struct
sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_ibss_params ibss;
	struct wiphy *wiphy;
	struct cfg80211_cached_keys *connkeys = NULL;
	int err;

	memset(&ibss, 0, sizeof(ibss));

	/* A non-empty SSID is mandatory. */
	if (!info->attrs[NL80211_ATTR_SSID] ||
	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
		return -EINVAL;

	/* Default beacon interval when the attribute is absent. */
	ibss.beacon_interval = 100;

	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
		ibss.beacon_interval =
			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);

	err = cfg80211_validate_beacon_int(rdev, NL80211_IFTYPE_ADHOC,
					   ibss.beacon_interval);
	if (err)
		return err;

	if (!rdev->ops->join_ibss)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
		return -EOPNOTSUPP;

	wiphy = &rdev->wiphy;

	if (info->attrs[NL80211_ATTR_MAC]) {
		ibss.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);

		if (!is_valid_ether_addr(ibss.bssid))
			return -EINVAL;
	}
	ibss.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
	ibss.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	if (info->attrs[NL80211_ATTR_IE]) {
		ibss.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	err = nl80211_parse_chandef(rdev, info, &ibss.chandef);
	if (err)
		return err;

	if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef,
				     NL80211_IFTYPE_ADHOC))
		return -EINVAL;

	/*
	 * Channel width gating: 5/10/20-noHT are always allowed, 20/40
	 * need NL80211_FEATURE_HT_IBSS, 80/80+80/160 additionally need
	 * NL80211_EXT_FEATURE_VHT_IBSS, and everything else is rejected.
	 */
	switch (ibss.chandef.width) {
	case NL80211_CHAN_WIDTH_5:
	case NL80211_CHAN_WIDTH_10:
	case NL80211_CHAN_WIDTH_20_NOHT:
		break;
	case NL80211_CHAN_WIDTH_20:
	case NL80211_CHAN_WIDTH_40:
		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
			return -EINVAL;
		break;
	case NL80211_CHAN_WIDTH_80:
	case NL80211_CHAN_WIDTH_80P80:
	case NL80211_CHAN_WIDTH_160:
		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
			return -EINVAL;
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_VHT_IBSS))
			return -EINVAL;
		break;
	case NL80211_CHAN_WIDTH_320:
		return -EINVAL;
	default:
		return -EINVAL;
	}

	ibss.channel_fixed =
!!info->attrs[NL80211_ATTR_FREQ_FIXED];
	ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];

	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
		u8 *rates =
			nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		int n_rates =
			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		/* Rates are resolved against the band of the chosen channel. */
		struct ieee80211_supported_band *sband =
			wiphy->bands[ibss.chandef.chan->band];

		err = ieee80211_get_ratemask(sband, rates, n_rates,
					     &ibss.basic_rates);
		if (err)
			return err;
	}

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
		memcpy(&ibss.ht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
		       sizeof(ibss.ht_capa_mask));

	/* HT capabilities are only accepted together with their mask. */
	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
			return -EINVAL;
		memcpy(&ibss.ht_capa,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
		       sizeof(ibss.ht_capa));
	}

	if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
	    !nl80211_parse_mcast_rate(rdev, ibss.mcast_rate,
			nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
		return -EINVAL;

	/*
	 * connkeys is allocated from here on; every later error return
	 * releases it with kfree_sensitive().
	 */
	if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
		bool no_ht = false;

		connkeys = nl80211_parse_connkeys(rdev, info, &no_ht);
		if (IS_ERR(connkeys))
			return PTR_ERR(connkeys);

		/* Keys flagged no_ht are only allowed on a 20 MHz no-HT channel. */
		if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) &&
		    no_ht) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
	}

	ibss.control_port =
		nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT]);

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
		int r = validate_pae_over_nl80211(rdev, info);

		if (r < 0) {
			kfree_sensitive(connkeys);
			return r;
		}

		ibss.control_port_over_nl80211 = true;
	}

	ibss.userspace_handles_dfs =
		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);

	err = __cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
	/* connkeys is freed here only when the join failed. */
	if (err)
		kfree_sensitive(connkeys);
	else if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
		dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;

	return err;
}

/*
 * Handler for a request to leave an IBSS. Requires the driver's leave_ibss
 * op and an ad-hoc interface, then calls cfg80211_leave_ibss().
 */
static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device
*dev = info->user_ptr[1];

	if (!rdev->ops->leave_ibss)
		return -EOPNOTSUPP;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
		return -EOPNOTSUPP;

	return cfg80211_leave_ibss(rdev, dev, false);
}

/*
 * Handler for setting the multicast rate.
 *
 * Allowed only on ad-hoc, mesh point and OCB interfaces with a driver that
 * implements set_mcast_rate. The rate from NL80211_ATTR_MCAST_RATE is
 * converted to per-band indices and passed to rdev_set_mcast_rate().
 */
static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	int mcast_rate[NUM_NL80211_BANDS];
	u32 nla_rate;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB)
		return -EOPNOTSUPP;

	if (!rdev->ops->set_mcast_rate)
		return -EOPNOTSUPP;

	/* Zeroed because the parser only writes bands that have a match. */
	memset(mcast_rate, 0, sizeof(mcast_rate));

	if (!info->attrs[NL80211_ATTR_MCAST_RATE])
		return -EINVAL;

	nla_rate = nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]);
	if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate))
		return -EINVAL;

	return rdev_set_mcast_rate(rdev, dev, mcast_rate);
}

/*
 * Allocate an skb and pre-fill it for a vendor or testmode message.
 *
 * The message gets the nl80211 header for @cmd, the wiphy index, the
 * vendor ID and subcommand when @info is non-NULL, and the wdev ID (plus
 * ifindex when a netdev exists) when @wdev is non-NULL. A nested attribute
 * of type @attr is then opened for the caller's payload.
 *
 * Returns the skb, or NULL if allocation or any put fails.
 */
static struct sk_buff *
__cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev,
			    struct wireless_dev *wdev, int approxlen,
			    u32 portid, u32 seq, enum nl80211_commands cmd,
			    enum nl80211_attrs attr,
			    const struct nl80211_vendor_cmd_info *info,
			    gfp_t gfp)
{
	struct sk_buff *skb;
	void *hdr;
	struct nlattr *data;

	/* 100 extra bytes are requested on top of the caller's estimate. */
	skb = nlmsg_new(approxlen + 100, gfp);
	if (!skb)
		return NULL;

	hdr = nl80211hdr_put(skb, portid, seq, 0, cmd);
	if (!hdr) {
		kfree_skb(skb);
		return NULL;
	}

	if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
		goto nla_put_failure;

	if (info) {
		if (nla_put_u32(skb, NL80211_ATTR_VENDOR_ID,
				info->vendor_id))
			goto nla_put_failure;
		if (nla_put_u32(skb, NL80211_ATTR_VENDOR_SUBCMD,
				info->subcmd))
			goto nla_put_failure;
	}

	if (wdev) {
		if (nla_put_u64_64bit(skb, NL80211_ATTR_WDEV,
				      wdev_id(wdev), NL80211_ATTR_PAD))
			goto nla_put_failure;
		if (wdev->netdev &&
		    nla_put_u32(skb, NL80211_ATTR_IFINDEX,
				wdev->netdev->ifindex))
			goto nla_put_failure;
	}

	data = nla_nest_start_noflag(skb, attr);
	if (!data)
		goto nla_put_failure;
((void **)skb->cb)[0] = rdev;
	((void **)skb->cb)[1] = hdr;
	((void **)skb->cb)[2] = data;
	/*
	 * The three skb->cb slots written above (rdev, message header, open
	 * nest) are read back by __cfg80211_send_event_skb().
	 */

	return skb;

 nla_put_failure:
	kfree_skb(skb);
	return NULL;
}

/*
 * Allocate an event skb for a testmode or vendor event.
 *
 * For NL80211_CMD_TESTMODE, @vendor_event_idx must be -1 and no vendor info
 * is attached. For NL80211_CMD_VENDOR, @vendor_event_idx must index
 * wiphy->vendor_events. Any other @cmd, or an index that fails these
 * checks, triggers a WARN and returns NULL.
 */
struct sk_buff *__cfg80211_alloc_event_skb(struct wiphy *wiphy,
					   struct wireless_dev *wdev,
					   enum nl80211_commands cmd,
					   enum nl80211_attrs attr,
					   unsigned int portid,
					   int vendor_event_idx,
					   int approxlen, gfp_t gfp)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	const struct nl80211_vendor_cmd_info *info;

	switch (cmd) {
	case NL80211_CMD_TESTMODE:
		if (WARN_ON(vendor_event_idx != -1))
			return NULL;
		info = NULL;
		break;
	case NL80211_CMD_VENDOR:
		if (WARN_ON(vendor_event_idx < 0 ||
			    vendor_event_idx >= wiphy->n_vendor_events))
			return NULL;
		info = &wiphy->vendor_events[vendor_event_idx];
		break;
	default:
		WARN_ON(1);
		return NULL;
	}

	/* Events are built with sequence number 0. */
	return __cfg80211_alloc_vendor_skb(rdev, wdev, approxlen, portid, 0,
					   cmd, attr, info, gfp);
}
EXPORT_SYMBOL(__cfg80211_alloc_event_skb);

/*
 * Finish and send an event skb whose skb->cb holds rdev, the message header
 * and the open nest in slots 0..2.
 *
 * The message is unicast when its netlink header carries a non-zero
 * nlmsg_pid; otherwise it is multicast to the vendor group if the nest is
 * NL80211_ATTR_VENDOR_DATA, or to the testmode group for anything else.
 */
void __cfg80211_send_event_skb(struct sk_buff *skb, gfp_t gfp)
{
	struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
	void *hdr = ((void **)skb->cb)[1];
	struct nlmsghdr *nlhdr = nlmsg_hdr(skb);
	struct nlattr *data = ((void **)skb->cb)[2];
	enum nl80211_multicast_groups mcgrp = NL80211_MCGRP_TESTMODE;

	/* clear CB data for netlink core to own from now on */
	memset(skb->cb, 0, sizeof(skb->cb));

	nla_nest_end(skb, data);
	genlmsg_end(skb, hdr);

	if (nlhdr->nlmsg_pid) {
		genlmsg_unicast(wiphy_net(&rdev->wiphy), skb,
				nlhdr->nlmsg_pid);
	} else {
		if (data->nla_type == NL80211_ATTR_VENDOR_DATA)
			mcgrp = NL80211_MCGRP_VENDOR;

		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
					skb, 0, mcgrp, gfp);
	}
}
EXPORT_SYMBOL(__cfg80211_send_event_skb);

#ifdef CONFIG_NL80211_TESTMODE
/*
 * Handler for a testmode command: passes the NL80211_ATTR_TESTDATA payload
 * to the driver's testmode_cmd op. The wiphy mutex must be held.
 */
static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev;
	int err;

	lockdep_assert_held(&rdev->wiphy.mtx);

	wdev = __cfg80211_wdev_from_attrs(rdev, genl_info_net(info),
info->attrs);

	if (!rdev->ops->testmode_cmd)
		return -EOPNOTSUPP;

	if (IS_ERR(wdev)) {
		err = PTR_ERR(wdev);
		/* -EINVAL from the lookup is tolerated: continue without a wdev. */
		if (err != -EINVAL)
			return err;
		wdev = NULL;
	} else if (wdev->wiphy != &rdev->wiphy) {
		return -EINVAL;
	}

	if (!info->attrs[NL80211_ATTR_TESTDATA])
		return -EINVAL;

	/* cur_cmd_info is set only for the duration of the driver call. */
	rdev->cur_cmd_info = info;
	err = rdev_testmode_cmd(rdev, wdev,
				nla_data(info->attrs[NL80211_ATTR_TESTDATA]),
				nla_len(info->attrs[NL80211_ATTR_TESTDATA]));
	rdev->cur_cmd_info = NULL;

	return err;
}

/*
 * Netlink dump callback for testmode data.
 *
 * State kept in @cb between calls: args[0] holds the wiphy index plus one
 * (0 meaning "first call"), and args[1] holds the address of the
 * NL80211_ATTR_TESTDATA attribute from the first call, if any. The RTNL is
 * held for the whole function.
 *
 * Returns skb->len when the loop ends normally, or a negative errno.
 */
static int nl80211_testmode_dump(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct cfg80211_registered_device *rdev;
	struct nlattr **attrbuf = NULL;
	int err;
	long phy_idx;
	void *data = NULL;
	int data_len = 0;

	rtnl_lock();

	if (cb->args[0]) {
		/*
		 * 0 is a valid index, but not valid for args[0],
		 * so we need to offset by 1.
		 */
		phy_idx = cb->args[0] - 1;

		rdev = cfg80211_rdev_by_wiphy_idx(phy_idx);
		if (!rdev) {
			err = -ENOENT;
			goto out_err;
		}
	} else {
		/* First call: parse the request to find the wiphy and test data. */
		attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf),
				  GFP_KERNEL);
		if (!attrbuf) {
			err = -ENOMEM;
			goto out_err;
		}

		err = nlmsg_parse_deprecated(cb->nlh,
					     GENL_HDRLEN + nl80211_fam.hdrsize,
					     attrbuf, nl80211_fam.maxattr,
					     nl80211_policy, NULL);
		if (err)
			goto out_err;

		rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf);
		if (IS_ERR(rdev)) {
			err = PTR_ERR(rdev);
			goto out_err;
		}
		phy_idx = rdev->wiphy_idx;

		if (attrbuf[NL80211_ATTR_TESTDATA])
			cb->args[1] = (long)attrbuf[NL80211_ATTR_TESTDATA];
	}

	if (cb->args[1]) {
		data = nla_data((void *)cb->args[1]);
		data_len = nla_len((void *)cb->args[1]);
	}

	if (!rdev->ops->testmode_dump) {
		err = -EOPNOTSUPP;
		goto out_err;
	}

	/* Emit messages until one cannot be started or the driver stops. */
	while (1) {
		void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
					   NL80211_CMD_TESTMODE);
		struct nlattr *tmdata;

		if (!hdr)
			break;

		if (nla_put_u32(skb, NL80211_ATTR_WIPHY, phy_idx)) {
			genlmsg_cancel(skb, hdr);
			break;
		}

		tmdata = nla_nest_start_noflag(skb, NL80211_ATTR_TESTDATA);
		if (!tmdata) {
			genlmsg_cancel(skb, hdr);
			break;
		}
		err = rdev_testmode_dump(rdev, skb, cb,
data, data_len);
		nla_nest_end(skb, tmdata);

		/*
		 * -ENOBUFS and -ENOENT end the loop without failing the dump;
		 * any other error aborts it. The message in progress is
		 * cancelled in both cases.
		 */
		if (err == -ENOBUFS || err == -ENOENT) {
			genlmsg_cancel(skb, hdr);
			break;
		} else if (err) {
			genlmsg_cancel(skb, hdr);
			goto out_err;
		}

		genlmsg_end(skb, hdr);
	}

	err = skb->len;
	/* see above */
	cb->args[0] = phy_idx + 1;
 out_err:
	kfree(attrbuf);
	rtnl_unlock();
	return err;
}
#endif

/*
 * Handler for a connect request: validates the attributes, fills a struct
 * cfg80211_connect_params and calls cfg80211_connect().
 */
static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_connect_params connect;
	struct wiphy *wiphy;
	struct cfg80211_cached_keys *connkeys = NULL;
	u32 freq = 0;
	int err;

	memset(&connect, 0, sizeof(connect));

	/* A non-empty SSID is mandatory. */
	if (!info->attrs[NL80211_ATTR_SSID] ||
	    !nla_len(info->attrs[NL80211_ATTR_SSID]))
		return -EINVAL;

	if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
		connect.auth_type =
			nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
		if (!nl80211_valid_auth_type(rdev, connect.auth_type,
					     NL80211_CMD_CONNECT))
			return -EINVAL;
	} else
		connect.auth_type = NL80211_AUTHTYPE_AUTOMATIC;

	connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];

	if (info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS] &&
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X))
		return -EINVAL;
	connect.want_1x = info->attrs[NL80211_ATTR_WANT_1X_4WAY_HS];

	err = nl80211_crypto_settings(rdev, info, &connect.crypto,
				      NL80211_MAX_NR_CIPHER_SUITES);
	if (err)
		return err;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	wiphy = &rdev->wiphy;

	/* -1 unless the attribute is given and the wiphy supports FW roaming. */
	connect.bg_scan_period = -1;
	if (info->attrs[NL80211_ATTR_BG_SCAN_PERIOD] &&
		(wiphy->flags & WIPHY_FLAG_SUPPORTS_FW_ROAM)) {
		connect.bg_scan_period =
			nla_get_u16(info->attrs[NL80211_ATTR_BG_SCAN_PERIOD]);
	}

	/* An explicit BSSID takes precedence over a BSSID hint. */
	if (info->attrs[NL80211_ATTR_MAC])
		connect.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
	else if (info->attrs[NL80211_ATTR_MAC_HINT])
		connect.bssid_hint =
			nla_data(info->attrs[NL80211_ATTR_MAC_HINT]);
	connect.ssid =
nla_data(info->attrs[NL80211_ATTR_SSID]);
	connect.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);

	if (info->attrs[NL80211_ATTR_IE]) {
		connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
		connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
	}

	if (info->attrs[NL80211_ATTR_USE_MFP]) {
		connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
		/* Optional MFP is only accepted with the matching ext feature. */
		if (connect.mfp == NL80211_MFP_OPTIONAL &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_MFP_OPTIONAL))
			return -EOPNOTSUPP;
	} else {
		connect.mfp = NL80211_MFP_NO;
	}

	if (info->attrs[NL80211_ATTR_PREV_BSSID])
		connect.prev_bssid =
			nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);

	/* freq is built in KHz from WIPHY_FREQ plus the optional offset. */
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ])
		freq = MHZ_TO_KHZ(nla_get_u32(
					info->attrs[NL80211_ATTR_WIPHY_FREQ]));
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
		freq +=
		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);

	/* A non-zero freq fixes the channel; otherwise a hint may be used. */
	if (freq) {
		connect.channel = nl80211_get_valid_chan(wiphy, freq);
		if (!connect.channel)
			return -EINVAL;
	} else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) {
		freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
		freq = MHZ_TO_KHZ(freq);
		connect.channel_hint = nl80211_get_valid_chan(wiphy, freq);
		if (!connect.channel_hint)
			return -EINVAL;
	}

	/* The EDMG bandwidth config is only read when EDMG channels are given. */
	if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) {
		connect.edmg.channels =
		      nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]);

		if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG])
			connect.edmg.bw_config =
				nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]);
	}

	/*
	 * connkeys may be allocated from here on; the error returns that
	 * follow release it with kfree_sensitive().
	 */
	if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
		connkeys = nl80211_parse_connkeys(rdev, info, NULL);
		if (IS_ERR(connkeys))
			return PTR_ERR(connkeys);
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
		connect.flags |= ASSOC_REQ_DISABLE_HT;

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
		memcpy(&connect.ht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
		       sizeof(connect.ht_capa_mask));

	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
		if
(!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]) {
			/* HT capabilities without a mask are rejected. */
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
		memcpy(&connect.ht_capa,
		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
		       sizeof(connect.ht_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_VHT]))
		connect.flags |= ASSOC_REQ_DISABLE_VHT;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HE]))
		connect.flags |= ASSOC_REQ_DISABLE_HE;

	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_EHT]))
		connect.flags |= ASSOC_REQ_DISABLE_EHT;

	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK])
		memcpy(&connect.vht_capa_mask,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]),
		       sizeof(connect.vht_capa_mask));

	/* VHT capabilities likewise require their mask. */
	if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) {
		if (!info->attrs[NL80211_ATTR_VHT_CAPABILITY_MASK]) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
		memcpy(&connect.vht_capa,
		       nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]),
		       sizeof(connect.vht_capa));
	}

	if (nla_get_flag(info->attrs[NL80211_ATTR_USE_RRM])) {
		/*
		 * RRM needs either both the DS_PARAM_SET_IE_IN_PROBES and
		 * QUIET feature bits, or the RRM extended feature.
		 */
		if (!((rdev->wiphy.features &
			NL80211_FEATURE_DS_PARAM_SET_IE_IN_PROBES) &&
		       (rdev->wiphy.features & NL80211_FEATURE_QUIET)) &&
		    !wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_RRM)) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}
		connect.flags |= ASSOC_REQ_USE_RRM;
	}

	/* PBSS is refused when the wiphy has no 60 GHz band. */
	connect.pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]);
	if (connect.pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) {
		kfree_sensitive(connkeys);
		return -EOPNOTSUPP;
	}

	if (info->attrs[NL80211_ATTR_BSS_SELECT]) {
		/* bss selection makes no sense if bssid is set */
		if (connect.bssid) {
			kfree_sensitive(connkeys);
			return -EINVAL;
		}

		err = parse_bss_select(info->attrs[NL80211_ATTR_BSS_SELECT],
				       wiphy, &connect.bss_select);
		if (err) {
			kfree_sensitive(connkeys);
			return err;
		}
	}

	/*
	 * FILS ERP data is taken only when the wiphy has the FILS_SK_OFFLOAD
	 * feature and all four ERP attributes are present.
	 */
	if (wiphy_ext_feature_isset(&rdev->wiphy,
				    NL80211_EXT_FEATURE_FILS_SK_OFFLOAD) &&
	    info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_REALM] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] &&
	    info->attrs[NL80211_ATTR_FILS_ERP_RRK]) {
connect.fils_erp_username = nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); connect.fils_erp_username_len = nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); connect.fils_erp_realm = nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); connect.fils_erp_realm_len = nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); connect.fils_erp_next_seq_num = nla_get_u16( info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]); connect.fils_erp_rrk = nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); connect.fils_erp_rrk_len = nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); } else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] || info->attrs[NL80211_ATTR_FILS_ERP_REALM] || info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] || info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { kfree_sensitive(connkeys); return -EINVAL; } if (nla_get_flag(info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT])) { if (!info->attrs[NL80211_ATTR_SOCKET_OWNER]) { kfree_sensitive(connkeys); GENL_SET_ERR_MSG(info, "external auth requires connection ownership"); return -EINVAL; } connect.flags |= CONNECT_REQ_EXTERNAL_AUTH_SUPPORT; } if (nla_get_flag(info->attrs[NL80211_ATTR_MLO_SUPPORT])) connect.flags |= CONNECT_REQ_MLO_SUPPORT; err = cfg80211_connect(rdev, dev, &connect, connkeys, connect.prev_bssid); if (err) kfree_sensitive(connkeys); if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER]) { dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid; if (connect.bssid) memcpy(dev->ieee80211_ptr->disconnect_bssid, connect.bssid, ETH_ALEN); else eth_zero_addr(dev->ieee80211_ptr->disconnect_bssid); } return err; } static int nl80211_update_connect_params(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_connect_params connect = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; bool fils_sk_offload; u32 auth_type; u32 changed = 0; if (!rdev->ops->update_connect_params) return -EOPNOTSUPP; if 
(info->attrs[NL80211_ATTR_IE]) { connect.ie = nla_data(info->attrs[NL80211_ATTR_IE]); connect.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); changed |= UPDATE_ASSOC_IES; } fils_sk_offload = wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_FILS_SK_OFFLOAD); /* * when driver supports fils-sk offload all attributes must be * provided. So the else covers "fils-sk-not-all" and * "no-fils-sk-any". */ if (fils_sk_offload && info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] && info->attrs[NL80211_ATTR_FILS_ERP_REALM] && info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] && info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { connect.fils_erp_username = nla_data(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); connect.fils_erp_username_len = nla_len(info->attrs[NL80211_ATTR_FILS_ERP_USERNAME]); connect.fils_erp_realm = nla_data(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); connect.fils_erp_realm_len = nla_len(info->attrs[NL80211_ATTR_FILS_ERP_REALM]); connect.fils_erp_next_seq_num = nla_get_u16( info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM]); connect.fils_erp_rrk = nla_data(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); connect.fils_erp_rrk_len = nla_len(info->attrs[NL80211_ATTR_FILS_ERP_RRK]); changed |= UPDATE_FILS_ERP_INFO; } else if (info->attrs[NL80211_ATTR_FILS_ERP_USERNAME] || info->attrs[NL80211_ATTR_FILS_ERP_REALM] || info->attrs[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] || info->attrs[NL80211_ATTR_FILS_ERP_RRK]) { return -EINVAL; } if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); if (!nl80211_valid_auth_type(rdev, auth_type, NL80211_CMD_CONNECT)) return -EINVAL; if (auth_type == NL80211_AUTHTYPE_FILS_SK && fils_sk_offload && !(changed & UPDATE_FILS_ERP_INFO)) return -EINVAL; connect.auth_type = auth_type; changed |= UPDATE_AUTH_TYPE; } if (!wdev->connected) return -ENOLINK; return rdev_update_connect_params(rdev, dev, &connect, changed); } static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info) { struct 
cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	u16 reason;

	/*
	 * When a connection owner port ID is recorded on the wdev, only a
	 * request sent from that same port ID is accepted.
	 */
	if (dev->ieee80211_ptr->conn_owner_nlportid &&
	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
		return -EPERM;

	/*
	 * WLAN_REASON_DEAUTH_LEAVING is passed as the default value
	 * (presumably used when the attribute is absent).
	 */
	reason = nla_get_u16_default(info->attrs[NL80211_ATTR_REASON_CODE],
				     WLAN_REASON_DEAUTH_LEAVING);

	/* a reason code of 0 is rejected */
	if (reason == 0)
		return -EINVAL;

	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	return cfg80211_disconnect(rdev, dev, reason, true);
}

/*
 * nl80211_wiphy_netns - switch the registered device to another network
 * namespace.
 *
 * The target namespace is looked up from NL80211_ATTR_PID or, if that is
 * not given, from NL80211_ATTR_NETNS_FD; -EINVAL is returned when neither
 * attribute is present and a failed lookup is returned as its error code.
 * cfg80211_switch_netns() is only called when the wiphy is not already in
 * the target namespace. put_net() is called on the looked-up namespace
 * before returning.
 */
static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net *net;
	int err;

	if (info->attrs[NL80211_ATTR_PID]) {
		u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]);

		net = get_net_ns_by_pid(pid);
	} else if (info->attrs[NL80211_ATTR_NETNS_FD]) {
		u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]);

		net = get_net_ns_by_fd(fd);
	} else {
		return -EINVAL;
	}

	if (IS_ERR(net))
		return PTR_ERR(net);

	err = 0;

	/* check if anything to do */
	if (!net_eq(wiphy_net(&rdev->wiphy), net))
		err = cfg80211_switch_netns(rdev, net);

	put_net(net);
	return err;
}

/*
 * nl80211_set_pmksa - build a struct cfg80211_pmksa from the request and
 * pass it to the driver via rdev_set_pmksa().
 *
 * NL80211_ATTR_PMKID is mandatory. The entry is identified either by
 * NL80211_ATTR_MAC, or by the combination of SSID, FILS cache ID and PMK;
 * anything else yields -EINVAL. PMK, PMK lifetime and reauth threshold are
 * copied when present.
 *
 * Returns -EOPNOTSUPP when the interface type is neither station nor P2P
 * client (AP / P2P GO are accepted only with the AP_PMKSA_CACHING extended
 * feature) or when the driver has no set_pmksa op.
 */
static int nl80211_set_pmksa(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_pmksa pmksa;
	bool ap_pmksa_caching_support = false;

	memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));

	ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy,
		NL80211_EXT_FEATURE_AP_PMKSA_CACHING);

	if (!info->attrs[NL80211_ATTR_PMKID])
		return -EINVAL;

	/* pointers below reference the attribute payloads, nothing is copied */
	pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);

	if (info->attrs[NL80211_ATTR_MAC]) {
		pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
	} else if (info->attrs[NL80211_ATTR_SSID] &&
		   info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
		   info->attrs[NL80211_ATTR_PMK]) {
		pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
		pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
		pmksa.cache_id =
			nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]);
	} else {
		return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_PMK]) {
		pmksa.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]);
		pmksa.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]);
	}

	if (info->attrs[NL80211_ATTR_PMK_LIFETIME])
		pmksa.pmk_lifetime =
			nla_get_u32(info->attrs[NL80211_ATTR_PMK_LIFETIME]);

	if (info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD])
		pmksa.pmk_reauth_threshold =
			nla_get_u8(info->attrs[NL80211_ATTR_PMK_REAUTH_THRESHOLD]);

	/* interface type is checked only after the attributes were parsed */
	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
	    !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP ||
	       dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) &&
	       ap_pmksa_caching_support))
		return -EOPNOTSUPP;

	if (!rdev->ops->set_pmksa)
		return -EOPNOTSUPP;

	return rdev_set_pmksa(rdev, dev, &pmksa);
}

/*
 * nl80211_del_pmksa - build a struct cfg80211_pmksa describing the entry
 * to remove from the request attributes.
 *
 * Unlike nl80211_set_pmksa(), NL80211_ATTR_PMKID is optional here.
 */
static int nl80211_del_pmksa(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct cfg80211_pmksa pmksa;
	bool sae_offload_support = false;
	bool owe_offload_support = false;
	bool ap_pmksa_caching_support = false;

	memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));

	sae_offload_support = wiphy_ext_feature_isset(&rdev->wiphy,
		NL80211_EXT_FEATURE_SAE_OFFLOAD);
	owe_offload_support = wiphy_ext_feature_isset(&rdev->wiphy,
		NL80211_EXT_FEATURE_OWE_OFFLOAD);
	ap_pmksa_caching_support = wiphy_ext_feature_isset(&rdev->wiphy,
		NL80211_EXT_FEATURE_AP_PMKSA_CACHING);

	if (info->attrs[NL80211_ATTR_PMKID])
		pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);

	if (info->attrs[NL80211_ATTR_MAC]) {
		pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
	} else if (info->attrs[NL80211_ATTR_SSID]) {
		/*
		 * SSID based pmksa flush supported only for FILS,
		 * OWE/SAE OFFLOAD cases
		 */
		if (info->attrs[NL80211_ATTR_FILS_CACHE_ID] &&
		    info->attrs[NL80211_ATTR_PMK]) {
			pmksa.cache_id =
nla_data(info->attrs[NL80211_ATTR_FILS_CACHE_ID]); } else if (!sae_offload_support && !owe_offload_support) { return -EINVAL; } pmksa.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); pmksa.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); } else { return -EINVAL; } if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT && !((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_AP || dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO) && ap_pmksa_caching_support)) return -EOPNOTSUPP; if (!rdev->ops->del_pmksa) return -EOPNOTSUPP; return rdev_del_pmksa(rdev, dev, &pmksa); } static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!rdev->ops->flush_pmksa) return -EOPNOTSUPP; return rdev_flush_pmksa(rdev, dev); } static int nl80211_tdls_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; u8 action_code, dialog_token; u32 peer_capability = 0; u16 status_code; u8 *peer; int link_id; bool initiator; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || !rdev->ops->tdls_mgmt) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_TDLS_ACTION] || !info->attrs[NL80211_ATTR_STATUS_CODE] || !info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN] || !info->attrs[NL80211_ATTR_IE] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; peer = nla_data(info->attrs[NL80211_ATTR_MAC]); action_code = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_ACTION]); status_code = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); dialog_token = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_DIALOG_TOKEN]); initiator = nla_get_flag(info->attrs[NL80211_ATTR_TDLS_INITIATOR]); if 
(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]) peer_capability = nla_get_u32(info->attrs[NL80211_ATTR_TDLS_PEER_CAPABILITY]); link_id = nl80211_link_id_or_invalid(info->attrs); return rdev_tdls_mgmt(rdev, dev, peer, link_id, action_code, dialog_token, status_code, peer_capability, initiator, nla_data(info->attrs[NL80211_ATTR_IE]), nla_len(info->attrs[NL80211_ATTR_IE])); } static int nl80211_tdls_oper(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; enum nl80211_tdls_operation operation; u8 *peer; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) || !rdev->ops->tdls_oper) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_TDLS_OPERATION] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; operation = nla_get_u8(info->attrs[NL80211_ATTR_TDLS_OPERATION]); peer = nla_data(info->attrs[NL80211_ATTR_MAC]); return rdev_tdls_oper(rdev, dev, peer, operation); } static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_chan_def chandef; struct sk_buff *msg; void *hdr; u64 cookie; u32 duration; int err; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ] || !info->attrs[NL80211_ATTR_DURATION]) return -EINVAL; duration = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]); if (!rdev->ops->remain_on_channel || !(rdev->wiphy.flags & WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL)) return -EOPNOTSUPP; /* * We should be on that channel for at least a minimum amount of * time (10ms) but no longer than the driver supports. 
*/ if (duration < NL80211_MIN_REMAIN_ON_CHANNEL_TIME || duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { const struct cfg80211_chan_def *oper_chandef, *compat_chandef; oper_chandef = wdev_chandef(wdev, link_id); if (WARN_ON(!oper_chandef)) { /* cannot happen since we must beacon to get here */ WARN_ON(1); return -EBUSY; } /* note: returns first one if identical chandefs */ compat_chandef = cfg80211_chandef_compatible(&chandef, oper_chandef); if (compat_chandef != &chandef) return -EBUSY; } msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (!hdr) { err = -ENOBUFS; goto free_msg; } err = rdev_remain_on_channel(rdev, wdev, chandef.chan, duration, &cookie); if (err) goto free_msg; if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: err = -ENOBUFS; free_msg: nlmsg_free(msg); return err; } static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; u64 cookie; if (!info->attrs[NL80211_ATTR_COOKIE]) return -EINVAL; if (!rdev->ops->cancel_remain_on_channel) return -EOPNOTSUPP; cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); return rdev_cancel_remain_on_channel(rdev, wdev, cookie); } static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_bitrate_mask mask; unsigned int link_id = nl80211_link_id(info->attrs); struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; err = 
nl80211_parse_tx_bitrate_mask(info, info->attrs,
					    NL80211_ATTR_TX_RATES, &mask,
					    dev, true, link_id);
	if (err)
		return err;

	return rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask);
}

/*
 * nl80211_register_mgmt - register the sending socket for reception of
 * management frames matching NL80211_ATTR_FRAME_MATCH.
 *
 * The frame type defaults to an action frame
 * (IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION) and may be overridden by
 * NL80211_ATTR_FRAME_TYPE.
 *
 * Returns -EINVAL without a frame match attribute and -EOPNOTSUPP for
 * unsupported interface types, for NAN without the SECURE_NAN extended
 * feature, when the driver has no mgmt_tx op, or when multicast reception
 * is requested without the MULTICAST_REGISTRATIONS extended feature.
 */
static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;

	if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
		return -EINVAL;

	if (info->attrs[NL80211_ATTR_FRAME_TYPE])
		frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]);

	switch (wdev->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_ADHOC:
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_MESH_POINT:
	case NL80211_IFTYPE_P2P_GO:
	case NL80211_IFTYPE_P2P_DEVICE:
		break;
	case NL80211_IFTYPE_NAN:
		if (!wiphy_ext_feature_isset(wdev->wiphy,
					     NL80211_EXT_FEATURE_SECURE_NAN))
			return -EOPNOTSUPP;
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* not much point in registering if we can't reply */
	if (!rdev->ops->mgmt_tx)
		return -EOPNOTSUPP;

	if (info->attrs[NL80211_ATTR_RECEIVE_MULTICAST] &&
	    !wiphy_ext_feature_isset(&rdev->wiphy,
				     NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS)) {
		GENL_SET_ERR_MSG(info,
				 "multicast RX registrations are not supported");
		return -EOPNOTSUPP;
	}

	return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type,
					   nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
					   nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]),
					   info->attrs[NL80211_ATTR_RECEIVE_MULTICAST],
					   info->extack);
}

/*
 * nl80211_tx_mgmt - transmit the management frame carried in
 * NL80211_ATTR_FRAME.
 *
 * Unless NL80211_ATTR_DONT_WAIT_FOR_ACK is present, a reply message
 * carrying the TX cookie is sent back to the requester.
 *
 * Returns -EINVAL for missing/invalid attributes (no frame, no frequency
 * on a P2P device, a wait time outside the allowed range, off-channel
 * options without WIPHY_FLAG_OFFCHAN_TX or without a channel, a link ID
 * that is not in wdev->valid_links), -EOPNOTSUPP for an unsupported
 * interface type or a missing mgmt_tx op, -EBUSY when off-channel
 * operation is not allowed, -ENOMEM/-ENOBUFS on reply allocation or
 * construction failure, or the error from the parsing helpers and
 * cfg80211_mlme_mgmt_tx().
 */
static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	struct cfg80211_chan_def chandef;
	int err;
	void *hdr = NULL;
	u64 cookie;
	struct sk_buff *msg = NULL;
	struct cfg80211_mgmt_tx_params params = {
		.dont_wait_for_ack =
			info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK],
	};

	if (!info->attrs[NL80211_ATTR_FRAME])
		return -EINVAL;

	if (!rdev->ops->mgmt_tx)
		return -EOPNOTSUPP;

	switch (wdev->iftype) {
	case NL80211_IFTYPE_P2P_DEVICE:
		/* a P2P device must be told which frequency to use */
		if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
			return -EINVAL;
		break;
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_ADHOC:
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_MESH_POINT:
	case NL80211_IFTYPE_P2P_GO:
		break;
	case NL80211_IFTYPE_NAN:
		if (!wiphy_ext_feature_isset(wdev->wiphy,
					     NL80211_EXT_FEATURE_SECURE_NAN))
			return -EOPNOTSUPP;
		break;
	default:
		return -EOPNOTSUPP;
	}

	if (info->attrs[NL80211_ATTR_DURATION]) {
		if (!(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX))
			return -EINVAL;
		params.wait = nla_get_u32(info->attrs[NL80211_ATTR_DURATION]);

		/*
		 * We should wait on the channel for at least a minimum amount
		 * of time (10ms) but no longer than the driver supports.
		 */
		if (params.wait < NL80211_MIN_REMAIN_ON_CHANNEL_TIME ||
		    params.wait > rdev->wiphy.max_remain_on_channel_duration)
			return -EINVAL;
	}

	params.offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK];

	if (params.offchan && !(rdev->wiphy.flags & WIPHY_FLAG_OFFCHAN_TX))
		return -EINVAL;

	params.no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]);

	/*
	 * get the channel if any has been specified, otherwise pass NULL to
	 * the driver. The latter will use the current one
	 */
	chandef.chan = NULL;
	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
		err = nl80211_parse_chandef(rdev, info, &chandef);
		if (err)
			return err;
	}

	if (!chandef.chan && params.offchan)
		return -EINVAL;

	if (params.offchan &&
	    !cfg80211_off_channel_oper_allowed(wdev, chandef.chan))
		return -EBUSY;

	params.link_id = nl80211_link_id_or_invalid(info->attrs);
	/*
	 * This now races due to the unlock, but we cannot check
	 * the valid links for the _station_ anyway, so that's up
	 * to the driver.
	 */
	if (params.link_id >= 0 &&
	    !(wdev->valid_links & BIT(params.link_id)))
		return -EINVAL;

	params.buf = nla_data(info->attrs[NL80211_ATTR_FRAME]);
	params.len = nla_len(info->attrs[NL80211_ATTR_FRAME]);

	err = nl80211_parse_counter_offsets(rdev, NULL, params.len, -1,
					    info->attrs[NL80211_ATTR_CSA_C_OFFSETS_TX],
					    &params.csa_offsets,
					    &params.n_csa_offsets);
	if (err)
		return err;

	/* the reply message is only prepared when the cookie is reported */
	if (!params.dont_wait_for_ack) {
		msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
		if (!msg)
			return -ENOMEM;

		hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
				     NL80211_CMD_FRAME);
		if (!hdr) {
			err = -ENOBUFS;
			goto free_msg;
		}
	}

	params.chan = chandef.chan;
	err = cfg80211_mlme_mgmt_tx(rdev, wdev, &params, &cookie);
	if (err)
		goto free_msg;

	if (msg) {
		if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
				      NL80211_ATTR_PAD))
			goto nla_put_failure;

		genlmsg_end(msg, hdr);
		return genlmsg_reply(msg, info);
	}

	return 0;

 nla_put_failure:
	err = -ENOBUFS;
 free_msg:
	/* msg is NULL here when no reply was prepared */
	nlmsg_free(msg);
	return err;
}

/*
 * nl80211_tx_mgmt_cancel_wait - cancel the wait associated with a
 * management frame TX identified by the cookie attribute.
 *
 * Returns -EINVAL without a cookie and -EOPNOTSUPP when the driver lacks
 * mgmt_tx_cancel_wait or the interface type is not in the accepted list
 * below (NAN additionally needs the SECURE_NAN extended feature).
 */
static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb,
				       struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	u64 cookie;

	if (!info->attrs[NL80211_ATTR_COOKIE])
		return -EINVAL;

	if (!rdev->ops->mgmt_tx_cancel_wait)
		return -EOPNOTSUPP;

	switch (wdev->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_ADHOC:
	case NL80211_IFTYPE_P2P_CLIENT:
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_AP_VLAN:
	case NL80211_IFTYPE_P2P_GO:
	case NL80211_IFTYPE_P2P_DEVICE:
		break;
	case NL80211_IFTYPE_NAN:
		if (!wiphy_ext_feature_isset(wdev->wiphy,
					     NL80211_EXT_FEATURE_SECURE_NAN))
			return -EOPNOTSUPP;
		break;
	default:
		return -EOPNOTSUPP;
	}

	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);

	return rdev_mgmt_tx_cancel_wait(rdev, wdev, cookie);
}

/*
 * nl80211_set_power_save - enable or disable power save according to
 * NL80211_ATTR_PS_STATE.
 */
static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev;
	struct net_device *dev =
info->user_ptr[1];
	u8 ps_state;
	bool state;
	int err;

	if (!info->attrs[NL80211_ATTR_PS_STATE])
		return -EINVAL;

	/*
	 * NOTE(review): the u32 attribute value is stored in a u8 and thus
	 * truncated; presumably the attribute policy limits the accepted
	 * range - confirm.
	 */
	ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]);

	wdev = dev->ieee80211_ptr;

	if (!rdev->ops->set_power_mgmt)
		return -EOPNOTSUPP;

	/* every value other than NL80211_PS_ENABLED means "disabled" */
	state = (ps_state == NL80211_PS_ENABLED) ? true : false;

	/* nothing to do when the requested state is already set */
	if (state == wdev->ps)
		return 0;

	err = rdev_set_power_mgmt(rdev, dev, state, wdev->ps_timeout);
	if (!err)
		wdev->ps = state;
	return err;
}

/*
 * nl80211_get_power_save - reply with the power save state cached in
 * wdev->ps as NL80211_ATTR_PS_STATE.
 *
 * Returns -EOPNOTSUPP when the driver has no set_power_mgmt op, -ENOMEM
 * when the reply cannot be allocated and -ENOBUFS when it cannot be
 * filled in.
 */
static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	enum nl80211_ps_state ps_state;
	struct wireless_dev *wdev;
	struct net_device *dev = info->user_ptr[1];
	struct sk_buff *msg;
	void *hdr;
	int err;

	wdev = dev->ieee80211_ptr;

	/* the state is reported from wdev->ps, yet the set op is required */
	if (!rdev->ops->set_power_mgmt)
		return -EOPNOTSUPP;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_GET_POWER_SAVE);
	if (!hdr) {
		err = -ENOBUFS;
		goto free_msg;
	}

	if (wdev->ps)
		ps_state = NL80211_PS_ENABLED;
	else
		ps_state = NL80211_PS_DISABLED;

	if (nla_put_u32(msg, NL80211_ATTR_PS_STATE, ps_state))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return genlmsg_reply(msg, info);

 nla_put_failure:
	err = -ENOBUFS;
 free_msg:
	nlmsg_free(msg);
	return err;
}

/*
 * Policy for the attributes nested inside NL80211_ATTR_CQM; used by
 * nl80211_set_cqm() when parsing the nest. The RSSI threshold attribute
 * is binary because it can carry more than one s32 value.
 */
static const struct nla_policy
nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] = {
	[NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_BINARY },
	[NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
	[NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
	[NL80211_ATTR_CQM_TXE_RATE] = { .type = NLA_U32 },
	[NL80211_ATTR_CQM_TXE_PKTS] = { .type = NLA_U32 },
	[NL80211_ATTR_CQM_TXE_INTVL] = { .type = NLA_U32 },
	[NL80211_ATTR_CQM_RSSI_LEVEL] = { .type = NLA_S32 },
};

/*
 * nl80211_set_cqm_txe - validate the TX error monitoring parameters
 * (rate, packet count, interval) and pass them to the driver.
 */
static int nl80211_set_cqm_txe(struct genl_info *info,
			       u32 rate, u32 pkts, u32 intvl)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev =
dev->ieee80211_ptr;

	/* rate above 100 is rejected (presumably a percentage - confirm) */
	if (rate > 100 || intvl > NL80211_CQM_TXE_MAX_INTVL)
		return -EINVAL;

	if (!rdev->ops->set_cqm_txe_config)
		return -EOPNOTSUPP;

	if (wdev->iftype != NL80211_IFTYPE_STATION &&
	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	return rdev_set_cqm_txe_config(rdev, dev, rate, pkts, intvl);
}

/*
 * cfg80211_cqm_rssi_update - program the driver with the RSSI range
 * [low, high] that surrounds the last known RSSI value.
 *
 * The thresholds in @cqm_config are scanned for the first one above the
 * last RSSI value; the range handed to rdev_set_cqm_rssi_range_config()
 * is bounded by that threshold and the one before it, widened by the
 * hysteresis, or by S32_MIN / S32_MAX when there is no threshold on that
 * side.
 *
 * Returns the rdev_get_station() error if the station query fails,
 * otherwise the result of rdev_set_cqm_rssi_range_config().
 */
static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev,
				    struct net_device *dev,
				    struct cfg80211_cqm_config *cqm_config)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	s32 last, low, high;
	u32 hyst;
	int i, n, low_index;
	int err;

	/*
	 * Obtain current RSSI value if possible, if not and no RSSI threshold
	 * event has been received yet, we should receive an event after a
	 * connection is established and enough beacons received to calculate
	 * the average.
	 */
	if (!cqm_config->last_rssi_event_value &&
	    wdev->links[0].client.current_bss &&
	    rdev->ops->get_station) {
		struct station_info sinfo = {};
		u8 *mac_addr;

		mac_addr = wdev->links[0].client.current_bss->pub.bssid;

		err = rdev_get_station(rdev, dev, mac_addr, &sinfo);
		if (err)
			return err;

		/* the scalar fields of sinfo are still read after this call */
		cfg80211_sinfo_release_content(&sinfo);
		if (sinfo.filled & BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG))
			cqm_config->last_rssi_event_value =
				(s8) sinfo.rx_beacon_signal_avg;
	}

	last = cqm_config->last_rssi_event_value;
	hyst = cqm_config->rssi_hyst;
	n = cqm_config->n_rssi_thresholds;

	/* find the first threshold that lies above the last RSSI value */
	for (i = 0; i < n; i++) {
		i = array_index_nospec(i, n);
		if (last < cqm_config->rssi_thresholds[i])
			break;
	}

	/* lower bound: the threshold just below, if there is one */
	low_index = i - 1;
	if (low_index >= 0) {
		low_index = array_index_nospec(low_index, n);
		low = cqm_config->rssi_thresholds[low_index] - hyst;
	} else {
		low = S32_MIN;
	}
	/* upper bound: the threshold found above, if there is one */
	if (i < n) {
		i = array_index_nospec(i, n);
		high = cqm_config->rssi_thresholds[i] + hyst - 1;
	} else {
		high = S32_MAX;
	}

	return rdev_set_cqm_rssi_range_config(rdev, dev, low, high);
}

/*
 * nl80211_set_cqm_rssi - install, replace or remove the RSSI monitoring
 * configuration of a station / P2P client interface.
 *
 * @thresholds must be strictly increasing and must not contain positive
 * values (-EINVAL otherwise). A single threshold of 0 disables monitoring.
 * More than one threshold requires the CQM_RSSI_LIST extended feature and
 * the set_cqm_rssi_range_config op; otherwise set_cqm_rssi_config is
 * required (-EOPNOTSUPP when missing).
 *
 * The new configuration is published in wdev->cqm_config before the
 * driver is called; on driver failure the old pointer is restored and the
 * new configuration is freed, on success the old one is freed.
 */
static int nl80211_set_cqm_rssi(struct genl_info *info,
				const s32 *thresholds, int n_thresholds,
				u32 hysteresis)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_cqm_config *cqm_config = NULL, *old;
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	s32 prev = S32_MIN;
	int i, err;

	/* Check that no value is positive and the list is strictly sorted */
	for (i = 0; i < n_thresholds; i++) {
		if (thresholds[i] > 0 || thresholds[i] <= prev)
			return -EINVAL;

		prev = thresholds[i];
	}

	if (wdev->iftype != NL80211_IFTYPE_STATION &&
	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
		return -EOPNOTSUPP;

	if (n_thresholds == 1 && thresholds[0] == 0) /* Disabling */
		n_thresholds = 0;

	old = wiphy_dereference(wdev->wiphy, wdev->cqm_config);

	/* if already disabled just succeed */
	if (!n_thresholds && !old)
		return 0;

	if (n_thresholds > 1) {
		if (!wiphy_ext_feature_isset(&rdev->wiphy,
					     NL80211_EXT_FEATURE_CQM_RSSI_LIST) ||
		    !rdev->ops->set_cqm_rssi_range_config)
			return -EOPNOTSUPP;
	} else {
		if (!rdev->ops->set_cqm_rssi_config)
			return -EOPNOTSUPP;
	}

	if (n_thresholds) {
		cqm_config = kzalloc(struct_size(cqm_config, rssi_thresholds,
						 n_thresholds),
				     GFP_KERNEL);
		if (!cqm_config)
			return -ENOMEM;

		cqm_config->rssi_hyst = hysteresis;
		cqm_config->n_rssi_thresholds = n_thresholds;
		memcpy(cqm_config->rssi_thresholds, thresholds,
		       flex_array_size(cqm_config, rssi_thresholds,
				       n_thresholds));
		cqm_config->use_range_api = n_thresholds > 1 ||
					    !rdev->ops->set_cqm_rssi_config;

		/* publish the new config before the driver is programmed */
		rcu_assign_pointer(wdev->cqm_config, cqm_config);

		if (cqm_config->use_range_api)
			err = cfg80211_cqm_rssi_update(rdev, dev, cqm_config);
		else
			err = rdev_set_cqm_rssi_config(rdev, dev,
						       thresholds[0],
						       hysteresis);
	} else {
		/* old is non-NULL here, see the early return above */
		RCU_INIT_POINTER(wdev->cqm_config, NULL);
		/* if enabled as range also disable via range */
		if (old->use_range_api)
			err = rdev_set_cqm_rssi_range_config(rdev, dev, 0, 0);
		else
			err = rdev_set_cqm_rssi_config(rdev, dev, 0, 0);
	}

	if (err) {
		/* roll back: cqm_config is NULL in the disabling case */
		rcu_assign_pointer(wdev->cqm_config, old);
		kfree_rcu(cqm_config, rcu_head);
	} else {
		kfree_rcu(old, rcu_head);
	}

	return err;
}

/*
 * nl80211_set_cqm - parse the nested NL80211_ATTR_CQM attribute and
 * dispatch to the RSSI or TX-error configuration helper.
 */
static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr
*attrs[NL80211_ATTR_CQM_MAX + 1];
	struct nlattr *cqm;
	int err;

	cqm = info->attrs[NL80211_ATTR_CQM];
	if (!cqm)
		return -EINVAL;

	err = nla_parse_nested_deprecated(attrs, NL80211_ATTR_CQM_MAX, cqm,
					  nl80211_attr_cqm_policy,
					  info->extack);
	if (err)
		return err;

	/* RSSI monitoring: threshold list plus hysteresis */
	if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
	    attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
		const s32 *thresholds =
			nla_data(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
		int len = nla_len(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
		u32 hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);

		/* the payload is read as an array of 4-byte s32 values */
		if (len % 4)
			return -EINVAL;

		return nl80211_set_cqm_rssi(info, thresholds, len / 4,
					    hysteresis);
	}

	/* TX error monitoring: all three attributes are required */
	if (attrs[NL80211_ATTR_CQM_TXE_RATE] &&
	    attrs[NL80211_ATTR_CQM_TXE_PKTS] &&
	    attrs[NL80211_ATTR_CQM_TXE_INTVL]) {
		u32 rate = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_RATE]);
		u32 pkts = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_PKTS]);
		u32 intvl = nla_get_u32(attrs[NL80211_ATTR_CQM_TXE_INTVL]);

		return nl80211_set_cqm_txe(info, rate, pkts, intvl);
	}

	return -EINVAL;
}

/*
 * nl80211_join_ocb - parse the channel definition from the request and
 * join OCB on it via cfg80211_join_ocb().
 */
static int nl80211_join_ocb(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct ocb_setup setup = {};
	int err;

	err = nl80211_parse_chandef(rdev, info, &setup.chandef);
	if (err)
		return err;

	return cfg80211_join_ocb(rdev, dev, &setup);
}

/* nl80211_leave_ocb - thin wrapper around cfg80211_leave_ocb(). */
static int nl80211_leave_ocb(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];

	return cfg80211_leave_ocb(rdev, dev);
}

/*
 * nl80211_join_mesh - build the mesh configuration and setup from the
 * request and join the mesh via __cfg80211_join_mesh().
 *
 * Both structures start out as copies of default_mesh_config /
 * default_mesh_setup and are then overridden by the attributes present in
 * the request. A non-empty NL80211_ATTR_MESH_ID is mandatory. Basic rates
 * and TX rates need a channel from NL80211_ATTR_WIPHY_FREQ (-EINVAL
 * otherwise). On success with NL80211_ATTR_SOCKET_OWNER the sender's
 * port ID is recorded as connection owner.
 */
static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct mesh_config cfg;
	struct mesh_setup setup;
	int err;

	/* start with default */
	memcpy(&cfg, &default_mesh_config, sizeof(cfg));
	memcpy(&setup, &default_mesh_setup, sizeof(setup));

	if (info->attrs[NL80211_ATTR_MESH_CONFIG]) {
		/* and parse parameters if given */
		err = nl80211_parse_mesh_config(info, &cfg, NULL);
		if (err)
			return err;
	}

	if (!info->attrs[NL80211_ATTR_MESH_ID] ||
	    !nla_len(info->attrs[NL80211_ATTR_MESH_ID]))
		return -EINVAL;

	setup.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
	setup.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);

	if (info->attrs[NL80211_ATTR_MCAST_RATE] &&
	    !nl80211_parse_mcast_rate(rdev, setup.mcast_rate,
			    nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE])))
			return -EINVAL;

	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
		setup.beacon_interval =
			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);

		err = cfg80211_validate_beacon_int(rdev,
						   NL80211_IFTYPE_MESH_POINT,
						   setup.beacon_interval);
		if (err)
			return err;
	}

	if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
		setup.dtim_period =
			nla_get_u32(info->attrs[NL80211_ATTR_DTIM_PERIOD]);
		/* accepted DTIM period range is 1..100 */
		if (setup.dtim_period < 1 || setup.dtim_period > 100)
			return -EINVAL;
	}

	if (info->attrs[NL80211_ATTR_MESH_SETUP]) {
		/* parse additional setup parameters if given */
		err = nl80211_parse_mesh_setup(info, &setup);
		if (err)
			return err;
	}

	/* with a userspace MPM, peer links are not opened automatically */
	if (setup.user_mpm)
		cfg.auto_open_plinks = false;

	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
		err = nl80211_parse_chandef(rdev, info, &setup.chandef);
		if (err)
			return err;
	} else {
		/* __cfg80211_join_mesh() will sort it out */
		setup.chandef.chan = NULL;
	}

	if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
		u8 *rates = nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		int n_rates =
			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
		struct ieee80211_supported_band *sband;

		/* the band is needed to interpret the rates */
		if (!setup.chandef.chan)
			return -EINVAL;

		sband = rdev->wiphy.bands[setup.chandef.chan->band];

		err = ieee80211_get_ratemask(sband, rates, n_rates,
					     &setup.basic_rates);
		if (err)
			return err;
	}

	if (info->attrs[NL80211_ATTR_TX_RATES]) {
		err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
						    NL80211_ATTR_TX_RATES,
						    &setup.beacon_rate,
						    dev, false, 0);
		if (err)
			return err;

		if (!setup.chandef.chan)
			return -EINVAL;

		err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band,
					      &setup.beacon_rate);
		if (err)
			return err;
	}

	setup.userspace_handles_dfs =
		nla_get_flag(info->attrs[NL80211_ATTR_HANDLE_DFS]);

	if (info->attrs[NL80211_ATTR_CONTROL_PORT_OVER_NL80211]) {
		int r = validate_pae_over_nl80211(rdev, info);

		if (r < 0)
			return r;

		setup.control_port_over_nl80211 = true;
	}

	err = __cfg80211_join_mesh(rdev, dev, &setup, &cfg);
	if (!err && info->attrs[NL80211_ATTR_SOCKET_OWNER])
		dev->ieee80211_ptr->conn_owner_nlportid = info->snd_portid;

	return err;
}

/* nl80211_leave_mesh - thin wrapper around cfg80211_leave_mesh(). */
static int nl80211_leave_mesh(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];

	return cfg80211_leave_mesh(rdev, dev);
}

#ifdef CONFIG_PM
/*
 * nl80211_send_wowlan_patterns - add the configured WoWLAN packet
 * patterns to @msg as a nest of NL80211_WOWLAN_TRIG_PKT_PATTERN entries.
 *
 * Dereferences rdev->wiphy.wowlan_config without a NULL check, so the
 * caller must make sure it is set. Each pattern is emitted in a nest
 * numbered from 1, with a mask of DIV_ROUND_UP(pattern_len, 8) bytes.
 *
 * Returns 0 on success (including when there are no patterns) and
 * -ENOBUFS when the message runs out of room.
 */
static int nl80211_send_wowlan_patterns(struct sk_buff *msg,
					struct cfg80211_registered_device *rdev)
{
	struct cfg80211_wowlan *wowlan = rdev->wiphy.wowlan_config;
	struct nlattr *nl_pats, *nl_pat;
	int i, pat_len;

	if (!wowlan->n_patterns)
		return 0;

	nl_pats = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN);
	if (!nl_pats)
		return -ENOBUFS;

	for (i = 0; i < wowlan->n_patterns; i++) {
		nl_pat = nla_nest_start_noflag(msg, i + 1);
		if (!nl_pat)
			return -ENOBUFS;
		pat_len = wowlan->patterns[i].pattern_len;
		if (nla_put(msg, NL80211_PKTPAT_MASK, DIV_ROUND_UP(pat_len, 8),
			    wowlan->patterns[i].mask) ||
		    nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
			    wowlan->patterns[i].pattern) ||
		    nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
				wowlan->patterns[i].pkt_offset))
			return -ENOBUFS;
		nla_nest_end(msg, nl_pat);
	}
	nla_nest_end(msg, nl_pats);

	return 0;
}

/*
 * nl80211_send_wowlan_tcp - add the WoWLAN TCP connection configuration
 * to @msg as a NL80211_WOWLAN_TRIG_TCP_CONNECTION nest.
 *
 * Returns 0 when @tcp is NULL or on success, -ENOBUFS when the message
 * runs out of room.
 */
static int nl80211_send_wowlan_tcp(struct sk_buff *msg,
				   struct cfg80211_wowlan_tcp *tcp)
{
	struct nlattr *nl_tcp;

	if (!tcp)
		return 0;

	nl_tcp = nla_nest_start_noflag(msg,
				       NL80211_WOWLAN_TRIG_TCP_CONNECTION);
	if (!nl_tcp)
		return -ENOBUFS;

	if (nla_put_in_addr(msg, NL80211_WOWLAN_TCP_SRC_IPV4, tcp->src) ||
	    nla_put_in_addr(msg, NL80211_WOWLAN_TCP_DST_IPV4, tcp->dst) ||
	    nla_put(msg, NL80211_WOWLAN_TCP_DST_MAC, ETH_ALEN,
tcp->dst_mac) || nla_put_u16(msg, NL80211_WOWLAN_TCP_SRC_PORT, tcp->src_port) || nla_put_u16(msg, NL80211_WOWLAN_TCP_DST_PORT, tcp->dst_port) || nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD, tcp->payload_len, tcp->payload) || nla_put_u32(msg, NL80211_WOWLAN_TCP_DATA_INTERVAL, tcp->data_interval) || nla_put(msg, NL80211_WOWLAN_TCP_WAKE_PAYLOAD, tcp->wake_len, tcp->wake_data) || nla_put(msg, NL80211_WOWLAN_TCP_WAKE_MASK, DIV_ROUND_UP(tcp->wake_len, 8), tcp->wake_mask)) return -ENOBUFS; if (tcp->payload_seq.len && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ, sizeof(tcp->payload_seq), &tcp->payload_seq)) return -ENOBUFS; if (tcp->payload_tok.len && nla_put(msg, NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN, sizeof(tcp->payload_tok) + tcp->tokens_size, &tcp->payload_tok)) return -ENOBUFS; nla_nest_end(msg, nl_tcp); return 0; } static int nl80211_send_wowlan_nd(struct sk_buff *msg, struct cfg80211_sched_scan_request *req) { struct nlattr *nd, *freqs, *matches, *match, *scan_plans, *scan_plan; int i; if (!req) return 0; nd = nla_nest_start_noflag(msg, NL80211_WOWLAN_TRIG_NET_DETECT); if (!nd) return -ENOBUFS; if (req->n_scan_plans == 1 && nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->scan_plans[0].interval * 1000)) return -ENOBUFS; if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_DELAY, req->delay)) return -ENOBUFS; if (req->relative_rssi_set) { struct nl80211_bss_select_rssi_adjust rssi_adjust; if (nla_put_s8(msg, NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI, req->relative_rssi)) return -ENOBUFS; rssi_adjust.band = req->rssi_adjust.band; rssi_adjust.delta = req->rssi_adjust.delta; if (nla_put(msg, NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST, sizeof(rssi_adjust), &rssi_adjust)) return -ENOBUFS; } freqs = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!freqs) return -ENOBUFS; for (i = 0; i < req->n_channels; i++) { if (nla_put_u32(msg, i, req->channels[i]->center_freq)) return -ENOBUFS; } nla_nest_end(msg, freqs); if (req->n_match_sets) { matches = 
nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_MATCH);
		if (!matches)
			return -ENOBUFS;

		/* match nests use the zero-based match set index as type */
		for (i = 0; i < req->n_match_sets; i++) {
			match = nla_nest_start_noflag(msg, i);
			if (!match)
				return -ENOBUFS;

			if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
				    req->match_sets[i].ssid.ssid_len,
				    req->match_sets[i].ssid.ssid))
				return -ENOBUFS;
			nla_nest_end(msg, match);
		}
		nla_nest_end(msg, matches);
	}

	scan_plans = nla_nest_start_noflag(msg, NL80211_ATTR_SCHED_SCAN_PLANS);
	if (!scan_plans)
		return -ENOBUFS;

	/* scan plan nests are numbered from 1 */
	for (i = 0; i < req->n_scan_plans; i++) {
		scan_plan = nla_nest_start_noflag(msg, i + 1);
		if (!scan_plan)
			return -ENOBUFS;

		/* iterations is only reported when it is non-zero */
		if (nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
				req->scan_plans[i].interval) ||
		    (req->scan_plans[i].iterations &&
		     nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_ITERATIONS,
				 req->scan_plans[i].iterations)))
			return -ENOBUFS;
		nla_nest_end(msg, scan_plan);
	}
	nla_nest_end(msg, scan_plans);

	nla_nest_end(msg, nd);

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_GET_WOWLAN: reply with the currently
 * configured WoWLAN triggers of the wiphy.
 *
 * When no configuration is set, the reply carries only the header.
 *
 * Returns -EOPNOTSUPP if the wiphy does not advertise WoWLAN support,
 * -ENOMEM if the reply cannot be allocated, -ENOBUFS if it cannot be
 * filled, otherwise the result of genlmsg_reply().
 */
static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct sk_buff *msg;
	void *hdr;
	u32 size = NLMSG_DEFAULT_SIZE;

	if (!rdev->wiphy.wowlan)
		return -EOPNOTSUPP;

	if (rdev->wiphy.wowlan_config && rdev->wiphy.wowlan_config->tcp) {
		/*
		 * adjust size to have room for all the data
		 *
		 * NOTE(review): the wake mask is counted as wake_len / 8
		 * here, while nl80211_send_wowlan_tcp() emits
		 * DIV_ROUND_UP(wake_len, 8) bytes; presumably the
		 * NLMSG_DEFAULT_SIZE base absorbs the difference - confirm.
		 */
		size += rdev->wiphy.wowlan_config->tcp->tokens_size +
			rdev->wiphy.wowlan_config->tcp->payload_len +
			rdev->wiphy.wowlan_config->tcp->wake_len +
			rdev->wiphy.wowlan_config->tcp->wake_len / 8;
	}

	msg = nlmsg_new(size, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_GET_WOWLAN);
	if (!hdr)
		goto nla_put_failure;

	if (rdev->wiphy.wowlan_config) {
		struct nlattr *nl_wowlan;

		nl_wowlan = nla_nest_start_noflag(msg,
						  NL80211_ATTR_WOWLAN_TRIGGERS);
		if (!nl_wowlan)
			goto nla_put_failure;

		/* one flag attribute per enabled boolean trigger */
		if ((rdev->wiphy.wowlan_config->any &&
		     nla_put_flag(msg, NL80211_WOWLAN_TRIG_ANY)) ||
		    (rdev->wiphy.wowlan_config->disconnect &&
		     nla_put_flag(msg,
NL80211_WOWLAN_TRIG_DISCONNECT)) || (rdev->wiphy.wowlan_config->magic_pkt && nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT)) || (rdev->wiphy.wowlan_config->gtk_rekey_failure && nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE)) || (rdev->wiphy.wowlan_config->eap_identity_req && nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST)) || (rdev->wiphy.wowlan_config->four_way_handshake && nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE)) || (rdev->wiphy.wowlan_config->rfkill_release && nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))) goto nla_put_failure; if (nl80211_send_wowlan_patterns(msg, rdev)) goto nla_put_failure; if (nl80211_send_wowlan_tcp(msg, rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; if (nl80211_send_wowlan_nd( msg, rdev->wiphy.wowlan_config->nd_config)) goto nla_put_failure; nla_nest_end(msg, nl_wowlan); } genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } static int nl80211_parse_wowlan_tcp(struct cfg80211_registered_device *rdev, struct nlattr *attr, struct cfg80211_wowlan *trig) { struct nlattr *tb[NUM_NL80211_WOWLAN_TCP]; struct cfg80211_wowlan_tcp *cfg; struct nl80211_wowlan_tcp_data_token *tok = NULL; struct nl80211_wowlan_tcp_data_seq *seq = NULL; u32 size; u32 data_size, wake_size, tokens_size = 0, wake_mask_size; int err, port; if (!rdev->wiphy.wowlan->tcp) return -EINVAL; err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TCP, attr, nl80211_wowlan_tcp_policy, NULL); if (err) return err; if (!tb[NL80211_WOWLAN_TCP_SRC_IPV4] || !tb[NL80211_WOWLAN_TCP_DST_IPV4] || !tb[NL80211_WOWLAN_TCP_DST_MAC] || !tb[NL80211_WOWLAN_TCP_DST_PORT] || !tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD] || !tb[NL80211_WOWLAN_TCP_DATA_INTERVAL] || !tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD] || !tb[NL80211_WOWLAN_TCP_WAKE_MASK]) return -EINVAL; data_size = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]); if (data_size > rdev->wiphy.wowlan->tcp->data_payload_max) return -EINVAL; if 
(nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) > rdev->wiphy.wowlan->tcp->data_interval_max || nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]) == 0) return -EINVAL; wake_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]); if (wake_size > rdev->wiphy.wowlan->tcp->wake_payload_max) return -EINVAL; wake_mask_size = nla_len(tb[NL80211_WOWLAN_TCP_WAKE_MASK]); if (wake_mask_size != DIV_ROUND_UP(wake_size, 8)) return -EINVAL; if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]) { u32 tokln = nla_len(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); tok = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_TOKEN]); tokens_size = tokln - sizeof(*tok); if (!tok->len || tokens_size % tok->len) return -EINVAL; if (!rdev->wiphy.wowlan->tcp->tok) return -EINVAL; if (tok->len > rdev->wiphy.wowlan->tcp->tok->max_len) return -EINVAL; if (tok->len < rdev->wiphy.wowlan->tcp->tok->min_len) return -EINVAL; if (tokens_size > rdev->wiphy.wowlan->tcp->tok->bufsize) return -EINVAL; if (tok->offset + tok->len > data_size) return -EINVAL; } if (tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]) { seq = nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD_SEQ]); if (!rdev->wiphy.wowlan->tcp->seq) return -EINVAL; if (seq->len == 0 || seq->len > 4) return -EINVAL; if (seq->len + seq->offset > data_size) return -EINVAL; } size = sizeof(*cfg); size += data_size; size += wake_size + wake_mask_size; size += tokens_size; cfg = kzalloc(size, GFP_KERNEL); if (!cfg) return -ENOMEM; cfg->src = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_SRC_IPV4]); cfg->dst = nla_get_in_addr(tb[NL80211_WOWLAN_TCP_DST_IPV4]); memcpy(cfg->dst_mac, nla_data(tb[NL80211_WOWLAN_TCP_DST_MAC]), ETH_ALEN); port = nla_get_u16_default(tb[NL80211_WOWLAN_TCP_SRC_PORT], 0); #ifdef CONFIG_INET /* allocate a socket and port for it and use it */ err = __sock_create(wiphy_net(&rdev->wiphy), PF_INET, SOCK_STREAM, IPPROTO_TCP, &cfg->sock, 1); if (err) { kfree(cfg); return err; } if (inet_csk_get_port(cfg->sock->sk, port)) { sock_release(cfg->sock); kfree(cfg); return 
-EADDRINUSE; } cfg->src_port = inet_sk(cfg->sock->sk)->inet_num; #else if (!port) { kfree(cfg); return -EINVAL; } cfg->src_port = port; #endif cfg->dst_port = nla_get_u16(tb[NL80211_WOWLAN_TCP_DST_PORT]); cfg->payload_len = data_size; cfg->payload = (u8 *)cfg + sizeof(*cfg) + tokens_size; memcpy((void *)cfg->payload, nla_data(tb[NL80211_WOWLAN_TCP_DATA_PAYLOAD]), data_size); if (seq) cfg->payload_seq = *seq; cfg->data_interval = nla_get_u32(tb[NL80211_WOWLAN_TCP_DATA_INTERVAL]); cfg->wake_len = wake_size; cfg->wake_data = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size; memcpy((void *)cfg->wake_data, nla_data(tb[NL80211_WOWLAN_TCP_WAKE_PAYLOAD]), wake_size); cfg->wake_mask = (u8 *)cfg + sizeof(*cfg) + tokens_size + data_size + wake_size; memcpy((void *)cfg->wake_mask, nla_data(tb[NL80211_WOWLAN_TCP_WAKE_MASK]), wake_mask_size); if (tok) { cfg->tokens_size = tokens_size; cfg->payload_tok = *tok; memcpy(cfg->payload_tok.token_stream, tok->token_stream, tokens_size); } trig->tcp = cfg; return 0; } static int nl80211_parse_wowlan_nd(struct cfg80211_registered_device *rdev, const struct wiphy_wowlan_support *wowlan, struct nlattr *attr, struct cfg80211_wowlan *trig) { struct nlattr **tb; int err; tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; if (!(wowlan->flags & WIPHY_WOWLAN_NET_DETECT)) { err = -EOPNOTSUPP; goto out; } err = nla_parse_nested_deprecated(tb, NL80211_ATTR_MAX, attr, nl80211_policy, NULL); if (err) goto out; trig->nd_config = nl80211_parse_sched_scan(&rdev->wiphy, NULL, tb, wowlan->max_nd_match_sets); err = PTR_ERR_OR_ZERO(trig->nd_config); if (err) trig->nd_config = NULL; out: kfree(tb); return err; } static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *tb[NUM_NL80211_WOWLAN_TRIG]; struct cfg80211_wowlan new_triggers = {}; struct cfg80211_wowlan *ntrig; const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan; 
int err, i;
	/* whether a configuration existed before this request */
	bool prev_enabled = rdev->wiphy.wowlan_config;
	/* set as soon as any trigger other than 'any' is requested */
	bool regular = false;

	if (!wowlan)
		return -EOPNOTSUPP;

	/* no triggers attribute: drop the current configuration */
	if (!info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS]) {
		cfg80211_rdev_free_wowlan(rdev);
		rdev->wiphy.wowlan_config = NULL;
		goto set_wakeup;
	}

	err = nla_parse_nested_deprecated(tb, MAX_NL80211_WOWLAN_TRIG,
					  info->attrs[NL80211_ATTR_WOWLAN_TRIGGERS],
					  nl80211_wowlan_policy, info->extack);
	if (err)
		return err;

	/*
	 * Each flag trigger is rejected with -EINVAL unless the device
	 * advertises the matching WIPHY_WOWLAN_* capability. Nothing has
	 * been allocated yet, so returning directly is fine here.
	 */
	if (tb[NL80211_WOWLAN_TRIG_ANY]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_ANY))
			return -EINVAL;
		new_triggers.any = true;
	}

	if (tb[NL80211_WOWLAN_TRIG_DISCONNECT]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT))
			return -EINVAL;
		new_triggers.disconnect = true;
		regular = true;
	}

	if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT))
			return -EINVAL;
		new_triggers.magic_pkt = true;
		regular = true;
	}

	/* this attribute is never accepted as a trigger to set */
	if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED])
		return -EINVAL;

	if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE))
			return -EINVAL;
		new_triggers.gtk_rekey_failure = true;
		regular = true;
	}

	if (tb[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ))
			return -EINVAL;
		new_triggers.eap_identity_req = true;
		regular = true;
	}

	if (tb[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE))
			return -EINVAL;
		new_triggers.four_way_handshake = true;
		regular = true;
	}

	if (tb[NL80211_WOWLAN_TRIG_RFKILL_RELEASE]) {
		if (!(wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE))
			return -EINVAL;
		new_triggers.rfkill_release = true;
		regular = true;
	}

	if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) {
		struct nlattr *pat;
		int n_patterns = 0;
		int rem, pat_len, mask_len, pkt_offset;
		struct nlattr *pat_tb[NUM_NL80211_PKTPAT];

		regular = true;

		/* first pass: count the patterns to size the array */
		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
				    rem)
			n_patterns++;
		if (n_patterns > wowlan->n_patterns)
			return -EINVAL;

		new_triggers.patterns = kcalloc(n_patterns,
						sizeof(new_triggers.patterns[0]),
GFP_KERNEL);
		if (!new_triggers.patterns)
			return -ENOMEM;

		/*
		 * From here on failures go through the error label, which
		 * frees n_patterns entries; entries not filled in yet are
		 * still zeroed by kcalloc().
		 */
		new_triggers.n_patterns = n_patterns;
		i = 0;

		/* second pass: validate and copy each pattern */
		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
				    rem) {
			u8 *mask_pat;

			err = nla_parse_nested_deprecated(pat_tb,
							  MAX_NL80211_PKTPAT,
							  pat,
							  nl80211_packet_pattern_policy,
							  info->extack);
			if (err)
				goto error;

			/* default error for the validation checks below */
			err = -EINVAL;
			if (!pat_tb[NL80211_PKTPAT_MASK] ||
			    !pat_tb[NL80211_PKTPAT_PATTERN])
				goto error;
			pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
			mask_len = DIV_ROUND_UP(pat_len, 8);
			if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
				goto error;
			if (pat_len > wowlan->pattern_max_len ||
			    pat_len < wowlan->pattern_min_len)
				goto error;

			pkt_offset =
				nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET],
						    0);
			if (pkt_offset > wowlan->max_pkt_offset)
				goto error;
			new_triggers.patterns[i].pkt_offset = pkt_offset;

			/*
			 * Mask and pattern share one allocation: the mask
			 * comes first and the pattern follows it, so only
			 * the mask pointer is ever freed.
			 */
			mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
			if (!mask_pat) {
				err = -ENOMEM;
				goto error;
			}
			new_triggers.patterns[i].mask = mask_pat;
			memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
			       mask_len);
			mask_pat += mask_len;
			new_triggers.patterns[i].pattern = mask_pat;
			new_triggers.patterns[i].pattern_len = pat_len;
			memcpy(mask_pat,
			       nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
			       pat_len);
			i++;
		}
	}

	if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) {
		regular = true;
		err = nl80211_parse_wowlan_tcp(
			rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION],
			&new_triggers);
		if (err)
			goto error;
	}

	if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) {
		regular = true;
		err = nl80211_parse_wowlan_nd(
			rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT],
			&new_triggers);
		if (err)
			goto error;
	}

	/* The 'any' trigger means the device continues operating more or less
	 * as in its normal operation mode and wakes up the host on most of the
	 * normal interrupts (like packet RX, ...)
	 * It therefore makes little sense to combine with the more constrained
	 * wakeup trigger modes.
	 */
	if (new_triggers.any && regular) {
		err = -EINVAL;
		goto error;
	}

	/* move the fully parsed configuration to the heap and install it */
	ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL);
	if (!ntrig) {
		err = -ENOMEM;
		goto error;
	}
	cfg80211_rdev_free_wowlan(rdev);
	rdev->wiphy.wowlan_config = ntrig;

set_wakeup:
	/* only tell the driver when the enabled state actually flipped */
	if (rdev->ops->set_wakeup &&
	    prev_enabled != !!rdev->wiphy.wowlan_config)
		rdev_set_wakeup(rdev, rdev->wiphy.wowlan_config);

	return 0;
error:
	/* release everything gathered in new_triggers so far */
	for (i = 0; i < new_triggers.n_patterns; i++)
		kfree(new_triggers.patterns[i].mask);
	kfree(new_triggers.patterns);
	if (new_triggers.tcp && new_triggers.tcp->sock)
		sock_release(new_triggers.tcp->sock);
	kfree(new_triggers.tcp);
	kfree(new_triggers.nd_config);
	return err;
}
#endif /* CONFIG_PM */

/*
 * Append the coalesce rules of rdev->coalesce to @msg.
 *
 * Nothing is emitted when there are no rules. Otherwise an
 * NL80211_ATTR_COALESCE_RULE nest is opened that holds one sub-nest per
 * rule, each with its delay, its condition and a nested list of packet
 * patterns.
 *
 * rdev->coalesce is dereferenced without a NULL check, so the caller has
 * to make sure it is set.
 *
 * Returns 0 on success, -ENOBUFS if a nest or attribute does not fit.
 */
static int nl80211_send_coalesce_rules(struct sk_buff *msg,
				       struct cfg80211_registered_device *rdev)
{
	struct nlattr *nl_pats, *nl_pat, *nl_rule, *nl_rules;
	int i, j, pat_len;
	struct cfg80211_coalesce_rules *rule;

	if (!rdev->coalesce->n_rules)
		return 0;

	nl_rules = nla_nest_start_noflag(msg, NL80211_ATTR_COALESCE_RULE);
	if (!nl_rules)
		return -ENOBUFS;

	for (i = 0; i < rdev->coalesce->n_rules; i++) {
		/* rule nests are numbered from 1 */
		nl_rule = nla_nest_start_noflag(msg, i + 1);
		if (!nl_rule)
			return -ENOBUFS;

		rule = &rdev->coalesce->rules[i];
		if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_DELAY,
				rule->delay))
			return -ENOBUFS;

		if (nla_put_u32(msg, NL80211_ATTR_COALESCE_RULE_CONDITION,
				rule->condition))
			return -ENOBUFS;

		nl_pats = nla_nest_start_noflag(msg,
						NL80211_ATTR_COALESCE_RULE_PKT_PATTERN);
		if (!nl_pats)
			return -ENOBUFS;

		for (j = 0; j < rule->n_patterns; j++) {
			/* pattern nests are numbered from 1 as well */
			nl_pat = nla_nest_start_noflag(msg, j + 1);
			if (!nl_pat)
				return -ENOBUFS;
			pat_len = rule->patterns[j].pattern_len;
			if (nla_put(msg, NL80211_PKTPAT_MASK,
				    DIV_ROUND_UP(pat_len, 8),
				    rule->patterns[j].mask) ||
			    nla_put(msg, NL80211_PKTPAT_PATTERN, pat_len,
				    rule->patterns[j].pattern) ||
			    nla_put_u32(msg, NL80211_PKTPAT_OFFSET,
					rule->patterns[j].pkt_offset))
				return -ENOBUFS;
			nla_nest_end(msg, nl_pat);
		}
		nla_nest_end(msg, nl_pats);
		nla_nest_end(msg, nl_rule);
	}
	nla_nest_end(msg,
nl_rules);

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_GET_COALESCE: reply with the currently
 * configured coalesce rules. When none are configured, the reply carries
 * only the header.
 *
 * Returns -EOPNOTSUPP if the wiphy does not advertise coalesce support,
 * -ENOMEM if the reply cannot be allocated, -ENOBUFS if it cannot be
 * filled, otherwise the result of genlmsg_reply().
 */
static int nl80211_get_coalesce(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct sk_buff *msg;
	void *hdr;

	if (!rdev->wiphy.coalesce)
		return -EOPNOTSUPP;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_GET_COALESCE);
	if (!hdr)
		goto nla_put_failure;

	if (rdev->coalesce && nl80211_send_coalesce_rules(msg, rdev))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	return genlmsg_reply(msg, info);

nla_put_failure:
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * Free a coalesce configuration together with all of its rules and
 * patterns. A NULL @coalesce is accepted and ignored.
 *
 * Only each pattern's mask pointer is freed; the parser in this file
 * stores mask and pattern in a single allocation that starts at the mask.
 */
void cfg80211_free_coalesce(struct cfg80211_coalesce *coalesce)
{
	int i, j;
	struct cfg80211_coalesce_rules *rule;

	if (!coalesce)
		return;

	for (i = 0; i < coalesce->n_rules; i++) {
		rule = &coalesce->rules[i];
		for (j = 0; j < rule->n_patterns; j++)
			kfree(rule->patterns[j].mask);
		kfree(rule->patterns);
	}
	kfree(coalesce);
}

/*
 * Parse one nested coalesce rule attribute @rule into @new_rule.
 *
 * Delay, pattern count, pattern length and packet offset are validated
 * against the limits in rdev->wiphy.coalesce. A packet pattern list is
 * mandatory.
 *
 * On error, memory already attached to @new_rule is NOT released here;
 * the caller is expected to free it (cfg80211_free_coalesce() walks
 * n_patterns entries of each rule).
 *
 * Returns 0 on success, -EINVAL on invalid input, -ENOMEM on allocation
 * failure, or the error from attribute parsing.
 */
static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
				       struct nlattr *rule,
				       struct cfg80211_coalesce_rules *new_rule)
{
	int err, i;
	const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
	struct nlattr *tb[NUM_NL80211_ATTR_COALESCE_RULE], *pat;
	int rem, pat_len, mask_len, pkt_offset, n_patterns = 0;
	struct nlattr *pat_tb[NUM_NL80211_PKTPAT];

	err = nla_parse_nested_deprecated(tb, NL80211_ATTR_COALESCE_RULE_MAX,
					  rule, nl80211_coalesce_policy, NULL);
	if (err)
		return err;

	if (tb[NL80211_ATTR_COALESCE_RULE_DELAY])
		new_rule->delay =
			nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_DELAY]);
	if (new_rule->delay > coalesce->max_delay)
		return -EINVAL;

	if (tb[NL80211_ATTR_COALESCE_RULE_CONDITION])
		new_rule->condition =
			nla_get_u32(tb[NL80211_ATTR_COALESCE_RULE_CONDITION]);

	if (!tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN])
		return -EINVAL;

	/* first pass: count the patterns to size the array */
	nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
			    rem)
		n_patterns++;
	if (n_patterns > coalesce->n_patterns)
		return -EINVAL;

	new_rule->patterns =
kcalloc(n_patterns, sizeof(new_rule->patterns[0]), GFP_KERNEL);
	if (!new_rule->patterns)
		return -ENOMEM;

	new_rule->n_patterns = n_patterns;
	i = 0;

	/* second pass: validate and copy each pattern */
	nla_for_each_nested(pat, tb[NL80211_ATTR_COALESCE_RULE_PKT_PATTERN],
			    rem) {
		u8 *mask_pat;

		err = nla_parse_nested_deprecated(pat_tb, MAX_NL80211_PKTPAT,
						  pat,
						  nl80211_packet_pattern_policy,
						  NULL);
		if (err)
			return err;

		if (!pat_tb[NL80211_PKTPAT_MASK] ||
		    !pat_tb[NL80211_PKTPAT_PATTERN])
			return -EINVAL;
		pat_len = nla_len(pat_tb[NL80211_PKTPAT_PATTERN]);
		/* the mask must be exactly DIV_ROUND_UP(pat_len, 8) bytes */
		mask_len = DIV_ROUND_UP(pat_len, 8);
		if (nla_len(pat_tb[NL80211_PKTPAT_MASK]) != mask_len)
			return -EINVAL;
		if (pat_len > coalesce->pattern_max_len ||
		    pat_len < coalesce->pattern_min_len)
			return -EINVAL;

		pkt_offset =
			nla_get_u32_default(pat_tb[NL80211_PKTPAT_OFFSET], 0);
		if (pkt_offset > coalesce->max_pkt_offset)
			return -EINVAL;
		new_rule->patterns[i].pkt_offset = pkt_offset;

		/*
		 * Mask and pattern share one allocation: the mask comes
		 * first and the pattern follows it.
		 */
		mask_pat = kmalloc(mask_len + pat_len, GFP_KERNEL);
		if (!mask_pat)
			return -ENOMEM;

		new_rule->patterns[i].mask = mask_pat;
		memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_MASK]),
		       mask_len);

		mask_pat += mask_len;
		new_rule->patterns[i].pattern = mask_pat;
		new_rule->patterns[i].pattern_len = pat_len;
		memcpy(mask_pat, nla_data(pat_tb[NL80211_PKTPAT_PATTERN]),
		       pat_len);
		i++;
	}

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_SET_COALESCE: replace the coalesce rules
 * of the device.
 *
 * Without NL80211_ATTR_COALESCE_RULE the current rules are freed and the
 * driver is called with a NULL configuration. Otherwise all rules are
 * parsed into a new configuration that is handed to the driver and, if
 * the driver accepts it, replaces the old one.
 *
 * Returns 0 on success, -EOPNOTSUPP if coalescing is not supported,
 * -EINVAL if there are more rules than the device supports, -ENOMEM on
 * allocation failure, or the error from rule parsing / the driver.
 */
static int nl80211_set_coalesce(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	const struct wiphy_coalesce_support *coalesce = rdev->wiphy.coalesce;
	struct cfg80211_coalesce *new_coalesce;
	int err, rem_rule, n_rules = 0, i;
	struct nlattr *rule;

	if (!rdev->wiphy.coalesce || !rdev->ops->set_coalesce)
		return -EOPNOTSUPP;

	if (!info->attrs[NL80211_ATTR_COALESCE_RULE]) {
		cfg80211_free_coalesce(rdev->coalesce);
		rdev->coalesce = NULL;
		/* the driver's return value is ignored on this path */
		rdev_set_coalesce(rdev, NULL);
		return 0;
	}

	/* first pass: count the rules to size the allocation */
	nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
			    rem_rule)
		n_rules++;
	if (n_rules > coalesce->n_rules)
		return -EINVAL;

	new_coalesce = kzalloc(struct_size(new_coalesce, rules, n_rules),
GFP_KERNEL);
	if (!new_coalesce)
		return -ENOMEM;

	/*
	 * Set the count up front so the error path frees every rule slot;
	 * slots not parsed yet are still zeroed by kzalloc().
	 */
	new_coalesce->n_rules = n_rules;
	i = 0;

	nla_for_each_nested(rule, info->attrs[NL80211_ATTR_COALESCE_RULE],
			    rem_rule) {
		err = nl80211_parse_coalesce_rule(rdev, rule,
						  &new_coalesce->rules[i]);
		if (err)
			goto error;

		i++;
	}

	err = rdev_set_coalesce(rdev, new_coalesce);
	if (err)
		goto error;

	/* the driver accepted the new rules: swap them in */
	cfg80211_free_coalesce(rdev->coalesce);
	rdev->coalesce = new_coalesce;

	return 0;
error:
	cfg80211_free_coalesce(new_coalesce);

	return err;
}

/*
 * Netlink handler for NL80211_CMD_SET_REKEY_DATA: pass GTK rekeying
 * material (KEK, KCK, replay counter and optionally the AKM) to the
 * driver.
 *
 * The key pointers handed to the driver point straight into the netlink
 * attributes; nothing is copied here.
 *
 * Returns -EINVAL if a mandatory attribute is missing, -ERANGE if a key
 * length is not acceptable for this device, -ENOTCONN if the interface is
 * not connected, -EOPNOTSUPP if the driver lacks the operation, otherwise
 * the result of rdev_set_rekey_data().
 */
static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct nlattr *tb[NUM_NL80211_REKEY_DATA];
	struct cfg80211_gtk_rekey_data rekey_data = {};
	int err;

	if (!info->attrs[NL80211_ATTR_REKEY_DATA])
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, MAX_NL80211_REKEY_DATA,
					  info->attrs[NL80211_ATTR_REKEY_DATA],
					  nl80211_rekey_policy, info->extack);
	if (err)
		return err;

	if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] ||
	    !tb[NL80211_REKEY_DATA_KCK])
		return -EINVAL;

	/*
	 * KEK: the standard length is always fine, the extended length
	 * only with WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK.
	 */
	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN &&
	    !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
	      nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KEK_EXT_LEN))
		return -ERANGE;

	/*
	 * KCK: as above, plus the 32-byte extended length when the device
	 * sets WIPHY_FLAG_SUPPORTS_EXT_KCK_32.
	 */
	if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN &&
	    !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
	      nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN) &&
	     !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KCK_32 &&
	       nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN_32))
		return -ERANGE;

	rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
	rekey_data.kck = nla_data(tb[NL80211_REKEY_DATA_KCK]);
	rekey_data.replay_ctr = nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]);
	rekey_data.kek_len = nla_len(tb[NL80211_REKEY_DATA_KEK]);
	rekey_data.kck_len = nla_len(tb[NL80211_REKEY_DATA_KCK]);
	if (tb[NL80211_REKEY_DATA_AKM])
		rekey_data.akm =
nla_get_u32(tb[NL80211_REKEY_DATA_AKM]);

	if (!wdev->connected)
		return -ENOTCONN;

	if (!rdev->ops->set_rekey_data)
		return -EOPNOTSUPP;

	return rdev_set_rekey_data(rdev, dev, &rekey_data);
}

/*
 * Netlink handler: register the sending socket as the receiver for
 * "unexpected frame" notifications on an AP or P2P-GO interface.
 *
 * Returns -EINVAL for other interface types, -EBUSY if a port ID is
 * already registered, 0 on success.
 */
static int nl80211_register_unexpected_frame(struct sk_buff *skb,
					     struct genl_info *info)
{
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;

	if (wdev->iftype != NL80211_IFTYPE_AP &&
	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
		return -EINVAL;

	if (wdev->ap_unexpected_nlportid)
		return -EBUSY;

	wdev->ap_unexpected_nlportid = info->snd_portid;
	return 0;
}

/*
 * Netlink handler for NL80211_CMD_PROBE_CLIENT: ask the driver to probe
 * the station given in NL80211_ATTR_MAC and reply with the cookie the
 * driver returned.
 *
 * Returns -EOPNOTSUPP if the interface is neither AP nor P2P-GO or the
 * driver lacks the operation, -EINVAL without a MAC attribute, -ENOMEM /
 * -ENOBUFS if the reply cannot be built, the driver's error if probing
 * fails, otherwise the result of genlmsg_reply().
 */
static int nl80211_probe_client(struct sk_buff *skb,
				struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct net_device *dev = info->user_ptr[1];
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct sk_buff *msg;
	void *hdr;
	const u8 *addr;
	u64 cookie;
	int err;

	if (wdev->iftype != NL80211_IFTYPE_AP &&
	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
		return -EOPNOTSUPP;

	if (!info->attrs[NL80211_ATTR_MAC])
		return -EINVAL;

	if (!rdev->ops->probe_client)
		return -EOPNOTSUPP;

	/* build the reply first so the probe is not sent if this fails */
	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_PROBE_CLIENT);
	if (!hdr) {
		err = -ENOBUFS;
		goto free_msg;
	}

	addr = nla_data(info->attrs[NL80211_ATTR_MAC]);

	err = rdev_probe_client(rdev, dev, addr, &cookie);
	if (err)
		goto free_msg;

	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
			      NL80211_ATTR_PAD))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return genlmsg_reply(msg, info);

 nla_put_failure:
	err = -ENOBUFS;
 free_msg:
	nlmsg_free(msg);
	return err;
}

/*
 * Netlink handler: add the sending socket to the device's list of beacon
 * registrations.
 *
 * Returns -EOPNOTSUPP unless the wiphy sets WIPHY_FLAG_REPORTS_OBSS,
 * -ENOMEM on allocation failure, -EALREADY if the port ID is already on
 * the list, 0 on success.
 */
static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct cfg80211_beacon_registration *reg, *nreg;
	int rv;

	if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS))
		return -EOPNOTSUPP;

	/*
	 * Allocate the entry before the spinlock is taken; it is freed
	 * again below if the port turns out to be registered already.
	 */
	nreg = kzalloc(sizeof(*nreg), GFP_KERNEL);
	if (!nreg)
		return -ENOMEM;

	/* First, check if already registered. */
	spin_lock_bh(&rdev->beacon_registrations_lock);
	list_for_each_entry(reg, &rdev->beacon_registrations, list) {
		if (reg->nlportid == info->snd_portid) {
			rv = -EALREADY;
			goto out_err;
		}
	}
	/* Add it to the list */
	nreg->nlportid = info->snd_portid;
	list_add(&nreg->list, &rdev->beacon_registrations);

	spin_unlock_bh(&rdev->beacon_registrations_lock);

	return 0;
out_err:
	spin_unlock_bh(&rdev->beacon_registrations_lock);
	kfree(nreg);
	return rv;
}

/*
 * Netlink handler for NL80211_CMD_START_P2P_DEVICE.
 *
 * Starting a device that is already running is a successful no-op.
 *
 * Returns -EOPNOTSUPP if the driver lacks the operation or the wdev is
 * not a P2P device, -ERFKILL if rfkill blocks the radio, the driver's
 * error if starting fails, 0 on success.
 */
static int nl80211_start_p2p_device(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	int err;

	if (!rdev->ops->start_p2p_device)
		return -EOPNOTSUPP;

	if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
		return -EOPNOTSUPP;

	if (wdev_running(wdev))
		return 0;

	if (rfkill_blocked(rdev->wiphy.rfkill))
		return -ERFKILL;

	err = rdev_start_p2p_device(rdev, wdev);
	if (err)
		return err;

	/* account for the newly running interface */
	wdev->is_running = true;
	rdev->opencount++;

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_STOP_P2P_DEVICE: stop the P2P device
 * via cfg80211_stop_p2p_device().
 *
 * Returns -EOPNOTSUPP if the wdev is not a P2P device or the driver lacks
 * the operation, 0 otherwise.
 */
static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];

	if (wdev->iftype != NL80211_IFTYPE_P2P_DEVICE)
		return -EOPNOTSUPP;

	if (!rdev->ops->stop_p2p_device)
		return -EOPNOTSUPP;

	cfg80211_stop_p2p_device(rdev, wdev);

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_START_NAN: start NAN operation on a NAN
 * interface.
 *
 * NL80211_ATTR_NAN_MASTER_PREF is mandatory. NL80211_ATTR_BANDS is
 * optional; if given it must be a subset of the supported NAN bands and,
 * when non-zero, include the 2 GHz band.
 *
 * Returns -EOPNOTSUPP for a non-NAN wdev or unsupported bands, -EEXIST if
 * already running, -ERFKILL if rfkill blocks the radio, -EINVAL for
 * missing or invalid attributes, the driver's error if starting fails,
 * 0 on success.
 */
static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	struct cfg80211_nan_conf conf = {};
	int err;

	if (wdev->iftype != NL80211_IFTYPE_NAN)
		return -EOPNOTSUPP;

	if (wdev_running(wdev))
		return -EEXIST;

	if (rfkill_blocked(rdev->wiphy.rfkill))
		return -ERFKILL;

	if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF])
		return -EINVAL;

	conf.master_pref =
		nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);

	if (info->attrs[NL80211_ATTR_BANDS]) {
		u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]);

		if (bands &
~(u32)wdev->wiphy->nan_supported_bands)
			return -EOPNOTSUPP;

		/* a non-empty band mask has to include 2 GHz */
		if (bands && !(bands & BIT(NL80211_BAND_2GHZ)))
			return -EINVAL;

		conf.bands = bands;
	}

	err = rdev_start_nan(rdev, wdev, &conf);
	if (err)
		return err;

	/* account for the newly running interface */
	wdev->is_running = true;
	rdev->opencount++;

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_STOP_NAN: stop NAN operation via
 * cfg80211_stop_nan().
 *
 * Returns -EOPNOTSUPP for a non-NAN wdev, 0 otherwise.
 */
static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];

	if (wdev->iftype != NL80211_IFTYPE_NAN)
		return -EOPNOTSUPP;

	cfg80211_stop_nan(rdev, wdev);

	return 0;
}

/*
 * Count the entries nested in @filter_attr and check their total size.
 *
 * Returns the number of entries, or -EINVAL if the summed payload length
 * of all entries is U8_MAX or more.
 */
static int validate_nan_filter(struct nlattr *filter_attr)
{
	struct nlattr *attr;
	int len = 0, n_entries = 0, rem;

	nla_for_each_nested(attr, filter_attr, rem) {
		len += nla_len(attr);
		n_entries++;
	}

	if (len >= U8_MAX)
		return -EINVAL;

	return n_entries;
}

/*
 * Copy the match filter entries nested in @attr_filter into a newly
 * allocated array and attach it to @func as its TX (@tx true) or RX
 * (@tx false) filter list.
 *
 * On failure nothing is attached to @func and everything allocated here
 * is freed again.
 *
 * Returns 0 on success, -EINVAL if validate_nan_filter() rejects the
 * filter, -ENOMEM on allocation failure.
 */
static int handle_nan_filter(struct nlattr *attr_filter,
			     struct cfg80211_nan_func *func,
			     bool tx)
{
	struct nlattr *attr;
	int n_entries, rem, i;
	struct cfg80211_nan_func_filter *filter;

	n_entries = validate_nan_filter(attr_filter);
	if (n_entries < 0)
		return n_entries;

	/* one allocation size serves both lists: the element sizes match */
	BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters));

	filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL);
	if (!filter)
		return -ENOMEM;

	i = 0;
	nla_for_each_nested(attr, attr_filter, rem) {
		filter[i].filter = nla_memdup(attr, GFP_KERNEL);
		if (!filter[i].filter)
			goto err;

		filter[i].len = nla_len(attr);
		i++;
	}
	if (tx) {
		func->num_tx_filters = n_entries;
		func->tx_filters = filter;
	} else {
		func->num_rx_filters = n_entries;
		func->rx_filters = filter;
	}

	return 0;

err:
	/*
	 * Walk all slots again; those never filled are still NULL from
	 * kcalloc(), so kfree() on them is harmless.
	 */
	i = 0;
	nla_for_each_nested(attr, attr_filter, rem) {
		kfree(filter[i].filter);
		i++;
	}
	kfree(filter);
	return -ENOMEM;
}

/*
 * Netlink handler for NL80211_CMD_ADD_NAN_FUNCTION: build a NAN function
 * from the nested NL80211_ATTR_NAN_FUNC attribute, hand it to the driver
 * and reply with the cookie and instance ID of the function.
 */
static int nl80211_nan_add_func(struct sk_buff *skb,
				struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr;
	struct cfg80211_nan_func *func;
	struct sk_buff *msg = NULL;
	void
*hdr = NULL;
	int err = 0;

	if (wdev->iftype != NL80211_IFTYPE_NAN)
		return -EOPNOTSUPP;

	if (!wdev_running(wdev))
		return -ENOTCONN;

	if (!info->attrs[NL80211_ATTR_NAN_FUNC])
		return -EINVAL;

	err = nla_parse_nested_deprecated(tb, NL80211_NAN_FUNC_ATTR_MAX,
					  info->attrs[NL80211_ATTR_NAN_FUNC],
					  nl80211_nan_func_policy,
					  info->extack);
	if (err)
		return err;

	/* from here on, failures go through the 'out' label to free func */
	func = kzalloc(sizeof(*func), GFP_KERNEL);
	if (!func)
		return -ENOMEM;

	func->cookie = cfg80211_assign_cookie(rdev);

	/* function type and service ID are mandatory */
	if (!tb[NL80211_NAN_FUNC_TYPE]) {
		err = -EINVAL;
		goto out;
	}


	func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]);

	if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) {
		err = -EINVAL;
		goto out;
	}

	memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]),
	       sizeof(func->service_id));

	func->close_range =
		nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]);

	if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) {
		func->serv_spec_info_len =
			nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]);
		func->serv_spec_info =
			kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]),
				func->serv_spec_info_len,
				GFP_KERNEL);
		if (!func->serv_spec_info) {
			err = -ENOMEM;
			goto out;
		}
	}

	if (tb[NL80211_NAN_FUNC_TTL])
		func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]);

	/* type-specific attributes */
	switch (func->type) {
	case NL80211_NAN_FUNC_PUBLISH:
		if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) {
			err = -EINVAL;
			goto out;
		}

		func->publish_type =
			nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]);
		func->publish_bcast =
			nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]);

		/* the broadcast flag is only accepted for solicited publish */
		if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) &&
			func->publish_bcast) {
			err = -EINVAL;
			goto out;
		}
		break;
	case NL80211_NAN_FUNC_SUBSCRIBE:
		func->subscribe_active =
			nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]);
		break;
	case NL80211_NAN_FUNC_FOLLOW_UP:
		/* follow-up needs its ID, the requestor ID and a destination */
		if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] ||
		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] ||
		    !tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]) {
			err = -EINVAL;
			goto out;
		}

		func->followup_id =
			nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]);
		func->followup_reqid =
			nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]);
memcpy(func->followup_dest.addr,
		       nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]),
		       sizeof(func->followup_dest.addr));
		/* a TTL is not accepted for follow-up functions */
		if (func->ttl) {
			err = -EINVAL;
			goto out;
		}
		break;
	default:
		err = -EINVAL;
		goto out;
	}

	if (tb[NL80211_NAN_FUNC_SRF]) {
		struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR];

		err = nla_parse_nested_deprecated(srf_tb,
						  NL80211_NAN_SRF_ATTR_MAX,
						  tb[NL80211_NAN_FUNC_SRF],
						  nl80211_nan_srf_policy,
						  info->extack);
		if (err)
			goto out;

		func->srf_include =
			nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]);

		/*
		 * The SRF is given either as a bloom filter (which then
		 * needs its index and excludes a MAC list) or as a list of
		 * MAC addresses.
		 */
		if (srf_tb[NL80211_NAN_SRF_BF]) {
			if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] ||
			    !srf_tb[NL80211_NAN_SRF_BF_IDX]) {
				err = -EINVAL;
				goto out;
			}

			func->srf_bf_len =
				nla_len(srf_tb[NL80211_NAN_SRF_BF]);
			func->srf_bf =
				kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]),
					func->srf_bf_len, GFP_KERNEL);
			if (!func->srf_bf) {
				err = -ENOMEM;
				goto out;
			}

			func->srf_bf_idx =
				nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]);
		} else {
			struct nlattr *attr, *mac_attr =
				srf_tb[NL80211_NAN_SRF_MAC_ADDRS];
			int n_entries, rem, i = 0;

			if (!mac_attr) {
				err = -EINVAL;
				goto out;
			}

			/* an empty or invalid MAC list is rejected */
			n_entries = validate_acl_mac_addrs(mac_attr);
			if (n_entries <= 0) {
				err = -EINVAL;
				goto out;
			}

			func->srf_num_macs = n_entries;
			func->srf_macs =
				kcalloc(n_entries, sizeof(*func->srf_macs),
					GFP_KERNEL);
			if (!func->srf_macs) {
				err = -ENOMEM;
				goto out;
			}

			nla_for_each_nested(attr, mac_attr, rem)
				memcpy(func->srf_macs[i++].addr, nla_data(attr),
				       sizeof(*func->srf_macs));
		}
	}

	if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) {
		err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER],
					func, true);
		if (err)
			goto out;
	}

	if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) {
		err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER],
					func, false);
		if (err)
			goto out;
	}

	/* prepare the reply before the function is handed to the driver */
	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0,
			     NL80211_CMD_ADD_NAN_FUNCTION);
	/* This can't really happen - we just allocated 4KB */
	if (WARN_ON(!hdr)) {
		err = -ENOMEM;
		goto out;
	}

	err =
rdev_add_nan_func(rdev, wdev, func);
out:
	/*
	 * Reached both by the error jumps above and by falling through
	 * after the driver call; only a negative err counts as failure.
	 */
	if (err < 0) {
		cfg80211_free_nan_func(func);
		nlmsg_free(msg);
		return err;
	}

	/* propagate the instance id and cookie to userspace */
	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie,
			      NL80211_ATTR_PAD))
		goto nla_put_failure;

	func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC);
	if (!func_attr)
		goto nla_put_failure;

	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID,
		       func->instance_id))
		goto nla_put_failure;

	nla_nest_end(msg, func_attr);

	genlmsg_end(msg, hdr);
	return genlmsg_reply(msg, info);

nla_put_failure:
	/*
	 * NOTE(review): the function has already been added at this point
	 * and func is not freed here - presumably it is owned by the
	 * driver after a successful rdev_add_nan_func(); confirm.
	 */
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * Netlink handler for NL80211_CMD_DEL_NAN_FUNCTION: ask the driver to
 * remove the NAN function identified by NL80211_ATTR_COOKIE.
 *
 * Returns -EOPNOTSUPP for a non-NAN wdev, -ENOTCONN if NAN is not
 * running, -EINVAL without a cookie, 0 otherwise.
 */
static int nl80211_nan_del_func(struct sk_buff *skb,
			       struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	u64 cookie;

	if (wdev->iftype != NL80211_IFTYPE_NAN)
		return -EOPNOTSUPP;

	if (!wdev_running(wdev))
		return -ENOTCONN;

	if (!info->attrs[NL80211_ATTR_COOKIE])
		return -EINVAL;

	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);

	rdev_del_nan_func(rdev, wdev, cookie);

	return 0;
}

/*
 * Netlink handler for NL80211_CMD_CHANGE_NAN_CONFIG: update the master
 * preference and/or the band mask of a running NAN interface.
 *
 * At least one of the two attributes has to be present. The master
 * preference must not be 0, 1 or 255; the band mask follows the same
 * rules as when starting NAN.
 *
 * Returns -EOPNOTSUPP for a non-NAN wdev or unsupported bands, -ENOTCONN
 * if NAN is not running, -EINVAL for invalid or missing attributes,
 * otherwise the result of rdev_nan_change_conf().
 */
static int nl80211_nan_change_config(struct sk_buff *skb,
				     struct genl_info *info)
{
	struct cfg80211_registered_device *rdev = info->user_ptr[0];
	struct wireless_dev *wdev = info->user_ptr[1];
	struct cfg80211_nan_conf conf = {};
	/* bitmask of CFG80211_NAN_CONF_CHANGED_* telling what to apply */
	u32 changed = 0;

	if (wdev->iftype != NL80211_IFTYPE_NAN)
		return -EOPNOTSUPP;

	if (!wdev_running(wdev))
		return -ENOTCONN;

	if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) {
		conf.master_pref =
			nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]);
		if (conf.master_pref <= 1 || conf.master_pref == 255)
			return -EINVAL;

		changed |= CFG80211_NAN_CONF_CHANGED_PREF;
	}

	if (info->attrs[NL80211_ATTR_BANDS]) {
		u32 bands = nla_get_u32(info->attrs[NL80211_ATTR_BANDS]);

		if (bands & ~(u32)wdev->wiphy->nan_supported_bands)
			return -EOPNOTSUPP;

		/* a non-empty band mask has to include 2 GHz */
		if (bands && !(bands & BIT(NL80211_BAND_2GHZ)))
			return -EINVAL;

		conf.bands = bands;
		changed |= CFG80211_NAN_CONF_CHANGED_BANDS;
	}

	if (!changed)
		return -EINVAL;

	return
rdev_nan_change_conf(rdev, wdev, &conf, changed);
}

/*
 * Send an NL80211_CMD_NAN_MATCH event to userspace describing a match
 * between a local NAN function and a peer's function.
 *
 * @wdev:  wireless device the match was reported on
 * @match: match details; inst_id, peer_inst_id and addr must be set
 * @gfp:   allocation flags for the message
 *
 * The event goes to the socket that owns @wdev if there is one, otherwise
 * to the NAN multicast group. Failures to build the message are silently
 * dropped.
 */
void cfg80211_nan_match(struct wireless_dev *wdev,
			struct cfg80211_nan_match_params *match, gfp_t gfp)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct nlattr *match_attr, *local_func_attr, *peer_func_attr;
	struct sk_buff *msg;
	void *hdr;

	if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr))
		return;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	/* identify the device; the ifindex only if a netdev is attached */
	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
					 wdev->netdev->ifindex)) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD))
		goto nla_put_failure;

	if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie,
			      NL80211_ATTR_PAD) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr))
		goto nla_put_failure;

	match_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_MATCH);
	if (!match_attr)
		goto nla_put_failure;

	/* local side: only the instance ID */
	local_func_attr = nla_nest_start_noflag(msg,
						NL80211_NAN_MATCH_FUNC_LOCAL);
	if (!local_func_attr)
		goto nla_put_failure;

	if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id))
		goto nla_put_failure;

	nla_nest_end(msg, local_func_attr);

	/* peer side: type, instance ID and optional service info */
	peer_func_attr = nla_nest_start_noflag(msg,
					       NL80211_NAN_MATCH_FUNC_PEER);
	if (!peer_func_attr)
		goto nla_put_failure;

	if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) ||
	    nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id))
		goto nla_put_failure;

	if (match->info && match->info_len &&
	    nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len,
		    match->info))
		goto nla_put_failure;

	nla_nest_end(msg, peer_func_attr);
	nla_nest_end(msg, match_attr);
	genlmsg_end(msg, hdr);

	if (!wdev->owner_nlportid)
		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
					msg, 0, NL80211_MCGRP_NAN, gfp);
	else
		genlmsg_unicast(wiphy_net(&rdev->wiphy),
msg, wdev->owner_nlportid); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_nan_match); void cfg80211_nan_func_terminated(struct wireless_dev *wdev, u8 inst_id, enum nl80211_nan_func_term_reason reason, u64 cookie, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *func_attr; void *hdr; if (WARN_ON(!inst_id)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, NL80211_ATTR_PAD)) goto nla_put_failure; func_attr = nla_nest_start_noflag(msg, NL80211_ATTR_NAN_FUNC); if (!func_attr) goto nla_put_failure; if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) || nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason)) goto nla_put_failure; nla_nest_end(msg, func_attr); genlmsg_end(msg, hdr); if (!wdev->owner_nlportid) genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_NAN, gfp); else genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, wdev->owner_nlportid); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_nan_func_terminated); static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { void *hdr; struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_PROTOCOL_FEATURES); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_PROTOCOL_FEATURES, NL80211_PROTOCOL_FEATURE_SPLIT_WIPHY_DUMP)) goto nla_put_failure; genlmsg_end(msg, hdr); return 
genlmsg_reply(msg, info); nla_put_failure: kfree_skb(msg); return -ENOBUFS; } static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_update_ft_ies_params ft_params; struct net_device *dev = info->user_ptr[1]; if (!rdev->ops->update_ft_ies) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MDID] || !info->attrs[NL80211_ATTR_IE]) return -EINVAL; memset(&ft_params, 0, sizeof(ft_params)); ft_params.md = nla_get_u16(info->attrs[NL80211_ATTR_MDID]); ft_params.ie = nla_data(info->attrs[NL80211_ATTR_IE]); ft_params.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); return rdev_update_ft_ies(rdev, dev, &ft_params); } static int nl80211_crit_protocol_start(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; enum nl80211_crit_proto_id proto = NL80211_CRIT_PROTO_UNSPEC; u16 duration; int ret; if (!rdev->ops->crit_proto_start) return -EOPNOTSUPP; if (WARN_ON(!rdev->ops->crit_proto_stop)) return -EINVAL; if (rdev->crit_proto_nlportid) return -EBUSY; /* determine protocol if provided */ if (info->attrs[NL80211_ATTR_CRIT_PROT_ID]) proto = nla_get_u16(info->attrs[NL80211_ATTR_CRIT_PROT_ID]); if (proto >= NUM_NL80211_CRIT_PROTO) return -EINVAL; /* timeout must be provided */ if (!info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]) return -EINVAL; duration = nla_get_u16(info->attrs[NL80211_ATTR_MAX_CRIT_PROT_DURATION]); ret = rdev_crit_proto_start(rdev, wdev, proto, duration); if (!ret) rdev->crit_proto_nlportid = info->snd_portid; return ret; } static int nl80211_crit_protocol_stop(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; if (!rdev->ops->crit_proto_stop) return -EOPNOTSUPP; if (rdev->crit_proto_nlportid) { rdev->crit_proto_nlportid = 0; rdev_crit_proto_stop(rdev, wdev); } 
return 0; } static int nl80211_vendor_check_policy(const struct wiphy_vendor_command *vcmd, struct nlattr *attr, struct netlink_ext_ack *extack) { if (vcmd->policy == VENDOR_CMD_RAW_DATA) { if (attr->nla_type & NLA_F_NESTED) { NL_SET_ERR_MSG_ATTR(extack, attr, "unexpected nested data"); return -EINVAL; } return 0; } if (!(attr->nla_type & NLA_F_NESTED)) { NL_SET_ERR_MSG_ATTR(extack, attr, "expected nested data"); return -EINVAL; } return nla_validate_nested(attr, vcmd->maxattr, vcmd->policy, extack); } static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = __cfg80211_wdev_from_attrs(rdev, genl_info_net(info), info->attrs); int i, err; u32 vid, subcmd; if (!rdev->wiphy.vendor_commands) return -EOPNOTSUPP; if (IS_ERR(wdev)) { err = PTR_ERR(wdev); if (err != -EINVAL) return err; wdev = NULL; } else if (wdev->wiphy != &rdev->wiphy) { return -EINVAL; } if (!info->attrs[NL80211_ATTR_VENDOR_ID] || !info->attrs[NL80211_ATTR_VENDOR_SUBCMD]) return -EINVAL; vid = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(info->attrs[NL80211_ATTR_VENDOR_SUBCMD]); for (i = 0; i < rdev->wiphy.n_vendor_commands; i++) { const struct wiphy_vendor_command *vcmd; void *data = NULL; int len = 0; vcmd = &rdev->wiphy.vendor_commands[i]; if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { if (!wdev) return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && !wdev->netdev) return -EINVAL; if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { if (!wdev_running(wdev)) return -ENETDOWN; } } else { wdev = NULL; } if (!vcmd->doit) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); err = nl80211_vendor_check_policy(vcmd, 
info->attrs[NL80211_ATTR_VENDOR_DATA], info->extack); if (err) return err; } rdev->cur_cmd_info = info; err = vcmd->doit(&rdev->wiphy, wdev, data, len); rdev->cur_cmd_info = NULL; return err; } return -EOPNOTSUPP; } static int nl80211_prepare_vendor_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev) { struct nlattr **attrbuf; u32 vid, subcmd; unsigned int i; int vcmd_idx = -1; int err; void *data = NULL; unsigned int data_len = 0; if (cb->args[0]) { /* subtract the 1 again here */ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; if (!wiphy) return -ENODEV; *rdev = wiphy_to_rdev(wiphy); *wdev = NULL; if (cb->args[1]) { list_for_each_entry(tmp, &wiphy->wdev_list, list) { if (tmp->identifier == cb->args[1] - 1) { *wdev = tmp; break; } } } /* keep rtnl locked in successful case */ return 0; } attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); if (!attrbuf) return -ENOMEM; err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, nl80211_fam.maxattr, nl80211_policy, NULL); if (err) goto out; if (!attrbuf[NL80211_ATTR_VENDOR_ID] || !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { err = -EINVAL; goto out; } *wdev = __cfg80211_wdev_from_attrs(NULL, sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*rdev)) { err = PTR_ERR(*rdev); goto out; } vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); for (i = 0; i < (*rdev)->wiphy.n_vendor_commands; i++) { const struct wiphy_vendor_command *vcmd; vcmd = &(*rdev)->wiphy.vendor_commands[i]; if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; if (!vcmd->dumpit) { err = -EOPNOTSUPP; goto out; } vcmd_idx = i; break; } if (vcmd_idx < 0) { err = -EOPNOTSUPP; goto out; } if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = 
nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); data_len = nla_len(attrbuf[NL80211_ATTR_VENDOR_DATA]); err = nl80211_vendor_check_policy( &(*rdev)->wiphy.vendor_commands[vcmd_idx], attrbuf[NL80211_ATTR_VENDOR_DATA], cb->extack); if (err) goto out; } /* 0 is the first index - add 1 to parse only once */ cb->args[0] = (*rdev)->wiphy_idx + 1; /* add 1 to know if it was NULL */ cb->args[1] = *wdev ? (*wdev)->identifier + 1 : 0; cb->args[2] = vcmd_idx; cb->args[3] = (unsigned long)data; cb->args[4] = data_len; /* keep rtnl locked in successful case */ err = 0; out: kfree(attrbuf); return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; unsigned int vcmd_idx; const struct wiphy_vendor_command *vcmd; void *data; int data_len; int err; struct nlattr *vendor_data; rtnl_lock(); err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev); if (err) goto out; vcmd_idx = cb->args[2]; data = (void *)cb->args[3]; data_len = cb->args[4]; vcmd = &rdev->wiphy.vendor_commands[vcmd_idx]; if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV | WIPHY_VENDOR_CMD_NEED_NETDEV)) { if (!wdev) { err = -EINVAL; goto out; } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV && !wdev->netdev) { err = -EINVAL; goto out; } if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) { if (!wdev_running(wdev)) { err = -ENETDOWN; goto out; } } } while (1) { void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, NL80211_CMD_VENDOR); if (!hdr) break; if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev && nla_put_u64_64bit(skb, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD))) { genlmsg_cancel(skb, hdr); break; } vendor_data = nla_nest_start_noflag(skb, NL80211_ATTR_VENDOR_DATA); if (!vendor_data) { genlmsg_cancel(skb, hdr); break; } err = vcmd->dumpit(&rdev->wiphy, wdev, skb, data, data_len, (unsigned long *)&cb->args[5]); nla_nest_end(skb, vendor_data); if (err == 
-ENOBUFS || err == -ENOENT) { genlmsg_cancel(skb, hdr); break; } else if (err <= 0) { genlmsg_cancel(skb, hdr); goto out; } genlmsg_end(skb, hdr); } err = skb->len; out: rtnl_unlock(); return err; } struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy, enum nl80211_commands cmd, enum nl80211_attrs attr, int approxlen) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (WARN_ON(!rdev->cur_cmd_info)) return NULL; return __cfg80211_alloc_vendor_skb(rdev, NULL, approxlen, rdev->cur_cmd_info->snd_portid, rdev->cur_cmd_info->snd_seq, cmd, attr, NULL, GFP_KERNEL); } EXPORT_SYMBOL(__cfg80211_alloc_reply_skb); int cfg80211_vendor_cmd_reply(struct sk_buff *skb) { struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; /* clear CB data for netlink core to own from now on */ memset(skb->cb, 0, sizeof(skb->cb)); if (WARN_ON(!rdev->cur_cmd_info)) { kfree_skb(skb); return -EINVAL; } nla_nest_end(skb, data); genlmsg_end(skb, hdr); return genlmsg_reply(skb, rdev->cur_cmd_info); } EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_reply); unsigned int cfg80211_vendor_cmd_get_sender(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (WARN_ON(!rdev->cur_cmd_info)) return 0; return rdev->cur_cmd_info->snd_portid; } EXPORT_SYMBOL_GPL(cfg80211_vendor_cmd_get_sender); static int nl80211_set_qos_map(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_qos_map *qos_map = NULL; struct net_device *dev = info->user_ptr[1]; u8 *pos, len, num_des, des_len, des; int ret; if (!rdev->ops->set_qos_map) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_QOS_MAP]) { pos = nla_data(info->attrs[NL80211_ATTR_QOS_MAP]); len = nla_len(info->attrs[NL80211_ATTR_QOS_MAP]); if (len % 2) return -EINVAL; qos_map = kzalloc(sizeof(struct cfg80211_qos_map), GFP_KERNEL); if (!qos_map) return -ENOMEM; num_des 
= (len - IEEE80211_QOS_MAP_LEN_MIN) >> 1; if (num_des) { des_len = num_des * sizeof(struct cfg80211_dscp_exception); memcpy(qos_map->dscp_exception, pos, des_len); qos_map->num_des = num_des; for (des = 0; des < num_des; des++) { if (qos_map->dscp_exception[des].up > 7) { kfree(qos_map); return -EINVAL; } } pos += des_len; } memcpy(qos_map->up, pos, IEEE80211_QOS_MAP_LEN_MIN); } ret = nl80211_key_allowed(dev->ieee80211_ptr); if (!ret) ret = rdev_set_qos_map(rdev, dev, qos_map); kfree(qos_map); return ret; } static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *peer; u8 tsid, up; u16 admitted_time = 0; if (!(rdev->wiphy.features & NL80211_FEATURE_SUPPORTS_WMM_ADMISSION)) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_USER_PRIO]) return -EINVAL; tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]); /* WMM uses TIDs 0-7 even for TSPEC */ if (tsid >= IEEE80211_FIRST_TSPEC_TSID) { /* TODO: handle 802.11 TSPEC/admission control * need more attributes for that (e.g. 
BA session requirement); * change the WMM admission test above to allow both then */ return -EINVAL; } peer = nla_data(info->attrs[NL80211_ATTR_MAC]); if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) { admitted_time = nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]); if (!admitted_time) return -EINVAL; } switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; return -ENOTCONN; default: return -EOPNOTSUPP; } return rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); } static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *peer; u8 tsid; if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); peer = nla_data(info->attrs[NL80211_ATTR_MAC]); return rdev_del_tx_ts(rdev, dev, tsid, peer); } static int nl80211_tdls_channel_switch(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_chan_def chandef = {}; const u8 *addr; u8 oper_class; int err; if (!rdev->ops->tdls_channel_switch || !(rdev->wiphy.features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH)) return -EOPNOTSUPP; switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: break; default: return -EOPNOTSUPP; } if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_OPER_CLASS]) return -EINVAL; err = nl80211_parse_chandef(rdev, info, &chandef); if (err) return err; /* * Don't allow wide channels on the 2.4Ghz band, as per IEEE802.11-2012 * section 10.22.6.2.1. Disallow 5/10Mhz channels as well for now, the * specification is not defined for them. 
*/ if (chandef.chan->band == NL80211_BAND_2GHZ && chandef.width != NL80211_CHAN_WIDTH_20_NOHT && chandef.width != NL80211_CHAN_WIDTH_20) return -EINVAL; /* we will be active on the TDLS link */ if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, wdev->iftype)) return -EINVAL; /* don't allow switching to DFS channels */ if (cfg80211_chandef_dfs_required(wdev->wiphy, &chandef, wdev->iftype)) return -EINVAL; addr = nla_data(info->attrs[NL80211_ATTR_MAC]); oper_class = nla_get_u8(info->attrs[NL80211_ATTR_OPER_CLASS]); return rdev_tdls_channel_switch(rdev, dev, addr, oper_class, &chandef); } static int nl80211_tdls_cancel_channel_switch(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; const u8 *addr; if (!rdev->ops->tdls_channel_switch || !rdev->ops->tdls_cancel_channel_switch || !(rdev->wiphy.features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH)) return -EOPNOTSUPP; switch (dev->ieee80211_ptr->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: break; default: return -EOPNOTSUPP; } if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; addr = nla_data(info->attrs[NL80211_ATTR_MAC]); rdev_tdls_cancel_channel_switch(rdev, dev, addr); return 0; } static int nl80211_set_multicast_to_unicast(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const struct nlattr *nla; bool enabled; if (!rdev->ops->set_multicast_to_unicast) return -EOPNOTSUPP; if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; nla = info->attrs[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED]; enabled = nla_get_flag(nla); return rdev_set_multicast_to_unicast(rdev, dev, enabled); } static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = 
info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_pmk_conf pmk_conf = {}; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X)) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_PMK]) return -EINVAL; if (!wdev->connected) return -ENOTCONN; pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]); if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) return -EINVAL; pmk_conf.pmk = nla_data(info->attrs[NL80211_ATTR_PMK]); pmk_conf.pmk_len = nla_len(info->attrs[NL80211_ATTR_PMK]); if (pmk_conf.pmk_len != WLAN_PMK_LEN && pmk_conf.pmk_len != WLAN_PMK_LEN_SUITE_B_192) return -EINVAL; if (info->attrs[NL80211_ATTR_PMKR0_NAME]) pmk_conf.pmk_r0_name = nla_data(info->attrs[NL80211_ATTR_PMKR0_NAME]); return rdev_set_pmk(rdev, dev, &pmk_conf); } static int nl80211_del_pmk(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *aa; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_4WAY_HANDSHAKE_STA_1X)) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; aa = nla_data(info->attrs[NL80211_ATTR_MAC]); return rdev_del_pmk(rdev, dev, aa); } static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_external_auth_params params; if (!rdev->ops->external_auth) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_SSID] && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && dev->ieee80211_ptr->iftype != 
NL80211_IFTYPE_P2P_GO) return -EINVAL; if (!info->attrs[NL80211_ATTR_BSSID]) return -EINVAL; if (!info->attrs[NL80211_ATTR_STATUS_CODE]) return -EINVAL; memset(&params, 0, sizeof(params)); if (info->attrs[NL80211_ATTR_SSID]) { params.ssid.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); if (params.ssid.ssid_len == 0) return -EINVAL; memcpy(params.ssid.ssid, nla_data(info->attrs[NL80211_ATTR_SSID]), params.ssid.ssid_len); } memcpy(params.bssid, nla_data(info->attrs[NL80211_ATTR_BSSID]), ETH_ALEN); params.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); if (info->attrs[NL80211_ATTR_PMKID]) params.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]); return rdev_external_auth(rdev, dev, &params); } static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) { bool dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; const u8 *buf; size_t len; u8 *dest; u16 proto; bool noencrypt; u64 cookie = 0; int link_id; int err; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211)) return -EOPNOTSUPP; if (!rdev->ops->tx_control_port) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_FRAME] || !info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { GENL_SET_ERR_MSG(info, "Frame, MAC or ethertype missing"); return -EINVAL; } switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.current_bss) break; return -ENOTCONN; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: if (wdev->connected) break; return -ENOTCONN; default: return -EOPNOTSUPP; } buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); len = nla_len(info->attrs[NL80211_ATTR_FRAME]); dest = nla_data(info->attrs[NL80211_ATTR_MAC]); proto = 
nla_get_u16(info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); noencrypt = nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); link_id = nl80211_link_id_or_invalid(info->attrs); err = rdev_tx_control_port(rdev, dev, buf, len, dest, cpu_to_be16(proto), noencrypt, link_id, dont_wait_for_ack ? NULL : &cookie); if (!err && !dont_wait_for_ack) nl_set_extack_cookie_u64(info->extack, cookie); return err; } static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ftm_responder_stats ftm_stats = {}; unsigned int link_id = nl80211_link_id(info->attrs); struct sk_buff *msg; void *hdr; struct nlattr *ftm_stats_attr; int err; if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->links[link_id].ap.beacon_interval) return -EOPNOTSUPP; err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats); if (err) return err; if (!ftm_stats.filled) return -ENODATA; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_FTM_RESPONDER_STATS); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; ftm_stats_attr = nla_nest_start_noflag(msg, NL80211_ATTR_FTM_RESPONDER_STATS); if (!ftm_stats_attr) goto nla_put_failure; #define SET_FTM(field, name, type) \ do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \ nla_put_ ## type(msg, NL80211_FTM_STATS_ ## name, \ ftm_stats.field)) \ goto nla_put_failure; } while (0) #define SET_FTM_U64(field, name) \ do { if ((ftm_stats.filled & BIT(NL80211_FTM_STATS_ ## name)) && \ nla_put_u64_64bit(msg, NL80211_FTM_STATS_ ## name, \ ftm_stats.field, NL80211_FTM_STATS_PAD)) \ goto nla_put_failure; } while (0) SET_FTM(success_num, SUCCESS_NUM, u32); SET_FTM(partial_num, PARTIAL_NUM, u32); 
SET_FTM(failed_num, FAILED_NUM, u32); SET_FTM(asap_num, ASAP_NUM, u32); SET_FTM(non_asap_num, NON_ASAP_NUM, u32); SET_FTM_U64(total_duration_ms, TOTAL_DURATION_MSEC); SET_FTM(unknown_triggers_num, UNKNOWN_TRIGGERS_NUM, u32); SET_FTM(reschedule_requests_num, RESCHEDULE_REQUESTS_NUM, u32); SET_FTM(out_of_window_triggers_num, OUT_OF_WINDOW_TRIGGERS_NUM, u32); #undef SET_FTM nla_nest_end(msg, ftm_stats_attr); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } static int nl80211_update_owe_info(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_update_owe_info owe_info; struct net_device *dev = info->user_ptr[1]; if (!rdev->ops->update_owe_info) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_STATUS_CODE] || !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; memset(&owe_info, 0, sizeof(owe_info)); owe_info.status = nla_get_u16(info->attrs[NL80211_ATTR_STATUS_CODE]); nla_memcpy(owe_info.peer, info->attrs[NL80211_ATTR_MAC], ETH_ALEN); if (info->attrs[NL80211_ATTR_IE]) { owe_info.ie = nla_data(info->attrs[NL80211_ATTR_IE]); owe_info.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } return rdev_update_owe_info(rdev, dev, &owe_info); } static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct station_info sinfo = {}; const u8 *buf; size_t len; u8 *dest; int err; if (!rdev->ops->probe_mesh_link || !rdev->ops->get_station) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_FRAME]) { GENL_SET_ERR_MSG(info, "Frame or MAC missing"); return -EINVAL; } if (wdev->iftype != NL80211_IFTYPE_MESH_POINT) return -EOPNOTSUPP; dest = nla_data(info->attrs[NL80211_ATTR_MAC]); buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); len = 
nla_len(info->attrs[NL80211_ATTR_FRAME]); if (len < sizeof(struct ethhdr)) return -EINVAL; if (!ether_addr_equal(buf, dest) || is_multicast_ether_addr(buf) || !ether_addr_equal(buf + ETH_ALEN, dev->dev_addr)) return -EINVAL; err = rdev_get_station(rdev, dev, dest, &sinfo); if (err) return err; cfg80211_sinfo_release_content(&sinfo); return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } static int parse_tid_conf(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct net_device *dev, struct cfg80211_tid_cfg *tid_conf, struct genl_info *info, const u8 *peer, unsigned int link_id) { struct netlink_ext_ack *extack = info->extack; u64 mask; int err; if (!attrs[NL80211_TID_CONFIG_ATTR_TIDS]) return -EINVAL; tid_conf->config_override = nla_get_flag(attrs[NL80211_TID_CONFIG_ATTR_OVERRIDE]); tid_conf->tids = nla_get_u16(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); if (tid_conf->config_override) { if (rdev->ops->reset_tid_config) { err = rdev_reset_tid_config(rdev, dev, peer, tid_conf->tids); if (err) return err; } else { return -EINVAL; } } if (attrs[NL80211_TID_CONFIG_ATTR_NOACK]) { tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_NOACK); tid_conf->noack = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); } if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]) { tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT); tid_conf->retry_short = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]); if (tid_conf->retry_short > rdev->wiphy.max_data_retry_count) return -EINVAL; } if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]) { tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); tid_conf->retry_long = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]); if (tid_conf->retry_long > rdev->wiphy.max_data_retry_count) return -EINVAL; } if (attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]) { tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL); tid_conf->ampdu = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]); } if (attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]) { 
tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL); tid_conf->rtscts = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]); } if (attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]) { tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMSDU_CTRL); tid_conf->amsdu = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]); } if (attrs[NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE]) { u32 idx = NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, attr; tid_conf->txrate_type = nla_get_u8(attrs[idx]); if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) { attr = NL80211_TID_CONFIG_ATTR_TX_RATE; err = nl80211_parse_tx_bitrate_mask(info, attrs, attr, &tid_conf->txrate_mask, dev, true, link_id); if (err) return err; tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE); } tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE); } if (peer) mask = rdev->wiphy.tid_config_support.peer; else mask = rdev->wiphy.tid_config_support.vif; if (tid_conf->mask & ~mask) { NL_SET_ERR_MSG(extack, "unsupported TID configuration"); return -EOPNOTSUPP; } return 0; } static int nl80211_set_tid_config(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct cfg80211_tid_config *tid_config; struct nlattr *tid; int conf_idx = 0, rem_conf; int ret = -EINVAL; u32 num_conf = 0; if (!info->attrs[NL80211_ATTR_TID_CONFIG]) return -EINVAL; if (!rdev->ops->set_tid_config) return -EOPNOTSUPP; nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], rem_conf) num_conf++; tid_config = kzalloc(struct_size(tid_config, tid_conf, num_conf), GFP_KERNEL); if (!tid_config) return -ENOMEM; tid_config->n_tid_conf = num_conf; if (info->attrs[NL80211_ATTR_MAC]) tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]); nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], rem_conf) { ret = nla_parse_nested(attrs, 
NL80211_TID_CONFIG_ATTR_MAX, tid, NULL, NULL); if (ret) goto bad_tid_conf; ret = parse_tid_conf(rdev, attrs, dev, &tid_config->tid_conf[conf_idx], info, tid_config->peer, link_id); if (ret) goto bad_tid_conf; conf_idx++; } ret = rdev_set_tid_config(rdev, dev, tid_config); bad_tid_conf: kfree(tid_config); return ret; } static int nl80211_color_change(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_color_change_settings params = {}; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr **tb; u16 offset; int err; if (!rdev->ops->color_change) return -EOPNOTSUPP; if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BSS_COLOR)) return -EOPNOTSUPP; if (wdev->iftype != NL80211_IFTYPE_AP) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT] || !info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR] || !info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS]) return -EINVAL; params.count = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COUNT]); params.color = nla_get_u8(info->attrs[NL80211_ATTR_COLOR_CHANGE_COLOR]); err = nl80211_parse_beacon(rdev, info->attrs, &params.beacon_next, info->extack); if (err) return err; tb = kcalloc(NL80211_ATTR_MAX + 1, sizeof(*tb), GFP_KERNEL); if (!tb) return -ENOMEM; err = nla_parse_nested(tb, NL80211_ATTR_MAX, info->attrs[NL80211_ATTR_COLOR_CHANGE_ELEMS], nl80211_policy, info->extack); if (err) goto out; err = nl80211_parse_beacon(rdev, tb, &params.beacon_color_change, info->extack); if (err) goto out; if (!tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) { err = -EINVAL; goto out; } if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]) != sizeof(u16)) { err = -EINVAL; goto out; } offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_BEACON]); if (offset >= params.beacon_color_change.tail_len) { err = -EINVAL; goto out; } if (params.beacon_color_change.tail[offset] != params.count) { err = -EINVAL; goto out; } 
params.counter_offset_beacon = offset; if (tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) { if (nla_len(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]) != sizeof(u16)) { err = -EINVAL; goto out; } offset = nla_get_u16(tb[NL80211_ATTR_CNTDWN_OFFS_PRESP]); if (offset >= params.beacon_color_change.probe_resp_len) { err = -EINVAL; goto out; } if (params.beacon_color_change.probe_resp[offset] != params.count) { err = -EINVAL; goto out; } params.counter_offset_presp = offset; } params.link_id = nl80211_link_id(info->attrs); err = rdev_color_change(rdev, dev, &params); out: kfree(params.beacon_next.mbssid_ies); kfree(params.beacon_color_change.mbssid_ies); kfree(params.beacon_next.rnr_ies); kfree(params.beacon_color_change.rnr_ies); kfree(tb); return err; } static int nl80211_set_fils_aad(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_fils_aad fils_aad = {}; u8 *nonces; if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_FILS_KEK] || !info->attrs[NL80211_ATTR_FILS_NONCES]) return -EINVAL; fils_aad.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); fils_aad.kek_len = nla_len(info->attrs[NL80211_ATTR_FILS_KEK]); fils_aad.kek = nla_data(info->attrs[NL80211_ATTR_FILS_KEK]); nonces = nla_data(info->attrs[NL80211_ATTR_FILS_NONCES]); fils_aad.snonce = nonces; fils_aad.anonce = nonces + FILS_NONCE_LEN; return rdev_set_fils_aad(rdev, dev, &fils_aad); } static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; int ret; if (!(wdev->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) return -EINVAL; switch (wdev->iftype) { case NL80211_IFTYPE_AP: break; default: return -EINVAL; } if (!info->attrs[NL80211_ATTR_MAC] || 
!is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC]))) return -EINVAL; wdev->valid_links |= BIT(link_id); ether_addr_copy(wdev->links[link_id].addr, nla_data(info->attrs[NL80211_ATTR_MAC])); ret = rdev_add_intf_link(rdev, wdev, link_id); if (ret) { wdev->valid_links &= ~BIT(link_id); eth_zero_addr(wdev->links[link_id].addr); } return ret; } static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info) { unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; /* cannot remove if there's no link */ if (!info->attrs[NL80211_ATTR_MLO_LINK_ID]) return -EINVAL; switch (wdev->iftype) { case NL80211_IFTYPE_AP: break; default: return -EINVAL; } cfg80211_remove_link(wdev, link_id); return 0; } static int nl80211_add_mod_link_station(struct sk_buff *skb, struct genl_info *info, bool add) { struct link_station_parameters params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; int err; if ((add && !rdev->ops->add_link_station) || (!add && !rdev->ops->mod_link_station)) return -EOPNOTSUPP; if (add && !info->attrs[NL80211_ATTR_MAC]) return -EINVAL; if (!info->attrs[NL80211_ATTR_MLD_ADDR]) return -EINVAL; if (add && !info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) return -EINVAL; params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); if (info->attrs[NL80211_ATTR_MAC]) { params.link_mac = nla_data(info->attrs[NL80211_ATTR_MAC]); if (!is_valid_ether_addr(params.link_mac)) return -EINVAL; } if (!info->attrs[NL80211_ATTR_MLO_LINK_ID]) return -EINVAL; params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]); if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) { params.supported_rates = nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); params.supported_rates_len = nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); } if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) params.ht_capa = 
nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) params.vht_capa = nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); if (info->attrs[NL80211_ATTR_HE_CAPABILITY]) { params.he_capa = nla_data(info->attrs[NL80211_ATTR_HE_CAPABILITY]); params.he_capa_len = nla_len(info->attrs[NL80211_ATTR_HE_CAPABILITY]); if (info->attrs[NL80211_ATTR_EHT_CAPABILITY]) { params.eht_capa = nla_data(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); params.eht_capa_len = nla_len(info->attrs[NL80211_ATTR_EHT_CAPABILITY]); if (!ieee80211_eht_capa_size_ok((const u8 *)params.he_capa, (const u8 *)params.eht_capa, params.eht_capa_len, false)) return -EINVAL; } } if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]) params.he_6ghz_capa = nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]); if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) { params.opmode_notif_used = true; params.opmode_notif = nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]); } err = nl80211_parse_sta_txpower_setting(info, &params.txpwr, &params.txpwr_set); if (err) return err; if (add) return rdev_add_link_station(rdev, dev, &params); return rdev_mod_link_station(rdev, dev, &params); } static int nl80211_add_link_station(struct sk_buff *skb, struct genl_info *info) { return nl80211_add_mod_link_station(skb, info, true); } static int nl80211_modify_link_station(struct sk_buff *skb, struct genl_info *info) { return nl80211_add_mod_link_station(skb, info, false); } static int nl80211_remove_link_station(struct sk_buff *skb, struct genl_info *info) { struct link_station_del_parameters params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; if (!rdev->ops->del_link_station) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MLD_ADDR] || !info->attrs[NL80211_ATTR_MLO_LINK_ID]) return -EINVAL; params.mld_mac = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]); params.link_id = nla_get_u8(info->attrs[NL80211_ATTR_MLO_LINK_ID]); return 
rdev_del_link_station(rdev, dev, &params); } static int nl80211_set_hw_timestamp(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct cfg80211_set_hw_timestamp hwts = {}; if (!rdev->wiphy.hw_timestamp_max_peers) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC] && rdev->wiphy.hw_timestamp_max_peers != CFG80211_HW_TIMESTAMP_ALL_PEERS) return -EOPNOTSUPP; if (info->attrs[NL80211_ATTR_MAC]) hwts.macaddr = nla_data(info->attrs[NL80211_ATTR_MAC]); hwts.enable = nla_get_flag(info->attrs[NL80211_ATTR_HW_TIMESTAMP_ENABLED]); return rdev_set_hw_timestamp(rdev, dev, &hwts); } static int nl80211_set_ttlm(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_ttlm_params params = {}; struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!wdev->connected) return -ENOLINK; if (!info->attrs[NL80211_ATTR_MLO_TTLM_DLINK] || !info->attrs[NL80211_ATTR_MLO_TTLM_ULINK]) return -EINVAL; nla_memcpy(params.dlink, info->attrs[NL80211_ATTR_MLO_TTLM_DLINK], sizeof(params.dlink)); nla_memcpy(params.ulink, info->attrs[NL80211_ATTR_MLO_TTLM_ULINK], sizeof(params.ulink)); return rdev_set_ttlm(rdev, dev, &params); } static int nl80211_assoc_ml_reconf(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_assoc_link links[IEEE80211_MLD_MAX_NUM_LINKS] = {}; unsigned int link_id; u16 add_links, rem_links; int err; if (!wdev->valid_links) return -EINVAL; if (dev->ieee80211_ptr->conn_owner_nlportid && dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid) return -EPERM; if (dev->ieee80211_ptr->iftype != 
NL80211_IFTYPE_STATION && dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; add_links = 0; if (info->attrs[NL80211_ATTR_MLO_LINKS]) { err = nl80211_process_links(rdev, links, NULL, 0, info); if (err) return err; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { if (!links[link_id].bss) continue; add_links |= BIT(link_id); } } if (info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]) rem_links = nla_get_u16(info->attrs[NL80211_ATTR_MLO_RECONF_REM_LINKS]); else rem_links = 0; /* Validate that existing links are not added, removed links are valid * and don't allow adding and removing the same links */ if ((add_links & rem_links) || !(add_links | rem_links) || (wdev->valid_links & add_links) || ((wdev->valid_links & rem_links) != rem_links)) { err = -EINVAL; goto out; } err = cfg80211_assoc_ml_reconf(rdev, dev, links, rem_links); out: for (link_id = 0; link_id < ARRAY_SIZE(links); link_id++) cfg80211_put_bss(&rdev->wiphy, links[link_id].bss); return err; } static int nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; bool val; if (wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT) return -EOPNOTSUPP; if (!wdev->connected) return -ENOLINK; val = nla_get_flag(info->attrs[NL80211_ATTR_EPCS]); return rdev_set_epcs(rdev, dev, val); } #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 #define NL80211_FLAG_CHECK_NETDEV_UP 0x08 #define NL80211_FLAG_NEED_NETDEV_UP (NL80211_FLAG_NEED_NETDEV |\ NL80211_FLAG_CHECK_NETDEV_UP) #define NL80211_FLAG_NEED_WDEV 0x10 /* If a netdev is associated, it must be UP, P2P must be started */ #define NL80211_FLAG_NEED_WDEV_UP (NL80211_FLAG_NEED_WDEV |\ NL80211_FLAG_CHECK_NETDEV_UP) #define NL80211_FLAG_CLEAR_SKB 0x20 #define NL80211_FLAG_NO_WIPHY_MTX 
0x40 #define NL80211_FLAG_MLO_VALID_LINK_ID 0x80 #define NL80211_FLAG_MLO_UNSUPPORTED 0x100 #define INTERNAL_FLAG_SELECTORS(__sel) \ SELECTOR(__sel, NONE, 0) /* must be first */ \ SELECTOR(__sel, WIPHY, \ NL80211_FLAG_NEED_WIPHY) \ SELECTOR(__sel, WDEV, \ NL80211_FLAG_NEED_WDEV) \ SELECTOR(__sel, NETDEV, \ NL80211_FLAG_NEED_NETDEV) \ SELECTOR(__sel, NETDEV_LINK, \ NL80211_FLAG_NEED_NETDEV | \ NL80211_FLAG_MLO_VALID_LINK_ID) \ SELECTOR(__sel, NETDEV_NO_MLO, \ NL80211_FLAG_NEED_NETDEV | \ NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, WIPHY_RTNL, \ NL80211_FLAG_NEED_WIPHY | \ NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, WIPHY_RTNL_NOMTX, \ NL80211_FLAG_NEED_WIPHY | \ NL80211_FLAG_NEED_RTNL | \ NL80211_FLAG_NO_WIPHY_MTX) \ SELECTOR(__sel, WDEV_RTNL, \ NL80211_FLAG_NEED_WDEV | \ NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, NETDEV_RTNL, \ NL80211_FLAG_NEED_NETDEV | \ NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, NETDEV_UP, \ NL80211_FLAG_NEED_NETDEV_UP) \ SELECTOR(__sel, NETDEV_UP_LINK, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_MLO_VALID_LINK_ID) \ SELECTOR(__sel, NETDEV_UP_NO_MLO, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, NETDEV_UP_NO_MLO_CLEAR, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_CLEAR_SKB | \ NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, NETDEV_UP_NOTMX, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_NO_WIPHY_MTX) \ SELECTOR(__sel, NETDEV_UP_NOTMX_MLO, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_NO_WIPHY_MTX | \ NL80211_FLAG_MLO_VALID_LINK_ID) \ SELECTOR(__sel, NETDEV_UP_CLEAR, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP, \ NL80211_FLAG_NEED_WDEV_UP) \ SELECTOR(__sel, WDEV_UP_LINK, \ NL80211_FLAG_NEED_WDEV_UP | \ NL80211_FLAG_MLO_VALID_LINK_ID) \ SELECTOR(__sel, WDEV_UP_RTNL, \ NL80211_FLAG_NEED_WDEV_UP | \ NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, WIPHY_CLEAR, \ NL80211_FLAG_NEED_WIPHY | \ NL80211_FLAG_CLEAR_SKB) enum nl80211_internal_flags_selector { #define SELECTOR(_, 
name, value) NL80211_IFL_SEL_##name, INTERNAL_FLAG_SELECTORS(_) #undef SELECTOR }; static u32 nl80211_internal_flags[] = { #define SELECTOR(_, name, value) [NL80211_IFL_SEL_##name] = value, INTERNAL_FLAG_SELECTORS(_) #undef SELECTOR }; static int nl80211_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; struct wireless_dev *wdev = NULL; struct net_device *dev = NULL; u32 internal_flags; int err; if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags))) return -EINVAL; internal_flags = nl80211_internal_flags[ops->internal_flags]; rtnl_lock(); if (internal_flags & NL80211_FLAG_NEED_WIPHY) { rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { err = PTR_ERR(rdev); goto out_unlock; } info->user_ptr[0] = rdev; } else if (internal_flags & NL80211_FLAG_NEED_NETDEV || internal_flags & NL80211_FLAG_NEED_WDEV) { wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { err = PTR_ERR(wdev); goto out_unlock; } dev = wdev->netdev; dev_hold(dev); rdev = wiphy_to_rdev(wdev->wiphy); if (internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { err = -EINVAL; goto out_unlock; } info->user_ptr[1] = dev; } else { info->user_ptr[1] = wdev; } if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !wdev_running(wdev)) { err = -ENETDOWN; goto out_unlock; } info->user_ptr[0] = rdev; } if (internal_flags & NL80211_FLAG_MLO_VALID_LINK_ID) { struct nlattr *link_id = info->attrs[NL80211_ATTR_MLO_LINK_ID]; if (!wdev) { err = -EINVAL; goto out_unlock; } /* MLO -> require valid link ID */ if (wdev->valid_links && (!link_id || !(wdev->valid_links & BIT(nla_get_u8(link_id))))) { err = -EINVAL; goto out_unlock; } /* non-MLO -> no link ID attribute accepted */ if (!wdev->valid_links && link_id) { err = -EINVAL; goto out_unlock; } } if (internal_flags & NL80211_FLAG_MLO_UNSUPPORTED) { if (info->attrs[NL80211_ATTR_MLO_LINK_ID] || (wdev && 
wdev->valid_links)) { err = -EINVAL; goto out_unlock; } } if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { wiphy_lock(&rdev->wiphy); /* we keep the mutex locked until post_doit */ __release(&rdev->wiphy.mtx); } if (!(internal_flags & NL80211_FLAG_NEED_RTNL)) rtnl_unlock(); return 0; out_unlock: rtnl_unlock(); dev_put(dev); return err; } static void nl80211_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) { u32 internal_flags = nl80211_internal_flags[ops->internal_flags]; if (info->user_ptr[1]) { if (internal_flags & NL80211_FLAG_NEED_WDEV) { struct wireless_dev *wdev = info->user_ptr[1]; dev_put(wdev->netdev); } else { dev_put(info->user_ptr[1]); } } if (info->user_ptr[0] && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; /* we kept the mutex locked since pre_doit */ __acquire(&rdev->wiphy.mtx); wiphy_unlock(&rdev->wiphy); } if (internal_flags & NL80211_FLAG_NEED_RTNL) rtnl_unlock(); /* If needed, clear the netlink message payload from the SKB * as it might contain key data that shouldn't stick around on * the heap after the SKB is freed. The netlink message header * is still needed for further processing, so leave it intact. 
*/ if (internal_flags & NL80211_FLAG_CLEAR_SKB) { struct nlmsghdr *nlh = nlmsg_hdr(skb); memset(nlmsg_data(nlh), 0, nlmsg_len(nlh)); } } static int nl80211_set_sar_sub_specs(struct cfg80211_registered_device *rdev, struct cfg80211_sar_specs *sar_specs, struct nlattr *spec[], int index) { u32 range_index, i; if (!sar_specs || !spec) return -EINVAL; if (!spec[NL80211_SAR_ATTR_SPECS_POWER] || !spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]) return -EINVAL; range_index = nla_get_u32(spec[NL80211_SAR_ATTR_SPECS_RANGE_INDEX]); /* check if range_index exceeds num_freq_ranges */ if (range_index >= rdev->wiphy.sar_capa->num_freq_ranges) return -EINVAL; /* check if range_index duplicates */ for (i = 0; i < index; i++) { if (sar_specs->sub_specs[i].freq_range_index == range_index) return -EINVAL; } sar_specs->sub_specs[index].power = nla_get_s32(spec[NL80211_SAR_ATTR_SPECS_POWER]); sar_specs->sub_specs[index].freq_range_index = range_index; return 0; } static int nl80211_set_sar_specs(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *spec[NL80211_SAR_ATTR_SPECS_MAX + 1]; struct nlattr *tb[NL80211_SAR_ATTR_MAX + 1]; struct cfg80211_sar_specs *sar_spec; enum nl80211_sar_type type; struct nlattr *spec_list; u32 specs; int rem, err; if (!rdev->wiphy.sar_capa || !rdev->ops->set_sar_specs) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_SAR_SPEC]) return -EINVAL; nla_parse_nested(tb, NL80211_SAR_ATTR_MAX, info->attrs[NL80211_ATTR_SAR_SPEC], NULL, NULL); if (!tb[NL80211_SAR_ATTR_TYPE] || !tb[NL80211_SAR_ATTR_SPECS]) return -EINVAL; type = nla_get_u32(tb[NL80211_SAR_ATTR_TYPE]); if (type != rdev->wiphy.sar_capa->type) return -EINVAL; specs = 0; nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) specs++; if (specs > rdev->wiphy.sar_capa->num_freq_ranges) return -EINVAL; sar_spec = kzalloc(struct_size(sar_spec, sub_specs, specs), GFP_KERNEL); if (!sar_spec) return -ENOMEM; sar_spec->type = type; specs = 
0; nla_for_each_nested(spec_list, tb[NL80211_SAR_ATTR_SPECS], rem) { nla_parse_nested(spec, NL80211_SAR_ATTR_SPECS_MAX, spec_list, NULL, NULL); switch (type) { case NL80211_SAR_TYPE_POWER: if (nl80211_set_sar_sub_specs(rdev, sar_spec, spec, specs)) { err = -EINVAL; goto error; } break; default: err = -EINVAL; goto error; } specs++; } sar_spec->num_sub_specs = specs; rdev->cur_cmd_info = info; err = rdev_set_sar_specs(rdev, sar_spec); rdev->cur_cmd_info = NULL; error: kfree(sar_spec); return err; } #define SELECTOR(__sel, name, value) \ ((__sel) == (value)) ? NL80211_IFL_SEL_##name : int __missing_selector(void); #define IFLAGS(__val) INTERNAL_FLAG_SELECTORS(__val) __missing_selector() static const struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wiphy, .dumpit = nl80211_dump_wiphy, .done = nl80211_dump_wiphy_done, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, }; static const struct genl_small_ops nl80211_small_ops[] = { { .cmd = NL80211_CMD_SET_WIPHY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wiphy, .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_interface, .dumpit = nl80211_dump_interface, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_SET_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_NEW_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL | /* we 
take the wiphy mutex later ourselves */ NL80211_FLAG_NO_WIPHY_MTX), }, { .cmd = NL80211_CMD_DEL_INTERFACE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_interface, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_GET_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_NEW_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEL_KEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_BEACON, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_START_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_STOP_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = 
NL80211_CMD_GET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_NEW_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DEL_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_station, .flags = GENL_UNS_ADMIN_PERM, /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on * whether MAC address is passed or not. If MAC address is * passed, then even during MLO, link ID is not required. */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mpath, .dumpit = nl80211_dump_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_MPP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mpp, .dumpit = nl80211_dump_mpp, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_NEW_MPATH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DEL_MPATH, .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_mpath, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_BSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_bss, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_GET_REG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_reg_do, .dumpit = nl80211_get_reg_dump, /* can be retrieved by unprivileged users */ }, #ifdef CONFIG_CFG80211_CRDA_SUPPORT { .cmd = NL80211_CMD_SET_REG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_reg, .flags = GENL_ADMIN_PERM, }, #endif { .cmd = NL80211_CMD_REQ_SET_REG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_req_set_reg, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_RELOAD_REGDB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_reload_regdb, .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_GET_MESH_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_mesh_config, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_MESH_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_mesh_config, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TRIGGER_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_trigger_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_ABORT_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_abort_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = 
IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_GET_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nl80211_dump_scan, }, { .cmd = NL80211_CMD_START_SCHED_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_sched_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_STOP_SCHED_SCAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_sched_scan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_AUTHENTICATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_authenticate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_ASSOCIATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_associate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEAUTHENTICATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_deauthenticate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_DISASSOCIATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disassociate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_JOIN_IBSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ibss, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_LEAVE_IBSS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ibss, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, #ifdef CONFIG_NL80211_TESTMODE { .cmd 
= NL80211_CMD_TESTMODE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_testmode_do, .dumpit = nl80211_testmode_dump, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, #endif { .cmd = NL80211_CMD_CONNECT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_connect, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_UPDATE_CONNECT_PARAMS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_connect_params, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DISCONNECT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_disconnect, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_WIPHY_NETNS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_wiphy_netns, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL | NL80211_FLAG_NO_WIPHY_MTX), }, { .cmd = NL80211_CMD_GET_SURVEY, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .dumpit = nl80211_dump_survey, }, { .cmd = NL80211_CMD_SET_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEL_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_FLUSH_PMKSA, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_flush_pmksa, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = 
IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_REMAIN_ON_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, /* FIXME: requiring a link ID here is probably not good */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_cancel_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_REGISTER_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_mgmt, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_FRAME_WAIT_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_mgmt_cancel_wait, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_POWER_SAVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_power_save, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_GET_POWER_SAVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_power_save, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_CQM, 
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_cqm, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_CHANNEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_JOIN_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_mesh, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_LEAVE_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_mesh, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_JOIN_OCB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_join_ocb, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_LEAVE_OCB, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_leave_ocb, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, #ifdef CONFIG_PM { .cmd = NL80211_CMD_GET_WOWLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_wowlan, /* can be retrieved by unprivileged users */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_SET_WOWLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_wowlan, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, #endif { .cmd = NL80211_CMD_SET_REKEY_OFFLOAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_rekey_data, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = 
NL80211_CMD_TDLS_MGMT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_mgmt, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_TDLS_OPER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_oper, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_UNEXPECTED_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_unexpected_frame, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_PROBE_CLIENT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_probe_client, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_REGISTER_BEACONS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_register_beacons, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_SET_NOACK_MAP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_noack_map, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_START_P2P_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_p2p_device, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_STOP_P2P_DEVICE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_p2p_device, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_START_NAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_nan, .flags = GENL_ADMIN_PERM, .internal_flags = 
IFLAGS(NL80211_FLAG_NEED_WDEV | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_STOP_NAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_stop_nan, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_ADD_NAN_FUNCTION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_add_func, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_DEL_NAN_FUNCTION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_del_func, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_nan_change_config, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mcast_rate, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_MAC_ACL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_RADAR_DETECT, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NO_WIPHY_MTX | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_protocol_features, }, { .cmd = NL80211_CMD_UPDATE_FT_IES, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_update_ft_ies, .flags = 
GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_START, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_start, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_CRIT_PROTOCOL_STOP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_crit_protocol_stop, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_GET_COALESCE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_coalesce, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_SET_COALESCE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_coalesce, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY), }, { .cmd = NL80211_CMD_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_VENDOR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_vendor_cmd, .dumpit = nl80211_vendor_cmd_dump, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_SET_QOS_MAP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_qos_map, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_ADD_TX_TS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_DEL_TX_TS, .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_tx_ts, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TDLS_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_channel_switch, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tdls_cancel_channel_switch, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_MULTICAST_TO_UNICAST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_multicast_to_unicast, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), }, { .cmd = NL80211_CMD_SET_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_pmk, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { .cmd = NL80211_CMD_DEL_PMK, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_pmk, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_EXTERNAL_AUTH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_external_auth, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_CONTROL_PORT_FRAME, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_tx_control_port, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, .validate = 
GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_pmsr_start, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_NOTIFY_RADAR, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_notify_radar_detection, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_UPDATE_OWE_INFO, .doit = nl80211_update_owe_info, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_PROBE_MESH_LINK, .doit = nl80211_probe_mesh_link, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_TID_CONFIG, .doit = nl80211_set_tid_config, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_SET_SAR_SPECS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_sar_specs, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WIPHY | NL80211_FLAG_NEED_RTNL), }, { .cmd = NL80211_CMD_COLOR_CHANGE_REQUEST, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_color_change, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_SET_FILS_AAD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_fils_aad, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_ADD_LINK, .doit = nl80211_add_link, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_REMOVE_LINK, .doit = nl80211_remove_link, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_ADD_LINK_STA, .doit = 
nl80211_add_link_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_MODIFY_LINK_STA, .doit = nl80211_modify_link_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_REMOVE_LINK_STA, .doit = nl80211_remove_link_station, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_SET_HW_TIMESTAMP, .doit = nl80211_set_hw_timestamp, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_SET_TID_TO_LINK_MAPPING, .doit = nl80211_set_ttlm, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_ASSOC_MLO_RECONF, .doit = nl80211_assoc_ml_reconf, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, { .cmd = NL80211_CMD_EPCS_CFG, .doit = nl80211_epcs_cfg, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, };

/*
 * Generic netlink family descriptor for nl80211.  It wires up the attribute
 * policy, the two operation tables (nl80211_ops and nl80211_small_ops), the
 * multicast groups and the pre/post doit hooks.
 */
static struct genl_family nl80211_fam __ro_after_init = {
	.name		= NL80211_GENL_NAME,	/* have users key off the name instead */
	.hdrsize	= 0,			/* no private header */
	.version	= 1,			/* no particular meaning now */
	.maxattr	= NL80211_ATTR_MAX,
	.policy		= nl80211_policy,
	.netnsok	= true,
	.pre_doit	= nl80211_pre_doit,
	.post_doit	= nl80211_post_doit,
	.module		= THIS_MODULE,
	.ops		= nl80211_ops,
	.n_ops		= ARRAY_SIZE(nl80211_ops),
	.small_ops	= nl80211_small_ops,
	.n_small_ops	= ARRAY_SIZE(nl80211_small_ops),
	/* first command value following NL80211_CMD_REMOVE_LINK_STA */
	.resv_start_op	= NL80211_CMD_REMOVE_LINK_STA + 1,
	.mcgrps		= nl80211_mcgrps,
	.n_mcgrps	= ARRAY_SIZE(nl80211_mcgrps),
	.parallel_ops	= true,
};

/* notification functions */

void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd) { struct sk_buff *msg; struct nl80211_dump_wiphy_state state = {}; 
WARN_ON(cmd != NL80211_CMD_NEW_WIPHY && cmd != NL80211_CMD_DEL_WIPHY); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_send_wiphy(rdev, cmd, msg, 0, 0, 0, &state) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_CONFIG, GFP_KERNEL); } void nl80211_notify_iface(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, enum nl80211_commands cmd) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, cmd) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_CONFIG, GFP_KERNEL); } static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { struct cfg80211_scan_request *req = rdev->scan_req; struct nlattr *nest; int i; struct cfg80211_scan_info *info; if (WARN_ON(!req)) return 0; nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_SSIDS); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_ssids; i++) { if (nla_put(msg, i, req->ssids[i].ssid_len, req->ssids[i].ssid)) goto nla_put_failure; } nla_nest_end(msg, nest); if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) { nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_channels; i++) { if (nla_put_u32(msg, i, ieee80211_channel_to_khz(req->channels[i]))) goto nla_put_failure; } nla_nest_end(msg, nest); } else { nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES); if (!nest) goto nla_put_failure; for (i = 0; i < req->n_channels; i++) { if (nla_put_u32(msg, i, req->channels[i]->center_freq)) goto nla_put_failure; } nla_nest_end(msg, nest); } if (req->ie && nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; if (req->flags && nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags)) goto nla_put_failure; info = rdev->int_scan_req ? 
&rdev->int_scan_req->info : &rdev->scan_req->info; if (info->scan_start_tsf && (nla_put_u64_64bit(msg, NL80211_ATTR_SCAN_START_TIME_TSF, info->scan_start_tsf, NL80211_BSS_PAD) || nla_put(msg, NL80211_ATTR_SCAN_START_TIME_TSF_BSSID, ETH_ALEN, info->tsf_bssid))) goto nla_put_failure; return 0; nla_put_failure: return -ENOBUFS; } static int nl80211_prep_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } static int nl80211_prep_sched_scan_msg(struct sk_buff *msg, struct cfg80211_sched_scan_request *req, u32 cmd) { void *hdr; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) return -1; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, wiphy_to_rdev(req->wiphy)->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, req->dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, req->reqid, NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_prep_scan_msg(msg, rdev, wdev, 0, 0, 0, NL80211_CMD_TRIGGER_SCAN) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } struct sk_buff *nl80211_build_scan_msg(struct 
cfg80211_registered_device *rdev, struct wireless_dev *wdev, bool aborted) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return NULL; if (nl80211_prep_scan_msg(msg, rdev, wdev, 0, 0, 0, aborted ? NL80211_CMD_SCAN_ABORTED : NL80211_CMD_NEW_SCAN_RESULTS) < 0) { nlmsg_free(msg); return NULL; } return msg; } /* send message created by nl80211_build_scan_msg() */ void nl80211_send_scan_msg(struct cfg80211_registered_device *rdev, struct sk_buff *msg) { if (!msg) return; genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } void nl80211_send_sched_scan(struct cfg80211_sched_scan_request *req, u32 cmd) { struct sk_buff *msg; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; if (nl80211_prep_sched_scan_msg(msg, req, cmd) < 0) { nlmsg_free(msg); return; } genlmsg_multicast_netns(&nl80211_fam, wiphy_net(req->wiphy), msg, 0, NL80211_MCGRP_SCAN, GFP_KERNEL); } static bool nl80211_reg_change_event_fill(struct sk_buff *msg, struct regulatory_request *request) { /* Userspace can always count this one always being set */ if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator)) goto nla_put_failure; if (request->alpha2[0] == '0' && request->alpha2[1] == '0') { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_WORLD)) goto nla_put_failure; } else if (request->alpha2[0] == '9' && request->alpha2[1] == '9') { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_CUSTOM_WORLD)) goto nla_put_failure; } else if ((request->alpha2[0] == '9' && request->alpha2[1] == '8') || request->intersect) { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_INTERSECTION)) goto nla_put_failure; } else { if (nla_put_u8(msg, NL80211_ATTR_REG_TYPE, NL80211_REGDOM_TYPE_COUNTRY) || nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, request->alpha2)) goto nla_put_failure; } if (request->wiphy_idx != WIPHY_IDX_INVALID) { struct wiphy *wiphy = 
wiphy_idx_to_wiphy(request->wiphy_idx); if (wiphy && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; } return true; nla_put_failure: return false; } /* * This can happen on global regulatory changes or device specific settings * based on custom regulatory domains. */ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, struct regulatory_request *request) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); if (!hdr) goto nla_put_failure; if (!nl80211_reg_change_event_fill(msg, request)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_allns(&nl80211_fam, msg, 0, NL80211_MCGRP_REGULATORY); return; nla_put_failure: nlmsg_free(msg); } struct nl80211_mlme_event { enum nl80211_commands cmd; const u8 *buf; size_t buf_len; int uapsd_queues; const u8 *req_ies; size_t req_ies_len; bool reconnect; }; static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, struct net_device *netdev, const struct nl80211_mlme_event *event, gfp_t gfp) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(100 + event->buf_len + event->req_ies_len, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, event->cmd); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_FRAME, event->buf_len, event->buf) || (event->req_ies && nla_put(msg, NL80211_ATTR_REQ_IE, event->req_ies_len, event->req_ies))) goto nla_put_failure; if (event->reconnect && nla_put_flag(msg, NL80211_ATTR_RECONNECT_REQUESTED)) goto nla_put_failure; if (event->uapsd_queues >= 0) { struct nlattr *nla_wmm = nla_nest_start_noflag(msg, NL80211_ATTR_STA_WME); if (!nla_wmm) goto 
nla_put_failure; if (nla_put_u8(msg, NL80211_STA_WME_UAPSD_QUEUES, event->uapsd_queues)) goto nla_put_failure; nla_nest_end(msg, nla_wmm); } genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp) { struct nl80211_mlme_event event = { .cmd = NL80211_CMD_AUTHENTICATE, .buf = buf, .buf_len = len, .uapsd_queues = -1, }; nl80211_send_mlme_event(rdev, netdev, &event, gfp); } void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const struct cfg80211_rx_assoc_resp_data *data) { struct nl80211_mlme_event event = { .cmd = NL80211_CMD_ASSOCIATE, .buf = data->buf, .buf_len = data->len, .uapsd_queues = data->uapsd_queues, .req_ies = data->req_ies, .req_ies_len = data->req_ies_len, }; nl80211_send_mlme_event(rdev, netdev, &event, GFP_KERNEL); } void nl80211_send_deauth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, bool reconnect, gfp_t gfp) { struct nl80211_mlme_event event = { .cmd = NL80211_CMD_DEAUTHENTICATE, .buf = buf, .buf_len = len, .reconnect = reconnect, .uapsd_queues = -1, }; nl80211_send_mlme_event(rdev, netdev, &event, gfp); } void nl80211_send_disassoc(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, bool reconnect, gfp_t gfp) { struct nl80211_mlme_event event = { .cmd = NL80211_CMD_DISASSOCIATE, .buf = buf, .buf_len = len, .reconnect = reconnect, .uapsd_queues = -1, }; nl80211_send_mlme_event(rdev, netdev, &event, gfp); } void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); const struct ieee80211_mgmt *mgmt = (void 
*)buf; struct nl80211_mlme_event event = { .buf = buf, .buf_len = len, .uapsd_queues = -1, }; if (WARN_ON(len < 2)) return; if (ieee80211_is_deauth(mgmt->frame_control)) { event.cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE; } else if (ieee80211_is_disassoc(mgmt->frame_control)) { event.cmd = NL80211_CMD_UNPROT_DISASSOCIATE; } else if (ieee80211_is_beacon(mgmt->frame_control)) { if (wdev->unprot_beacon_reported && elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000) return; event.cmd = NL80211_CMD_UNPROT_BEACON; wdev->unprot_beacon_reported = jiffies; } else { return; } trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len); nl80211_send_mlme_event(rdev, dev, &event, GFP_ATOMIC); } EXPORT_SYMBOL(cfg80211_rx_unprot_mlme_mgmt); static void nl80211_send_mlme_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, int cmd, const u8 *addr, gfp_t gfp) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_auth_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp) { nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_AUTHENTICATE, addr, gfp); } void nl80211_send_assoc_timeout(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *addr, gfp_t gfp) { nl80211_send_mlme_timeout(rdev, netdev, NL80211_CMD_ASSOCIATE, addr, gfp); } void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_connect_resp_params 
*cr, gfp_t gfp) { struct sk_buff *msg; void *hdr; unsigned int link; size_t link_info_size = 0; const u8 *connected_addr = cr->valid_links ? cr->ap_mld_addr : cr->links[0].bssid; if (cr->valid_links) { for_each_valid_link(cr, link) { /* Nested attribute header */ link_info_size += NLA_HDRLEN; /* Link ID */ link_info_size += nla_total_size(sizeof(u8)); link_info_size += cr->links[link].addr ? nla_total_size(ETH_ALEN) : 0; link_info_size += (cr->links[link].bssid || cr->links[link].bss) ? nla_total_size(ETH_ALEN) : 0; link_info_size += nla_total_size(sizeof(u16)); } } msg = nlmsg_new(100 + cr->req_ie_len + cr->resp_ie_len + cr->fils.kek_len + cr->fils.pmk_len + (cr->fils.pmkid ? WLAN_PMKID_LEN : 0) + link_info_size, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONNECT); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || (connected_addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, connected_addr)) || nla_put_u16(msg, NL80211_ATTR_STATUS_CODE, cr->status < 0 ? 
WLAN_STATUS_UNSPECIFIED_FAILURE : cr->status) || (cr->status < 0 && (nla_put_flag(msg, NL80211_ATTR_TIMED_OUT) || nla_put_u32(msg, NL80211_ATTR_TIMEOUT_REASON, cr->timeout_reason))) || (cr->req_ie && nla_put(msg, NL80211_ATTR_REQ_IE, cr->req_ie_len, cr->req_ie)) || (cr->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, cr->resp_ie_len, cr->resp_ie)) || (cr->fils.update_erp_next_seq_num && nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, cr->fils.erp_next_seq_num)) || (cr->status == WLAN_STATUS_SUCCESS && ((cr->fils.kek && nla_put(msg, NL80211_ATTR_FILS_KEK, cr->fils.kek_len, cr->fils.kek)) || (cr->fils.pmk && nla_put(msg, NL80211_ATTR_PMK, cr->fils.pmk_len, cr->fils.pmk)) || (cr->fils.pmkid && nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, cr->fils.pmkid))))) goto nla_put_failure; if (cr->valid_links) { int i = 1; struct nlattr *nested; nested = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!nested) goto nla_put_failure; for_each_valid_link(cr, link) { struct nlattr *nested_mlo_links; const u8 *bssid = cr->links[link].bss ? cr->links[link].bss->bssid : cr->links[link].bssid; nested_mlo_links = nla_nest_start(msg, i); if (!nested_mlo_links) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link) || (bssid && nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) || (cr->links[link].addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, cr->links[link].addr)) || nla_put_u16(msg, NL80211_ATTR_STATUS_CODE, cr->links[link].status)) goto nla_put_failure; nla_nest_end(msg, nested_mlo_links); i++; } nla_nest_end(msg, nested); } genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp) { struct sk_buff *msg; void *hdr; size_t link_info_size = 0; unsigned int link; const u8 *connected_addr = info->ap_mld_addr ? 
info->ap_mld_addr : (info->links[0].bss ? info->links[0].bss->bssid : info->links[0].bssid); if (info->valid_links) { for_each_valid_link(info, link) { /* Nested attribute header */ link_info_size += NLA_HDRLEN; /* Link ID */ link_info_size += nla_total_size(sizeof(u8)); link_info_size += info->links[link].addr ? nla_total_size(ETH_ALEN) : 0; link_info_size += (info->links[link].bssid || info->links[link].bss) ? nla_total_size(ETH_ALEN) : 0; } } msg = nlmsg_new(100 + info->req_ie_len + info->resp_ie_len + info->fils.kek_len + info->fils.pmk_len + (info->fils.pmkid ? WLAN_PMKID_LEN : 0) + link_info_size, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ROAM); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, connected_addr) || (info->req_ie && nla_put(msg, NL80211_ATTR_REQ_IE, info->req_ie_len, info->req_ie)) || (info->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, info->resp_ie)) || (info->fils.update_erp_next_seq_num && nla_put_u16(msg, NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM, info->fils.erp_next_seq_num)) || (info->fils.kek && nla_put(msg, NL80211_ATTR_FILS_KEK, info->fils.kek_len, info->fils.kek)) || (info->fils.pmk && nla_put(msg, NL80211_ATTR_PMK, info->fils.pmk_len, info->fils.pmk)) || (info->fils.pmkid && nla_put(msg, NL80211_ATTR_PMKID, WLAN_PMKID_LEN, info->fils.pmkid))) goto nla_put_failure; if (info->valid_links) { int i = 1; struct nlattr *nested; nested = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!nested) goto nla_put_failure; for_each_valid_link(info, link) { struct nlattr *nested_mlo_links; const u8 *bssid = info->links[link].bss ? 
info->links[link].bss->bssid : info->links[link].bssid; nested_mlo_links = nla_nest_start(msg, i); if (!nested_mlo_links) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link) || (bssid && nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, bssid)) || (info->links[link].addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, info->links[link].addr))) goto nla_put_failure; nla_nest_end(msg, nested_mlo_links); i++; } nla_nest_end(msg, nested); } genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *peer_addr, const u8 *td_bitmap, u8 td_bitmap_len) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer_addr)) goto nla_put_failure; if (td_bitmap_len > 0 && td_bitmap && nla_put(msg, NL80211_ATTR_TD_BITMAP, td_bitmap_len, td_bitmap)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(100 + ie_len, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DISCONNECT); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || (reason && nla_put_u16(msg, NL80211_ATTR_REASON_CODE, 
reason)) || (from_ap && nla_put_flag(msg, NL80211_ATTR_DISCONNECTED_BY_AP)) || (ie && nla_put(msg, NL80211_ATTR_IE, ie_len, ie))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } void cfg80211_links_removed(struct net_device *dev, u16 link_mask) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct nlattr *links; void *hdr; lockdep_assert_wiphy(wdev->wiphy); trace_cfg80211_links_removed(dev, link_mask); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION && wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; if (WARN_ON(!wdev->valid_links || !link_mask || (wdev->valid_links & link_mask) != link_mask || wdev->valid_links == link_mask)) return; cfg80211_wdev_release_link_bsses(wdev, link_mask); wdev->valid_links &= ~link_mask; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_LINKS_REMOVED); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; links = nla_nest_start(msg, NL80211_ATTR_MLO_LINKS); if (!links) goto nla_put_failure; while (link_mask) { struct nlattr *link; int link_id = __ffs(link_mask); link = nla_nest_start(msg, link_id + 1); if (!link) goto nla_put_failure; if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) goto nla_put_failure; nla_nest_end(msg, link); link_mask &= ~(1 << link_id); } nla_nest_end(msg, links); genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_links_removed); void nl80211_mlo_reconf_add_done(struct net_device *dev, struct 
cfg80211_mlo_reconf_done_data *data) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct nl80211_mlme_event event = { .cmd = NL80211_CMD_ASSOC_MLO_RECONF, .buf = data->buf, .buf_len = data->len, .uapsd_queues = -1, }; nl80211_send_mlme_event(rdev, dev, &event, GFP_KERNEL); } EXPORT_SYMBOL(nl80211_mlo_reconf_add_done); void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *bssid, gfp_t gfp) { struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_JOIN_IBSS); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } void cfg80211_notify_new_peer_candidate(struct net_device *dev, const u8 *addr, const u8 *ie, u8 ie_len, int sig_dbm, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; if (WARN_ON(wdev->iftype != NL80211_IFTYPE_MESH_POINT)) return; trace_cfg80211_notify_new_peer_candidate(dev, addr); msg = nlmsg_new(100 + ie_len, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NEW_PEER_CANDIDATE); if (!hdr) { nlmsg_free(msg); return; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || (ie_len && ie && nla_put(msg, NL80211_ATTR_IE, ie_len, ie)) || (sig_dbm && nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm))) goto nla_put_failure; genlmsg_end(msg, hdr); 
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_notify_new_peer_candidate);

/*
 * nl80211_michael_mic_failure - multicast an
 * NL80211_CMD_MICHAEL_MIC_FAILURE event.
 * @rdev: registered device
 * @netdev: interface whose ifindex is reported
 * @addr: optional; added as NL80211_ATTR_MAC when non-NULL
 * @key_type: always added as NL80211_ATTR_KEY_TYPE
 * @key_id: added as NL80211_ATTR_KEY_IDX unless it is -1
 * @tsc: optional; when non-NULL, 6 bytes are added as NL80211_ATTR_KEY_SEQ
 * @gfp: allocation flags
 *
 * Failures drop the event silently.
 */
void nl80211_michael_mic_failure(struct cfg80211_registered_device *rdev,
				 struct net_device *netdev, const u8 *addr,
				 enum nl80211_key_type key_type, int key_id,
				 const u8 *tsc, gfp_t gfp)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_MICHAEL_MIC_FAILURE);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
	    (addr && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr)) ||
	    nla_put_u32(msg, NL80211_ATTR_KEY_TYPE, key_type) ||
	    (key_id != -1 &&
	     nla_put_u8(msg, NL80211_ATTR_KEY_IDX, key_id)) ||
	    (tsc && nla_put(msg, NL80211_ATTR_KEY_SEQ, 6, tsc)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * nl80211_send_beacon_hint_event - multicast an
 * NL80211_CMD_REG_BEACON_HINT event.
 * @wiphy: wiphy the hint applies to
 * @channel_before: channel description nested in NL80211_ATTR_FREQ_BEFORE
 * @channel_after: channel description nested in NL80211_ATTR_FREQ_AFTER
 *
 * Allocates with GFP_ATOMIC and sends on the regulatory group to all
 * network namespaces (genlmsg_multicast_allns). Failures drop the event
 * silently.
 */
void nl80211_send_beacon_hint_event(struct wiphy *wiphy,
				    struct ieee80211_channel *channel_before,
				    struct ieee80211_channel *channel_after)
{
	struct sk_buff *msg;
	void *hdr;
	struct nlattr *nl_freq;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_BEACON_HINT);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	/*
	 * Since we are applying the beacon hint to a wiphy we know its
	 * wiphy_idx is valid
	 */
	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy)))
		goto nla_put_failure;

	/* Before */
	nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_BEFORE);
	if (!nl_freq)
		goto nla_put_failure;

	if (nl80211_msg_put_channel(msg, wiphy, channel_before, false))
		goto nla_put_failure;
	nla_nest_end(msg, nl_freq);

	/* After */
	nl_freq = nla_nest_start_noflag(msg, NL80211_ATTR_FREQ_AFTER);
	if (!nl_freq)
		goto
nla_put_failure;

	if (nl80211_msg_put_channel(msg, wiphy, channel_after, false))
		goto nla_put_failure;
	nla_nest_end(msg, nl_freq);

	genlmsg_end(msg, hdr);

	genlmsg_multicast_allns(&nl80211_fam, msg, 0,
				NL80211_MCGRP_REGULATORY);

	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * nl80211_send_remain_on_chan_event - common builder for the
 * remain-on-channel family of events.
 * @cmd: nl80211 command to put in the header
 * @rdev: registered device
 * @wdev: wireless device; its ifindex is added only if it has a netdev
 * @cookie: added as NL80211_ATTR_COOKIE
 * @chan: its center_freq is added as NL80211_ATTR_WIPHY_FREQ
 * @duration: added as NL80211_ATTR_DURATION only when @cmd is
 *	NL80211_CMD_REMAIN_ON_CHANNEL
 * @gfp: allocation flags
 *
 * NL80211_ATTR_WIPHY_CHANNEL_TYPE is always reported as NL80211_CHAN_NO_HT.
 * The message is multicast on the MLME group; failures drop it silently.
 */
static void nl80211_send_remain_on_chan_event(
	int cmd, struct cfg80211_registered_device *rdev,
	struct wireless_dev *wdev, u64 cookie,
	struct ieee80211_channel *chan,
	unsigned int duration, gfp_t gfp)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
					 wdev->netdev->ifindex)) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) ||
	    nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE,
			NL80211_CHAN_NO_HT) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
			      NL80211_ATTR_PAD))
		goto nla_put_failure;

	if (cmd == NL80211_CMD_REMAIN_ON_CHANNEL &&
	    nla_put_u32(msg, NL80211_ATTR_DURATION, duration))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_assoc_comeback - multicast an NL80211_CMD_ASSOC_COMEBACK event.
 * @netdev: network device
 * @ap_addr: ETH_ALEN bytes placed in NL80211_ATTR_MAC
 * @timeout: value placed in NL80211_ATTR_TIMEOUT
 *
 * Allocates with GFP_KERNEL and sends on the MLME group. Failures drop the
 * event silently.
 */
void cfg80211_assoc_comeback(struct net_device *netdev,
			     const u8 *ap_addr, u32 timeout)
{
	struct wireless_dev *wdev = netdev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;

	trace_cfg80211_assoc_comeback(wdev, ap_addr, timeout);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ASSOC_COMEBACK);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ap_addr) ||
	    nla_put_u32(msg, NL80211_ATTR_TIMEOUT, timeout))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, GFP_KERNEL);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_assoc_comeback);

/*
 * cfg80211_ready_on_channel - trace and send an
 * NL80211_CMD_REMAIN_ON_CHANNEL event through
 * nl80211_send_remain_on_chan_event(), including @duration.
 */
void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie,
			       struct ieee80211_channel *chan,
			       unsigned int duration, gfp_t gfp)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration);
	nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL,
					  rdev, wdev, cookie, chan,
					  duration, gfp);
}
EXPORT_SYMBOL(cfg80211_ready_on_channel);

/*
 * cfg80211_remain_on_channel_expired - trace and send an
 * NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL event through
 * nl80211_send_remain_on_chan_event(); the duration passed is 0.
 */
void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie,
					struct ieee80211_channel *chan,
					gfp_t gfp)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan);
	nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
					  rdev, wdev, cookie, chan, 0, gfp);
}
EXPORT_SYMBOL(cfg80211_remain_on_channel_expired);

/*
 * cfg80211_tx_mgmt_expired - trace and send an
 * NL80211_CMD_FRAME_WAIT_CANCEL event through
 * nl80211_send_remain_on_chan_event(); the duration passed is 0.
 */
void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie,
			      struct ieee80211_channel *chan,
			      gfp_t gfp)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_tx_mgmt_expired(wdev, cookie, chan);
	nl80211_send_remain_on_chan_event(NL80211_CMD_FRAME_WAIT_CANCEL,
					  rdev, wdev, cookie, chan, 0, gfp);
}
EXPORT_SYMBOL(cfg80211_tx_mgmt_expired);

/*
 * cfg80211_new_sta - multicast an NL80211_CMD_NEW_STATION event.
 * @dev: network device
 * @mac_addr: station address passed to nl80211_send_station()
 * @sinfo: station information passed to nl80211_send_station()
 * @gfp: allocation flags
 *
 * The message body is produced by nl80211_send_station(); if that returns
 * a negative value the message is freed and nothing is sent.
 */
void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
		      struct station_info *sinfo, gfp_t gfp)
{
	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;

	trace_cfg80211_new_sta(dev, mac_addr, sinfo);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	if
(nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0,
				 rdev, dev, mac_addr, sinfo) < 0) {
		nlmsg_free(msg);
		return;
	}

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
}
EXPORT_SYMBOL(cfg80211_new_sta);

/*
 * cfg80211_del_sta_sinfo - multicast an NL80211_CMD_DEL_STATION event.
 * @dev: network device
 * @mac_addr: station address passed to nl80211_send_station()
 * @sinfo: station information, or NULL to report a zero-initialized one
 * @gfp: allocation flags
 *
 * If the message cannot be allocated, cfg80211_sinfo_release_content() is
 * called on @sinfo before returning.
 * NOTE(review): when nl80211_send_station() fails only the message is
 * freed here; presumably that helper releases the sinfo content itself -
 * confirm against its definition.
 */
void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr,
			    struct station_info *sinfo, gfp_t gfp)
{
	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	struct station_info empty_sinfo = {};

	if (!sinfo)
		sinfo = &empty_sinfo;

	trace_cfg80211_del_sta(dev, mac_addr);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg) {
		cfg80211_sinfo_release_content(sinfo);
		return;
	}

	if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0,
				 rdev, dev, mac_addr, sinfo) < 0) {
		nlmsg_free(msg);
		return;
	}

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
}
EXPORT_SYMBOL(cfg80211_del_sta_sinfo);

/*
 * cfg80211_conn_failed - multicast an NL80211_CMD_CONN_FAILED event.
 * @dev: network device whose ifindex is reported
 * @mac_addr: ETH_ALEN bytes placed in NL80211_ATTR_MAC
 * @reason: value placed in NL80211_ATTR_CONN_FAILED_REASON
 * @gfp: allocation flags
 *
 * The message is allocated with NLMSG_GOODSIZE and carries no
 * NL80211_ATTR_WIPHY attribute. Failures drop the event silently.
 */
void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
			  enum nl80211_connect_failed_reason reason,
			  gfp_t gfp)
{
	struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONN_FAILED);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) ||
	    nla_put_u32(msg, NL80211_ATTR_CONN_FAILED_REASON, reason))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_conn_failed);

/*
 * __nl80211_unexpected_frame - unicast an "unexpected frame" event to the
 * port stored in wdev->ap_unexpected_nlportid.
 * @dev: network device
 * @cmd: nl80211 command to put in the header
 * @addr: ETH_ALEN bytes placed in NL80211_ATTR_MAC
 * @gfp: allocation flags
 *
 * Return: false when no port ID is set; true otherwise, including when
 * the message could not be allocated or filled.
 */
static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
				       const u8 *addr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev =
wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;
	u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid);

	if (!nlportid)
		return false;

	msg = nlmsg_new(100, gfp);
	if (!msg)
		return true;

	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
	if (!hdr) {
		nlmsg_free(msg);
		return true;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);
	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
	return true;

nla_put_failure:
	nlmsg_free(msg);
	return true;
}

/*
 * cfg80211_rx_spurious_frame - report NL80211_CMD_UNEXPECTED_FRAME for
 * @addr via __nl80211_unexpected_frame().
 *
 * Only valid on AP and P2P-GO interfaces; any other interface type
 * triggers WARN_ON and returns false. The result is also traced.
 *
 * Return: the value returned by __nl80211_unexpected_frame().
 */
bool cfg80211_rx_spurious_frame(struct net_device *dev,
				const u8 *addr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	bool ret;

	trace_cfg80211_rx_spurious_frame(dev, addr);

	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
		    wdev->iftype != NL80211_IFTYPE_P2P_GO)) {
		trace_cfg80211_return_bool(false);
		return false;
	}
	ret = __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
					 addr, gfp);
	trace_cfg80211_return_bool(ret);
	return ret;
}
EXPORT_SYMBOL(cfg80211_rx_spurious_frame);

/*
 * cfg80211_rx_unexpected_4addr_frame - report
 * NL80211_CMD_UNEXPECTED_4ADDR_FRAME for @addr via
 * __nl80211_unexpected_frame().
 *
 * Only valid on AP, P2P-GO and AP-VLAN interfaces; any other interface
 * type triggers WARN_ON and returns false. The result is also traced.
 *
 * Return: the value returned by __nl80211_unexpected_frame().
 */
bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
					const u8 *addr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	bool ret;

	trace_cfg80211_rx_unexpected_4addr_frame(dev, addr);

	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
		    wdev->iftype != NL80211_IFTYPE_P2P_GO &&
		    wdev->iftype != NL80211_IFTYPE_AP_VLAN)) {
		trace_cfg80211_return_bool(false);
		return false;
	}
	ret = __nl80211_unexpected_frame(dev,
					 NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
					 addr, gfp);
	trace_cfg80211_return_bool(ret);
	return ret;
}
EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);

/*
 * nl80211_send_mgmt - unicast a received frame as NL80211_CMD_FRAME.
 * @rdev: registered device
 * @wdev: wireless device; its ifindex is added only if it has a netdev
 * @nlportid: destination netlink port
 * @info: frame buffer and metadata; info->freq is split into a MHz part
 *	(KHZ_TO_MHZ) and a remainder (% 1000). The link ID, signal, flags
 *	and both hardware timestamps are added only when set / non-zero.
 * @gfp: allocation flags
 *
 * Return: -ENOMEM if the message or its header cannot be created,
 * -ENOBUFS if an attribute does not fit, otherwise the result of
 * genlmsg_unicast().
 */
int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
		      struct wireless_dev *wdev, u32 nlportid,
		      struct cfg80211_rx_info *info, gfp_t gfp)
{
	struct net_device *netdev = wdev->netdev;
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(100 + info->len, gfp);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, 0, 0,
0, NL80211_CMD_FRAME);
	if (!hdr) {
		nlmsg_free(msg);
		return -ENOMEM;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
				   netdev->ifindex)) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    (info->have_link_id &&
	     nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, info->link_id)) ||
	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(info->freq)) ||
	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, info->freq % 1000) ||
	    (info->sig_dbm &&
	     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, info->sig_dbm)) ||
	    nla_put(msg, NL80211_ATTR_FRAME, info->len, info->buf) ||
	    (info->flags &&
	     nla_put_u32(msg, NL80211_ATTR_RXMGMT_FLAGS, info->flags)) ||
	    (info->rx_tstamp && nla_put_u64_64bit(msg,
						  NL80211_ATTR_RX_HW_TIMESTAMP,
						  info->rx_tstamp,
						  NL80211_ATTR_PAD)) ||
	    (info->ack_tstamp && nla_put_u64_64bit(msg,
						   NL80211_ATTR_TX_HW_TIMESTAMP,
						   info->ack_tstamp,
						   NL80211_ATTR_PAD)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);

nla_put_failure:
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * nl80211_frame_tx_status - multicast the TX status of a frame.
 * @wdev: wireless device; its ifindex is added only if it has a netdev
 * @status: cookie, frame buffer, ack flag and optional timestamps
 * @gfp: allocation flags
 * @command: nl80211 command for the header; it also selects which
 *	tracepoint fires (the mgmt one for NL80211_CMD_FRAME_TX_STATUS,
 *	the control-port one otherwise)
 *
 * status->tx_tstamp is reported as NL80211_ATTR_TX_HW_TIMESTAMP and
 * status->ack_tstamp as NL80211_ATTR_RX_HW_TIMESTAMP, each only when
 * non-zero. The message is sent on the MLME group; failures drop it
 * silently.
 */
static void nl80211_frame_tx_status(struct wireless_dev *wdev,
				    struct cfg80211_tx_status *status,
				    gfp_t gfp, enum nl80211_commands command)
{
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct net_device *netdev = wdev->netdev;
	struct sk_buff *msg;
	void *hdr;

	if (command == NL80211_CMD_FRAME_TX_STATUS)
		trace_cfg80211_mgmt_tx_status(wdev, status->cookie,
					      status->ack);
	else
		trace_cfg80211_control_port_tx_status(wdev, status->cookie,
						      status->ack);

	msg = nlmsg_new(100 + status->len, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, command);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    (netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
				   netdev->ifindex)) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD) ||
	    nla_put(msg, NL80211_ATTR_FRAME,
status->len, status->buf) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, status->cookie,
			      NL80211_ATTR_PAD) ||
	    (status->ack && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
	    (status->tx_tstamp &&
	     nla_put_u64_64bit(msg, NL80211_ATTR_TX_HW_TIMESTAMP,
			       status->tx_tstamp, NL80211_ATTR_PAD)) ||
	    (status->ack_tstamp &&
	     nla_put_u64_64bit(msg, NL80211_ATTR_RX_HW_TIMESTAMP,
			       status->ack_tstamp, NL80211_ATTR_PAD)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_control_port_tx_status - report the TX status of a control
 * port frame.
 *
 * Packs @cookie, @buf, @len and @ack into a struct cfg80211_tx_status
 * (other fields zero) and sends it through nl80211_frame_tx_status() as
 * NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS.
 */
void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
				     const u8 *buf, size_t len, bool ack,
				     gfp_t gfp)
{
	struct cfg80211_tx_status status = {
		.cookie = cookie,
		.buf = buf,
		.len = len,
		.ack = ack
	};

	nl80211_frame_tx_status(wdev, &status, gfp,
				NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS);
}
EXPORT_SYMBOL(cfg80211_control_port_tx_status);

/*
 * cfg80211_mgmt_tx_status_ext - report the TX status in @status through
 * nl80211_frame_tx_status() as NL80211_CMD_FRAME_TX_STATUS.
 */
void cfg80211_mgmt_tx_status_ext(struct wireless_dev *wdev,
				 struct cfg80211_tx_status *status, gfp_t gfp)
{
	nl80211_frame_tx_status(wdev, status, gfp, NL80211_CMD_FRAME_TX_STATUS);
}
EXPORT_SYMBOL(cfg80211_mgmt_tx_status_ext);

/*
 * __nl80211_rx_control_port - unicast a received control port frame as
 * NL80211_CMD_CONTROL_PORT_FRAME to wdev->conn_owner_nlportid.
 * @dev: network device
 * @skb: received frame; the source address comes from its Ethernet
 *	header and the ethertype from skb->protocol
 * @unencrypted: adds NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT when true
 * @link_id: added as NL80211_ATTR_MLO_LINK_ID when >= 0
 * @gfp: allocation flags
 *
 * Return: -ENOENT when no owner port is set, -ENOMEM when the message
 * cannot be allocated, -ENOBUFS when the header or an attribute does not
 * fit, otherwise the result of genlmsg_unicast().
 */
static int __nl80211_rx_control_port(struct net_device *dev,
				     struct sk_buff *skb,
				     bool unencrypted,
				     int link_id,
				     gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct ethhdr *ehdr = eth_hdr(skb);
	const u8 *addr = ehdr->h_source;
	u16 proto = be16_to_cpu(skb->protocol);
	struct sk_buff *msg;
	void *hdr;
	struct nlattr *frame;

	u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);

	if (!nlportid)
		return -ENOENT;

	msg = nlmsg_new(100 + skb->len, gfp);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CONTROL_PORT_FRAME);
	if (!hdr) {
		nlmsg_free(msg);
		return -ENOBUFS;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV,
wdev_id(wdev), NL80211_ATTR_PAD) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
	    nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
	    (link_id >= 0 &&
	     nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)) ||
	    (unencrypted && nla_put_flag(msg,
					 NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
		goto nla_put_failure;

	/* Reserve room for the frame, then copy it straight out of the skb. */
	frame = nla_reserve(msg, NL80211_ATTR_FRAME, skb->len);
	if (!frame)
		goto nla_put_failure;

	skb_copy_bits(skb, 0, nla_data(frame), skb->len);
	genlmsg_end(msg, hdr);

	return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);

nla_put_failure:
	nlmsg_free(msg);
	return -ENOBUFS;
}

/*
 * cfg80211_rx_control_port - hand a received control port frame to the
 * connection owner via __nl80211_rx_control_port(), using GFP_ATOMIC.
 *
 * Return: true when the helper returned 0, false otherwise. The result is
 * also traced.
 */
bool cfg80211_rx_control_port(struct net_device *dev, struct sk_buff *skb,
			      bool unencrypted, int link_id)
{
	int ret;

	trace_cfg80211_rx_control_port(dev, skb, unencrypted, link_id);
	ret = __nl80211_rx_control_port(dev, skb, unencrypted, link_id,
					GFP_ATOMIC);
	trace_cfg80211_return_bool(ret == 0);
	return ret == 0;
}
EXPORT_SYMBOL(cfg80211_rx_control_port);

/*
 * cfg80211_prepare_cqm - start an NL80211_CMD_NOTIFY_CQM message.
 * @dev: network device
 * @mac: optional; added as NL80211_ATTR_MAC when non-NULL
 * @gfp: allocation flags
 *
 * Adds the wiphy index and ifindex and opens an NL80211_ATTR_CQM nest
 * that the caller fills in. msg->cb is used as scratch storage for
 * cfg80211_send_cqm(): cb[0] holds the genl header, cb[1] the open nest
 * and cb[2] the registered device.
 *
 * Return: the prepared message, or NULL on failure (message freed).
 */
static struct sk_buff *cfg80211_prepare_cqm(struct net_device *dev,
					    const char *mac, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	void **cb;

	if (!msg)
		return NULL;

	cb = (void **)msg->cb;

	cb[0] = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
	if (!cb[0]) {
		nlmsg_free(msg);
		return NULL;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	if (mac && nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
		goto nla_put_failure;

	cb[1] = nla_nest_start_noflag(msg, NL80211_ATTR_CQM);
	if (!cb[1])
		goto nla_put_failure;

	cb[2] = rdev;

	return msg;
nla_put_failure:
	nlmsg_free(msg);
	return NULL;
}

/*
 * cfg80211_send_cqm - finish and multicast a message started by
 * cfg80211_prepare_cqm().
 * @msg: message whose cb[] holds the header, open nest and rdev
 * @gfp: allocation flags for the multicast
 *
 * Closes the nest and the header, zeroes msg->cb and sends the message on
 * the MLME group.
 */
static void cfg80211_send_cqm(struct sk_buff *msg, gfp_t gfp)
{
	void **cb = (void **)msg->cb;
	struct cfg80211_registered_device *rdev = cb[2];

	nla_nest_end(msg, cb[1]);
	genlmsg_end(msg, cb[0]);

	memset(msg->cb, 0,
sizeof(msg->cb));

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
}

/*
 * cfg80211_cqm_rssi_notify - record an RSSI threshold event and schedule
 * its notification.
 * @dev: network device
 * @rssi_event: must be the LOW or HIGH threshold event; anything else
 *	triggers WARN_ON and returns
 * @rssi_level: value stored alongside the event
 * @gfp: not used by this function body
 *
 * No message is built here. Under rcu_read_lock(), the event and level
 * are stored in wdev->cqm_config (if one is set) and wdev->cqm_rssi_work
 * is queued.
 */
void cfg80211_cqm_rssi_notify(struct net_device *dev,
			      enum nl80211_cqm_rssi_threshold_event rssi_event,
			      s32 rssi_level, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_cqm_config *cqm_config;

	trace_cfg80211_cqm_rssi_notify(dev, rssi_event, rssi_level);

	if (WARN_ON(rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW &&
		    rssi_event != NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH))
		return;

	rcu_read_lock();
	cqm_config = rcu_dereference(wdev->cqm_config);
	if (cqm_config) {
		cqm_config->last_rssi_event_value = rssi_level;
		cqm_config->last_rssi_event_type = rssi_event;
		wiphy_work_queue(wdev->wiphy, &wdev->cqm_rssi_work);
	}
	rcu_read_unlock();
}
EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);

/*
 * cfg80211_cqm_rssi_notify_work - wiphy work handler that sends the most
 * recently recorded RSSI threshold event.
 * @wiphy: wiphy the work runs for
 * @work: embedded in struct wireless_dev as cqm_rssi_work
 *
 * Returns early when no CQM config is set. When the config has
 * use_range_api set, cfg80211_cqm_rssi_update() is called first. The CQM
 * message then carries the last event type, plus the last RSSI level only
 * when that level is non-zero.
 */
void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, struct wiphy_work *work)
{
	struct wireless_dev *wdev = container_of(work, struct wireless_dev,
						 cqm_rssi_work);
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	enum nl80211_cqm_rssi_threshold_event rssi_event;
	struct cfg80211_cqm_config *cqm_config;
	struct sk_buff *msg;
	s32 rssi_level;

	cqm_config = wiphy_dereference(wdev->wiphy, wdev->cqm_config);
	if (!cqm_config)
		return;

	if (cqm_config->use_range_api)
		cfg80211_cqm_rssi_update(rdev, wdev->netdev, cqm_config);

	rssi_level = cqm_config->last_rssi_event_value;
	rssi_event = cqm_config->last_rssi_event_type;

	msg = cfg80211_prepare_cqm(wdev->netdev, NULL, GFP_KERNEL);
	if (!msg)
		return;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
			rssi_event))
		goto nla_put_failure;

	if (rssi_level && nla_put_s32(msg, NL80211_ATTR_CQM_RSSI_LEVEL,
				      rssi_level))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, GFP_KERNEL);

	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_cqm_txe_notify - send a CQM message carrying
 * NL80211_ATTR_CQM_TXE_PKTS, _RATE and _INTVL for @peer.
 * @dev: network device
 * @peer: passed to cfg80211_prepare_cqm() as the MAC address
 * @num_packets: value for NL80211_ATTR_CQM_TXE_PKTS
 * @rate: value for NL80211_ATTR_CQM_TXE_RATE
 * @intvl: value for NL80211_ATTR_CQM_TXE_INTVL
 * @gfp: allocation flags
 *
 * Failures drop the event silently.
 */
void cfg80211_cqm_txe_notify(struct net_device *dev,
			     const u8 *peer, u32 num_packets,
			     u32 rate, u32 intvl, gfp_t gfp)
{
	struct sk_buff *msg;

	msg = cfg80211_prepare_cqm(dev, peer, gfp);
	if (!msg)
return;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_PKTS, num_packets))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_RATE, rate))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_TXE_INTVL, intvl))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_cqm_txe_notify);

/*
 * cfg80211_cqm_pktloss_notify - send a CQM message carrying
 * NL80211_ATTR_CQM_PKT_LOSS_EVENT (@num_packets) for @peer.
 *
 * Failures drop the event silently.
 */
void cfg80211_cqm_pktloss_notify(struct net_device *dev,
				 const u8 *peer, u32 num_packets, gfp_t gfp)
{
	struct sk_buff *msg;

	trace_cfg80211_cqm_pktloss_notify(dev, peer, num_packets);

	msg = cfg80211_prepare_cqm(dev, peer, gfp);
	if (!msg)
		return;

	if (nla_put_u32(msg, NL80211_ATTR_CQM_PKT_LOSS_EVENT, num_packets))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_cqm_pktloss_notify);

/*
 * cfg80211_cqm_beacon_loss_notify - send a CQM message carrying only the
 * NL80211_ATTR_CQM_BEACON_LOSS_EVENT flag (no MAC address).
 *
 * Failures drop the event silently.
 */
void cfg80211_cqm_beacon_loss_notify(struct net_device *dev, gfp_t gfp)
{
	struct sk_buff *msg;

	msg = cfg80211_prepare_cqm(dev, NULL, gfp);
	if (!msg)
		return;

	if (nla_put_flag(msg, NL80211_ATTR_CQM_BEACON_LOSS_EVENT))
		goto nla_put_failure;

	cfg80211_send_cqm(msg, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_cqm_beacon_loss_notify);

/*
 * nl80211_gtk_rekey_notify - multicast an NL80211_CMD_SET_REKEY_OFFLOAD
 * event.
 * @rdev: registered device
 * @netdev: interface whose ifindex is reported
 * @bssid: ETH_ALEN bytes placed in NL80211_ATTR_MAC
 * @replay_ctr: NL80211_REPLAY_CTR_LEN bytes placed in
 *	NL80211_REKEY_DATA_REPLAY_CTR inside an NL80211_ATTR_REKEY_DATA nest
 * @gfp: allocation flags
 *
 * Failures drop the event silently.
 */
static void nl80211_gtk_rekey_notify(struct cfg80211_registered_device *rdev,
				     struct net_device *netdev,
				     const u8 *bssid, const u8 *replay_ctr,
				     gfp_t gfp)
{
	struct sk_buff *msg;
	struct nlattr *rekey_attr;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_REKEY_OFFLOAD);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid))
		goto nla_put_failure;

	rekey_attr = nla_nest_start_noflag(msg, NL80211_ATTR_REKEY_DATA);
	if (!rekey_attr)
		goto nla_put_failure;

	if (nla_put(msg, NL80211_REKEY_DATA_REPLAY_CTR,
		    NL80211_REPLAY_CTR_LEN, replay_ctr))
		goto nla_put_failure;

	nla_nest_end(msg, rekey_attr);

genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_gtk_rekey_notify - trace the event and forward it to
 * nl80211_gtk_rekey_notify() for the device's rdev.
 */
void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
			       const u8 *replay_ctr, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_gtk_rekey_notify(dev, bssid);
	nl80211_gtk_rekey_notify(rdev, dev, bssid, replay_ctr, gfp);
}
EXPORT_SYMBOL(cfg80211_gtk_rekey_notify);

/*
 * nl80211_pmksa_candidate_notify - multicast an
 * NL80211_CMD_PMKSA_CANDIDATE event.
 * @rdev: registered device
 * @netdev: interface whose ifindex is reported
 * @index: value for NL80211_PMKSA_CANDIDATE_INDEX
 * @bssid: ETH_ALEN bytes for NL80211_PMKSA_CANDIDATE_BSSID
 * @preauth: adds the NL80211_PMKSA_CANDIDATE_PREAUTH flag when true
 * @gfp: allocation flags
 *
 * The candidate fields are nested inside NL80211_ATTR_PMKSA_CANDIDATE.
 * Failures drop the event silently.
 */
static void
nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
			       struct net_device *netdev, int index,
			       const u8 *bssid, bool preauth, gfp_t gfp)
{
	struct sk_buff *msg;
	struct nlattr *attr;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PMKSA_CANDIDATE);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	attr = nla_nest_start_noflag(msg, NL80211_ATTR_PMKSA_CANDIDATE);
	if (!attr)
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_PMKSA_CANDIDATE_INDEX, index) ||
	    nla_put(msg, NL80211_PMKSA_CANDIDATE_BSSID, ETH_ALEN, bssid) ||
	    (preauth &&
	     nla_put_flag(msg, NL80211_PMKSA_CANDIDATE_PREAUTH)))
		goto nla_put_failure;

	nla_nest_end(msg, attr);

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_pmksa_candidate_notify - trace the event and forward it to
 * nl80211_pmksa_candidate_notify() for the device's rdev.
 */
void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
				     const u8 *bssid, bool preauth, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	trace_cfg80211_pmksa_candidate_notify(dev, index, bssid, preauth);
	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
}
EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);

/*
 * nl80211_ch_switch_notify - multicast a channel switch event.
 * @rdev: registered device
 * @netdev: interface whose ifindex is reported
 * @link_id: added as NL80211_ATTR_MLO_LINK_ID only when the wdev has
 *	valid_links set
 * @chandef: channel definition written by nl80211_send_chandef()
 * @gfp: allocation flags
 * @notif: nl80211 command to put in the header
 * @count: added as NL80211_ATTR_CH_SWITCH_COUNT, only for
 *	NL80211_CMD_CH_SWITCH_STARTED_NOTIFY
 * @quiet: adds NL80211_ATTR_CH_SWITCH_BLOCK_TX when true, only for
 *	NL80211_CMD_CH_SWITCH_STARTED_NOTIFY
 *
 * The message carries no NL80211_ATTR_WIPHY attribute. Failures drop the
 * event silently.
 */
static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
				     struct net_device *netdev,
				     unsigned int link_id,
				     struct cfg80211_chan_def *chandef,
				     gfp_t gfp,
				     enum nl80211_commands notif,
				     u8 count, bool quiet)
{
	struct wireless_dev *wdev = netdev->ieee80211_ptr;
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, notif);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex))
		goto nla_put_failure;

	if (wdev->valid_links &&
	    nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
		goto nla_put_failure;

	if (nl80211_send_chandef(msg, chandef))
		goto nla_put_failure;

	if (notif == NL80211_CMD_CH_SWITCH_STARTED_NOTIFY) {
		if (nla_put_u32(msg, NL80211_ATTR_CH_SWITCH_COUNT, count))
			goto nla_put_failure;
		if (quiet &&
		    nla_put_flag(msg, NL80211_ATTR_CH_SWITCH_BLOCK_TX))
			goto nla_put_failure;
	}

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_ch_switch_notify - record a completed channel switch and send
 * NL80211_CMD_CH_SWITCH_NOTIFY.
 * @dev: network device
 * @chandef: the new channel definition
 * @link_id: link the switch applies to
 *
 * Asserts the wiphy lock via lockdep_assert_wiphy(). The new chandef is
 * stored according to the interface type:
 *  - station / P2P client: the associated BSS entry for @link_id is
 *    updated, after warning if there is no current BSS
 *  - mesh point: both u.mesh.chandef and u.mesh.preset_chandef
 *  - AP / P2P GO: links[link_id].ap.chandef
 *  - ad-hoc: u.ibss.chandef
 *  - anything else: WARN_ON
 * It then calls cfg80211_schedule_channels_check() and
 * cfg80211_sched_dfs_chan_update() and sends the event with GFP_KERNEL.
 */
void cfg80211_ch_switch_notify(struct net_device *dev,
			       struct cfg80211_chan_def *chandef,
			       unsigned int link_id)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	lockdep_assert_wiphy(wdev->wiphy);
	WARN_INVALID_LINK_ID(wdev, link_id);

	trace_cfg80211_ch_switch_notify(dev, chandef, link_id);

	switch (wdev->iftype) {
	case NL80211_IFTYPE_STATION:
	case NL80211_IFTYPE_P2P_CLIENT:
		if (!WARN_ON(!wdev->links[link_id].client.current_bss))
			cfg80211_update_assoc_bss_entry(wdev, link_id,
							chandef->chan);
		break;
	case NL80211_IFTYPE_MESH_POINT:
		wdev->u.mesh.chandef = *chandef;
		wdev->u.mesh.preset_chandef = *chandef;
		break;
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_P2P_GO:
		wdev->links[link_id].ap.chandef = *chandef;
		break;
	case NL80211_IFTYPE_ADHOC:
		wdev->u.ibss.chandef =
*chandef;
		break;
	default:
		WARN_ON(1);
		break;
	}

	cfg80211_schedule_channels_check(wdev);
	cfg80211_sched_dfs_chan_update(rdev);

	nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
				 NL80211_CMD_CH_SWITCH_NOTIFY, 0, false);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);

/*
 * cfg80211_ch_switch_started_notify - send
 * NL80211_CMD_CH_SWITCH_STARTED_NOTIFY for a channel switch in progress.
 * @dev: network device
 * @chandef: the channel definition being switched to
 * @link_id: link the switch applies to
 * @count: forwarded as the channel switch count
 * @quiet: forwarded as the block-TX flag
 *
 * Asserts the wiphy lock via lockdep_assert_wiphy(). No wdev state is
 * changed here; the event is sent with GFP_KERNEL.
 */
void cfg80211_ch_switch_started_notify(struct net_device *dev,
				       struct cfg80211_chan_def *chandef,
				       unsigned int link_id, u8 count,
				       bool quiet)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	lockdep_assert_wiphy(wdev->wiphy);
	WARN_INVALID_LINK_ID(wdev, link_id);

	trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id);

	nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
				 NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
				 count, quiet);
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);

/*
 * cfg80211_bss_color_notify - multicast a BSS color event.
 * @dev: network device whose ifindex is reported
 * @cmd: nl80211 command to put in the header
 * @count: added as NL80211_ATTR_COLOR_CHANGE_COUNT only when @cmd is
 *	NL80211_CMD_COLOR_CHANGE_STARTED
 * @color_bitmap: added as NL80211_ATTR_OBSS_COLOR_BITMAP only when @cmd
 *	is NL80211_CMD_OBSS_COLOR_COLLISION
 * @link_id: added as NL80211_ATTR_MLO_LINK_ID only when the wdev has
 *	valid_links set
 *
 * Asserts the wiphy lock via lockdep_assert_wiphy() and allocates with
 * GFP_KERNEL.
 *
 * Return: -ENOMEM if the message cannot be allocated, -EINVAL if the
 * header or any attribute cannot be added, otherwise the result of
 * genlmsg_multicast_netns().
 */
int cfg80211_bss_color_notify(struct net_device *dev,
			      enum nl80211_commands cmd, u8 count,
			      u64 color_bitmap, u8 link_id)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct wiphy *wiphy = wdev->wiphy;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;

	lockdep_assert_wiphy(wdev->wiphy);

	trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
	if (!hdr)
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	if (wdev->valid_links &&
	    nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))
		goto nla_put_failure;

	if (cmd == NL80211_CMD_COLOR_CHANGE_STARTED &&
	    nla_put_u32(msg, NL80211_ATTR_COLOR_CHANGE_COUNT, count))
		goto nla_put_failure;

	if (cmd == NL80211_CMD_OBSS_COLOR_COLLISION &&
	    nla_put_u64_64bit(msg, NL80211_ATTR_OBSS_COLOR_BITMAP,
			      color_bitmap, NL80211_ATTR_PAD))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
				       msg, 0,
NL80211_MCGRP_MLME, GFP_KERNEL);

nla_put_failure:
	nlmsg_free(msg);
	return -EINVAL;
}
EXPORT_SYMBOL(cfg80211_bss_color_notify);

/*
 * nl80211_radar_notify - multicast an NL80211_CMD_RADAR_DETECT event.
 * @rdev: registered device
 * @chandef: channel definition written by nl80211_send_chandef()
 * @event: value for NL80211_ATTR_RADAR_EVENT
 * @netdev: optional; when non-NULL its ifindex and wdev ID are added
 * @gfp: allocation flags
 *
 * The message is sent on the MLME group. Failures drop it silently.
 */
void
nl80211_radar_notify(struct cfg80211_registered_device *rdev,
		     const struct cfg80211_chan_def *chandef,
		     enum nl80211_radar_event event,
		     struct net_device *netdev, gfp_t gfp)
{
	struct sk_buff *msg;
	void *hdr;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_RADAR_DETECT);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
		goto nla_put_failure;

	/* NOP and radar events don't need a netdev parameter */
	if (netdev) {
		struct wireless_dev *wdev = netdev->ieee80211_ptr;

		if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) ||
		    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
				      NL80211_ATTR_PAD))
			goto nla_put_failure;
	}

	if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event))
		goto nla_put_failure;

	if (nl80211_send_chandef(msg, chandef))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}

/*
 * cfg80211_sta_opmode_change_notify - multicast an
 * NL80211_CMD_STA_OPMODE_CHANGED event.
 * @dev: network device
 * @mac: station address; NULL triggers WARN_ON and returns
 * @sta_opmode: changed values; sta_opmode->changed selects which of
 *	NL80211_ATTR_SMPS_MODE, NL80211_ATTR_CHANNEL_WIDTH and
 *	NL80211_ATTR_NSS are added
 * @gfp: allocation flags
 *
 * Failures drop the event silently.
 */
void cfg80211_sta_opmode_change_notify(struct net_device *dev, const u8 *mac,
				       struct sta_opmode_info *sta_opmode,
				       gfp_t gfp)
{
	struct sk_buff *msg;
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	void *hdr;

	if (WARN_ON(!mac))
		return;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STA_OPMODE_CHANGED);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx))
		goto nla_put_failure;

	if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac))
		goto nla_put_failure;

	if ((sta_opmode->changed & STA_OPMODE_SMPS_MODE_CHANGED) &&
	    nla_put_u8(msg, NL80211_ATTR_SMPS_MODE, sta_opmode->smps_mode))
goto nla_put_failure;

	if ((sta_opmode->changed & STA_OPMODE_MAX_BW_CHANGED) &&
	    nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, sta_opmode->bw))
		goto nla_put_failure;

	if ((sta_opmode->changed & STA_OPMODE_N_SS_CHANGED) &&
	    nla_put_u8(msg, NL80211_ATTR_NSS, sta_opmode->rx_nss))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);

	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_sta_opmode_change_notify);

/*
 * cfg80211_probe_status - multicast an NL80211_CMD_PROBE_CLIENT event.
 * @dev: network device
 * @addr: ETH_ALEN bytes placed in NL80211_ATTR_MAC
 * @cookie: value for NL80211_ATTR_COOKIE
 * @acked: adds the NL80211_ATTR_ACK flag when true
 * @ack_signal: value for NL80211_ATTR_ACK_SIGNAL
 * @is_valid_ack_signal: @ack_signal is added only when this is true
 * @gfp: allocation flags
 *
 * Failures drop the event silently.
 */
void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
			   u64 cookie, bool acked, s32 ack_signal,
			   bool is_valid_ack_signal, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;

	trace_cfg80211_probe_status(dev, addr, cookie, acked);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);

	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PROBE_CLIENT);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
	    nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie,
			      NL80211_ATTR_PAD) ||
	    (acked && nla_put_flag(msg, NL80211_ATTR_ACK)) ||
	    (is_valid_ack_signal && nla_put_s32(msg, NL80211_ATTR_ACK_SIGNAL,
						ack_signal)))
		goto nla_put_failure;

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

nla_put_failure:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_probe_status);

/*
 * cfg80211_report_obss_beacon_khz - unicast a beacon frame to every
 * registered beacon listener as NL80211_CMD_FRAME.
 * @wiphy: wiphy the frame was seen on
 * @frame: frame buffer
 * @len: length of @frame
 * @freq: split into a MHz part (KHZ_TO_MHZ) and a remainder (% 1000);
 *	both attributes are omitted when @freq is 0
 * @sig_dbm: added as NL80211_ATTR_RX_SIGNAL_DBM only when non-zero
 *
 * Walks rdev->beacon_registrations under beacon_registrations_lock
 * (spin_lock_bh), building one GFP_ATOMIC message per entry. The first
 * allocation or attribute failure ends the walk, so remaining listeners
 * are not notified.
 */
void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
				     size_t len, int freq, int sig_dbm)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	struct sk_buff *msg;
	void *hdr;
	struct cfg80211_beacon_registration *reg;

	trace_cfg80211_report_obss_beacon(wiphy, frame, len, freq, sig_dbm);

	spin_lock_bh(&rdev->beacon_registrations_lock);
list_for_each_entry(reg, &rdev->beacon_registrations, list) {
		msg = nlmsg_new(len + 100, GFP_ATOMIC);
		if (!msg) {
			spin_unlock_bh(&rdev->beacon_registrations_lock);
			return;
		}

		hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
		if (!hdr)
			goto nla_put_failure;

		if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
		    (freq &&
		     (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
				  KHZ_TO_MHZ(freq)) ||
		      nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
				  freq % 1000))) ||
		    (sig_dbm &&
		     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
		    nla_put(msg, NL80211_ATTR_FRAME, len, frame))
			goto nla_put_failure;

		genlmsg_end(msg, hdr);

		genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, reg->nlportid);
	}
	spin_unlock_bh(&rdev->beacon_registrations_lock);
	return;

nla_put_failure:
	spin_unlock_bh(&rdev->beacon_registrations_lock);
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_obss_beacon_khz);

#ifdef CONFIG_PM
/*
 * cfg80211_net_detect_results - append net-detect match results to a
 * wakeup message.
 * @msg: message being built
 * @wakeup: wakeup information; wakeup->net_detect is dereferenced
 *	unconditionally, so the caller must have checked it
 *
 * Opens an NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS nest and adds one
 * nested entry per match, indexed from 0, holding the SSID and, when the
 * match has channels, an NL80211_ATTR_SCAN_FREQUENCIES nest.
 *
 * If a match does not fit, its partly written nest is cancelled and the
 * loop stops; matches already written are kept and the results nest is
 * still closed.
 *
 * Return: -EMSGSIZE if the outer nest cannot be opened, otherwise 0, even
 * when the list of matches was truncated.
 */
static int cfg80211_net_detect_results(struct sk_buff *msg,
				       struct cfg80211_wowlan_wakeup *wakeup)
{
	struct cfg80211_wowlan_nd_info *nd = wakeup->net_detect;
	struct nlattr *nl_results, *nl_match, *nl_freqs;
	int i, j;

	nl_results = nla_nest_start_noflag(msg,
					   NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS);
	if (!nl_results)
		return -EMSGSIZE;

	for (i = 0; i < nd->n_matches; i++) {
		struct cfg80211_wowlan_nd_match *match = nd->matches[i];

		nl_match = nla_nest_start_noflag(msg, i);
		if (!nl_match)
			break;

		/* The SSID attribute is optional in nl80211, but for
		 * simplicity reasons it's always present in the
		 * cfg80211 structure.  If a driver can't pass the
		 * SSID, that needs to be changed.  A zero length SSID
		 * is still a valid SSID (wildcard), so it cannot be
		 * used for this purpose.
		 */
		if (nla_put(msg, NL80211_ATTR_SSID, match->ssid.ssid_len,
			    match->ssid.ssid)) {
			nla_nest_cancel(msg, nl_match);
			goto out;
		}

		if (match->n_channels) {
			nl_freqs = nla_nest_start_noflag(msg,
							 NL80211_ATTR_SCAN_FREQUENCIES);
			if (!nl_freqs) {
				nla_nest_cancel(msg, nl_match);
				goto out;
			}

			for (j = 0; j < match->n_channels; j++) {
				if (nla_put_u32(msg, j, match->channels[j])) {
					nla_nest_cancel(msg, nl_freqs);
					nla_nest_cancel(msg, nl_match);
					goto out;
				}
			}

			nla_nest_end(msg, nl_freqs);
		}

		nla_nest_end(msg, nl_match);
	}

out:
	nla_nest_end(msg, nl_results);
	return 0;
}

/*
 * cfg80211_report_wowlan_wakeup - multicast an NL80211_CMD_SET_WOWLAN
 * event describing why the device woke up.
 * @wdev: wireless device; its ifindex is added only if it has a netdev
 * @wakeup: wakeup reasons, or NULL to send the event without an
 *	NL80211_ATTR_WOWLAN_TRIGGERS nest
 * @gfp: allocation flags
 *
 * The message is sized as 200 bytes plus wakeup->packet_present_len.
 * Inside the triggers nest, each boolean reason becomes a flag attribute,
 * pattern_idx is added when >= 0, and a wakeup packet is reported with
 * the 802.11 attribute pair, or the 802.3 pair when packet_80211 is
 * false. The packet length attribute is added only when packet_len is
 * non-zero. Net-detect results are appended when wakeup->net_detect is
 * set. Any failure frees the message and sends nothing.
 */
void cfg80211_report_wowlan_wakeup(struct wireless_dev *wdev,
				   struct cfg80211_wowlan_wakeup *wakeup,
				   gfp_t gfp)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;
	int size = 200;

	trace_cfg80211_report_wowlan_wakeup(wdev->wiphy, wdev, wakeup);

	if (wakeup)
		size += wakeup->packet_present_len;

	msg = nlmsg_new(size, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_SET_WOWLAN);
	if (!hdr)
		goto free_msg;

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
			      NL80211_ATTR_PAD))
		goto free_msg;

	if (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX,
					wdev->netdev->ifindex))
		goto free_msg;

	if (wakeup) {
		struct nlattr *reasons;

		reasons = nla_nest_start_noflag(msg,
						NL80211_ATTR_WOWLAN_TRIGGERS);
		if (!reasons)
			goto free_msg;

		if (wakeup->disconnect &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_DISCONNECT))
			goto free_msg;
		if (wakeup->magic_pkt &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_MAGIC_PKT))
			goto free_msg;
		if (wakeup->gtk_rekey_failure &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE))
			goto free_msg;
		if (wakeup->eap_identity_req &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST))
			goto free_msg;
		if (wakeup->four_way_handshake &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE))
			goto free_msg;
		if (wakeup->rfkill_release &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_RFKILL_RELEASE))
			goto free_msg;

		if (wakeup->pattern_idx >= 0 &&
		    nla_put_u32(msg, NL80211_WOWLAN_TRIG_PKT_PATTERN,
				wakeup->pattern_idx))
			goto free_msg;

		if (wakeup->tcp_match &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_MATCH))
			goto free_msg;

		if (wakeup->tcp_connlost &&
		    nla_put_flag(msg, NL80211_WOWLAN_TRIG_WAKEUP_TCP_CONNLOST))
			goto free_msg;

		if (wakeup->tcp_nomoretokens &&
		    nla_put_flag(msg,
				 NL80211_WOWLAN_TRIG_WAKEUP_TCP_NOMORETOKENS))
			goto free_msg;

		if (wakeup->unprot_deauth_disassoc &&
		    nla_put_flag(msg,
				 NL80211_WOWLAN_TRIG_UNPROTECTED_DEAUTH_DISASSOC))
			goto free_msg;

		if (wakeup->packet) {
			u32 pkt_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211;
			u32 len_attr = NL80211_WOWLAN_TRIG_WAKEUP_PKT_80211_LEN;

			if (!wakeup->packet_80211) {
				pkt_attr =
					NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023;
				len_attr =
					NL80211_WOWLAN_TRIG_WAKEUP_PKT_8023_LEN;
			}

			if (wakeup->packet_len &&
			    nla_put_u32(msg, len_attr, wakeup->packet_len))
				goto free_msg;

			if (nla_put(msg, pkt_attr, wakeup->packet_present_len,
				    wakeup->packet))
				goto free_msg;
		}

		if (wakeup->net_detect &&
		    cfg80211_net_detect_results(msg, wakeup))
			goto free_msg;

		nla_nest_end(msg, reasons);
	}

	genlmsg_end(msg, hdr);

	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
				NL80211_MCGRP_MLME, gfp);
	return;

free_msg:
	nlmsg_free(msg);
}
EXPORT_SYMBOL(cfg80211_report_wowlan_wakeup);
#endif

/*
 * cfg80211_tdls_oper_request - multicast an NL80211_CMD_TDLS_OPER event.
 * @dev: network device
 * @peer: ETH_ALEN bytes placed in NL80211_ATTR_MAC
 * @oper: value for NL80211_ATTR_TDLS_OPERATION
 * @reason_code: added as NL80211_ATTR_REASON_CODE only when greater
 *	than 0
 * @gfp: allocation flags
 *
 * Failures drop the event silently.
 */
void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer,
				enum nl80211_tdls_operation oper,
				u16 reason_code, gfp_t gfp)
{
	struct wireless_dev *wdev = dev->ieee80211_ptr;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
	struct sk_buff *msg;
	void *hdr;

	trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper,
					 reason_code);

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
	if (!msg)
		return;

	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_TDLS_OPER);
	if (!hdr) {
		nlmsg_free(msg);
		return;
	}

	if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
	    nla_put_u32(msg, NL80211_ATTR_IFINDEX,
dev->ifindex) || nla_put_u8(msg, NL80211_ATTR_TDLS_OPERATION, oper) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer) || (reason_code > 0 && nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_tdls_oper_request); static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) { struct netlink_notify *notify = _notify; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; struct cfg80211_beacon_registration *reg, *tmp; if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC) return NOTIFY_DONE; rcu_read_lock(); list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { struct cfg80211_sched_scan_request *sched_scan_req; list_for_each_entry_rcu(sched_scan_req, &rdev->sched_scan_req_list, list) { if (sched_scan_req->owner_nlportid == notify->portid) { sched_scan_req->nl_owner_dead = true; wiphy_work_queue(&rdev->wiphy, &rdev->sched_scan_stop_wk); } } list_for_each_entry_rcu(wdev, &rdev->wiphy.wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); if (wdev->owner_nlportid == notify->portid) { wdev->nl_owner_dead = true; schedule_work(&rdev->destroy_work); } else if (wdev->conn_owner_nlportid == notify->portid) { schedule_work(&wdev->disconnect_wk); } cfg80211_release_pmsr(wdev, notify->portid); } spin_lock_bh(&rdev->beacon_registrations_lock); list_for_each_entry_safe(reg, tmp, &rdev->beacon_registrations, list) { if (reg->nlportid == notify->portid) { list_del(&reg->list); kfree(reg); break; } } spin_unlock_bh(&rdev->beacon_registrations_lock); } rcu_read_unlock(); /* * It is possible that the user space process that is controlling the * indoor setting disappeared, so notify the regulatory core. 
*/ regulatory_netlink_notify(notify->portid); return NOTIFY_OK; } static struct notifier_block nl80211_netlink_notifier = { .notifier_call = nl80211_netlink_notify, }; void cfg80211_ft_event(struct net_device *netdev, struct cfg80211_ft_event_params *ft_event) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; trace_cfg80211_ft_event(wiphy, netdev, ft_event); if (!ft_event->target_ap) return; msg = nlmsg_new(100 + ft_event->ies_len + ft_event->ric_ies_len, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FT_EVENT); if (!hdr) goto out; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, ft_event->target_ap)) goto out; if (ft_event->ies && nla_put(msg, NL80211_ATTR_IE, ft_event->ies_len, ft_event->ies)) goto out; if (ft_event->ric_ies && nla_put(msg, NL80211_ATTR_IE_RIC, ft_event->ric_ies_len, ft_event->ric_ies)) goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; out: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_ft_event); void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp) { struct cfg80211_registered_device *rdev; struct sk_buff *msg; void *hdr; u32 nlportid; rdev = wiphy_to_rdev(wdev->wiphy); if (!rdev->crit_proto_nlportid) return; nlportid = rdev->crit_proto_nlportid; rdev->crit_proto_nlportid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_CRIT_PROTOCOL_STOP); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: 
nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_crit_proto_stopped); void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_STOP_AP); if (!hdr) goto out; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || (wdev->valid_links && nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id))) goto out; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; out: nlmsg_free(msg); } int cfg80211_external_auth_request(struct net_device *dev, struct cfg80211_external_auth_params *params, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; if (!wdev->conn_owner_nlportid) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return -ENOMEM; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EXTERNAL_AUTH); if (!hdr) goto nla_put_failure; /* Some historical mistakes in drivers <-> userspace interface (notably * between drivers and wpa_supplicant) led to a big-endian conversion * being needed on NL80211_ATTR_AKM_SUITES _only_ when its value is * WLAN_AKM_SUITE_SAE. This is now fixed on userspace side, but for the * benefit of older wpa_supplicant versions, send this particular value * in big-endian. Note that newer wpa_supplicant will also detect this * particular value in big endian still, so it all continues to work. 
*/ if (params->key_mgmt_suite == WLAN_AKM_SUITE_SAE) { if (nla_put_be32(msg, NL80211_ATTR_AKM_SUITES, cpu_to_be32(WLAN_AKM_SUITE_SAE))) goto nla_put_failure; } else { if (nla_put_u32(msg, NL80211_ATTR_AKM_SUITES, params->key_mgmt_suite)) goto nla_put_failure; } if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u32(msg, NL80211_ATTR_EXTERNAL_AUTH_ACTION, params->action) || nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) || nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len, params->ssid.ssid) || (!is_zero_ether_addr(params->mld_addr) && nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN, params->mld_addr))) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, wdev->conn_owner_nlportid); return 0; nla_put_failure: nlmsg_free(msg); return -ENOBUFS; } EXPORT_SYMBOL(cfg80211_external_auth_request); void cfg80211_update_owe_info_event(struct net_device *netdev, struct cfg80211_update_owe_info *owe_info, gfp_t gfp) { struct wiphy *wiphy = netdev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; trace_cfg80211_update_owe_info_event(wiphy, netdev, owe_info); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UPDATE_OWE_INFO); if (!hdr) goto nla_put_failure; if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, owe_info->peer)) goto nla_put_failure; if (!owe_info->ie_len || nla_put(msg, NL80211_ATTR_IE, owe_info->ie_len, owe_info->ie)) goto nla_put_failure; if (owe_info->assoc_link_id != -1) { if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, owe_info->assoc_link_id)) goto nla_put_failure; if (!is_zero_ether_addr(owe_info->peer_mld_addr) && nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN, owe_info->peer_mld_addr)) goto nla_put_failure; 
} genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); return; nla_put_failure: genlmsg_cancel(msg, hdr); nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_update_owe_info_event); void cfg80211_schedule_channels_check(struct wireless_dev *wdev) { struct wiphy *wiphy = wdev->wiphy; /* Schedule channels check if NO_IR or DFS relaxations are supported */ if (wdev->iftype == NL80211_IFTYPE_STATION && (wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_CONCURRENT) || (IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) && wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))) reg_check_channels(); } EXPORT_SYMBOL(cfg80211_schedule_channels_check); void cfg80211_epcs_changed(struct net_device *netdev, bool enabled) { struct wireless_dev *wdev = netdev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; void *hdr; trace_cfg80211_epcs_changed(wdev, enabled); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_EPCS_CFG); if (!hdr) { nlmsg_free(msg); return; } if (enabled && nla_put_flag(msg, NL80211_ATTR_EPCS)) goto nla_put_failure; genlmsg_end(msg, hdr); genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL); return; nla_put_failure: nlmsg_free(msg); } EXPORT_SYMBOL(cfg80211_epcs_changed); /* initialisation/exit functions */ int __init nl80211_init(void) { int err; err = genl_register_family(&nl80211_fam); if (err) return err; err = netlink_register_notifier(&nl80211_netlink_notifier); if (err) goto err_out; return 0; err_out: genl_unregister_family(&nl80211_fam); return err; } void nl80211_exit(void) { netlink_unregister_notifier(&nl80211_netlink_notifier); genl_unregister_family(&nl80211_fam); }
128 488 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PGTABLE_64_H
#define _ASM_X86_PGTABLE_64_H

#include <linux/const.h>
#include <asm/pgtable_64_types.h>

#ifndef __ASSEMBLY__

/*
 * This file contains the functions and defines necessary to modify and use
 * the x86-64 page table tree.
 */
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
#include <asm/fixmap.h>

/* Page-table arrays defined elsewhere; each fixed-size one has 512 entries. */
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
extern pud_t level3_kernel_pgt[512];
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];

#define swapper_pg_dir init_top_pgt

extern void paging_init(void);
/* Intentionally empty in this header. */
static inline void sync_initial_page_table(void) { }

/*
 * Error reporters: print file, line, the address of the entry and its raw
 * value via pr_err().  The argument must be an lvalue since &(e) is taken.
 */
#define pte_ERROR(e)					\
	pr_err("%s:%d: bad pte %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pte_val(e))
#define pmd_ERROR(e)					\
	pr_err("%s:%d: bad pmd %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pmd_val(e))
#define pud_ERROR(e)					\
	pr_err("%s:%d: bad pud %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pud_val(e))

#if CONFIG_PGTABLE_LEVELS >= 5
#define p4d_ERROR(e)					\
	pr_err("%s:%d: bad p4d %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), p4d_val(e))
#endif

#define pgd_ERROR(e)					\
	pr_err("%s:%d: bad pgd %p(%016lx)\n",		\
	       __FILE__, __LINE__, &(e), pgd_val(e))

struct mm_struct;

#define mm_p4d_folded mm_p4d_folded
/* True when 5-level paging is not enabled; @mm itself is not inspected. */
static inline bool mm_p4d_folded(struct mm_struct *mm)
{
	return !pgtable_l5_enabled();
}

void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);

/* Store @pte into *@ptep with WRITE_ONCE(). */
static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
	WRITE_ONCE(*ptep, pte);
}

/* Set *@ptep to an all-zero entry; @mm and @addr are unused here. */
static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
				    pte_t *ptep)
{
	native_set_pte(ptep, native_make_pte(0));
}

/* Same as native_set_pte() in this header. */
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
	native_set_pte(ptep, pte);
}

/* Store @pmd into *@pmdp with WRITE_ONCE(). */
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
	WRITE_ONCE(*pmdp, pmd);
}

/* Set *@pmd to an all-zero entry. */
static inline void native_pmd_clear(pmd_t *pmd)
{
	native_set_pmd(pmd, native_make_pmd(0));
}

/*
 * Return the old value of *@xp and leave the entry zeroed.  With CONFIG_SMP
 * the swap is done with xchg(); otherwise the entry is read and then
 * cleared in two steps.
 */
static inline pte_t native_ptep_get_and_clear(pte_t *xp)
{
#ifdef CONFIG_SMP
	return native_make_pte(xchg(&xp->pte, 0));
#else
	/* native_local_ptep_get_and_clear,
	   but duplicated because of cyclic dependency */
	pte_t ret = *xp;
	native_pte_clear(NULL, 0, xp);
	return ret;
#endif
}

/* PMD counterpart of native_ptep_get_and_clear(). */
static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
{
#ifdef CONFIG_SMP
	return native_make_pmd(xchg(&xp->pmd, 0));
#else
	/* native_local_pmdp_get_and_clear,
	   but duplicated because of cyclic dependency */
	pmd_t ret = *xp;
	native_pmd_clear(xp);
	return ret;
#endif
}

/* Store @pud into *@pudp with WRITE_ONCE(). */
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
	WRITE_ONCE(*pudp, pud);
}

/* Set *@pud to an all-zero entry. */
static inline void native_pud_clear(pud_t *pud)
{
	native_set_pud(pud, native_make_pud(0));
}

/* PUD counterpart of native_ptep_get_and_clear(). */
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
#ifdef CONFIG_SMP
	return native_make_pud(xchg(&xp->pud, 0));
#else
	/* native_local_pudp_get_and_clear,
	 * but duplicated because of cyclic dependency
	 */
	pud_t ret = *xp;

	native_pud_clear(xp);
	return ret;
#endif
}

/*
 * Store @p4d into *@p4dp.
 *
 * With 5-level paging enabled, or with
 * CONFIG_MITIGATION_PAGE_TABLE_ISOLATION disabled, this is a plain
 * WRITE_ONCE().  Otherwise the entry is handled as a pgd: the value is
 * passed through pti_set_user_pgtbl() and whatever that returns is stored.
 */
static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
	pgd_t pgd;

	if (pgtable_l5_enabled() ||
	    !IS_ENABLED(CONFIG_MITIGATION_PAGE_TABLE_ISOLATION)) {
		WRITE_ONCE(*p4dp, p4d);
		return;
	}

	pgd = native_make_pgd(native_p4d_val(p4d));
	pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd);
	WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd)));
}

/* Set *@p4d to an all-zero entry via native_set_p4d(). */
static inline void native_p4d_clear(p4d_t *p4d)
{
	native_set_p4d(p4d, native_make_p4d(0));
}

/* Store the result of pti_set_user_pgtbl(@pgdp, @pgd) into *@pgdp. */
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
	WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd));
}

/* Set *@pgd to an all-zero entry via native_set_pgd(). */
static inline void native_pgd_clear(pgd_t *pgd)
{
	native_set_pgd(pgd, native_make_pgd(0));
}

/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */

/* PGD - Level 4 access */

/* PUD - Level 3 access */

/* PMD - Level 2 access */

/* PTE - Level 1 access */

/*
 * Encode and de-code a swap entry
 *
 * |     ...            | 11| 10|  9|8|7|6|5| 4| 3|2| 1|0| <- bit number
 * |     ...            |SW3|SW2|SW1|G|L|D|A|CD|WT|U| W|P| <- bit names
 * | TYPE (59-63) | ~OFFSET (9-58)  |0|0|X|X| X| E|F|SD|0| <- swp entry
 *
 * G (8) is aliased and used as a PROT_NONE indicator for
 * !present ptes.  We need to start storing swap entries above
 * there.  We also need to avoid using A and D because of an
 * erratum where they can be incorrectly set by hardware on
 * non-present PTEs.
 *
 * SD Bits 1-4 are not used in non-present format and available for
 * special use described below:
 *
 * SD (1) in swp entry is used to store soft dirty bit, which helps us
 * remember soft dirty over page migration
 *
 * F (2) in swp entry is used to record when a pagetable is
 * writeprotected by userfaultfd WP support.
 *
 * E (3) in swp entry is used to remember PG_anon_exclusive.
 *
 * Bit 7 in swp entry should be 0 because pmd_present checks not only P,
 * but also L and G.
 *
 * The offset is inverted by a binary not operation to make the high
 * physical bits set.
 */
#define SWP_TYPE_BITS		5

#define SWP_OFFSET_FIRST_BIT	(_PAGE_BIT_PROTNONE + 1)

/* We always extract/encode the offset by shifting it all the way up, and then down again */
#define SWP_OFFSET_SHIFT	(SWP_OFFSET_FIRST_BIT+SWP_TYPE_BITS)

#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)

/* Extract the high bits for type */
#define __swp_type(x) ((x).val >> (64 - SWP_TYPE_BITS))

/* Shift up (to get rid of type), then down to get value */
#define __swp_offset(x) (~(x).val << SWP_TYPE_BITS >> SWP_OFFSET_SHIFT)

/*
 * Shift the offset up "too far" by TYPE bits, then down again
 * The offset is inverted by a binary not operation to make the high
 * physical bits set.
 */
#define __swp_entry(type, offset) ((swp_entry_t) { \
	(~(unsigned long)(offset) << SWP_OFFSET_SHIFT >> SWP_TYPE_BITS) \
	| ((unsigned long)(type) << (64-SWP_TYPE_BITS)) })

/* The raw pte/pmd value and the swp_entry_t value are used interchangeably. */
#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
#define __pmd_to_swp_entry(pmd)		((swp_entry_t) { pmd_val((pmd)) })
#define __swp_entry_to_pte(x)		(__pte((x).val))
#define __swp_entry_to_pmd(x)		(__pmd((x).val))

extern void cleanup_highmap(void);

#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN

#define PAGE_AGP    PAGE_KERNEL_NOCACHE
#define HAVE_PAGE_AGP 1

/* fs/proc/kcore.c */
#define	kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
#define	kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)

#define __HAVE_ARCH_PTE_SAME

#define vmemmap ((struct page *)VMEMMAP_START)

extern void init_extra_mapping_uc(unsigned long phys, unsigned long size);
extern void init_extra_mapping_wb(unsigned long phys, unsigned long size);

#define gup_fast_permitted gup_fast_permitted
/*
 * Return false when @end has any bit set at or above
 * __VIRTUAL_MASK_SHIFT, true otherwise.  @start is not examined.
 */
static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
{
	if (end >> __VIRTUAL_MASK_SHIFT)
		return false;
	return true;
}

#include <asm/pgtable-invert.h>

#else /* __ASSEMBLY__ */

/* Index helpers for assembler sources: l4_index() takes bits 39..47. */
#define l4_index(x)	(((x) >> 39) & 511)
#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
L4_START_KERNEL = l4_index(__START_KERNEL_map)

L3_START_KERNEL = pud_index(__START_KERNEL_map)

#define SYM_DATA_START_PAGE_ALIGNED(name)			\
	SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)

/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT)			\
	i = 0 ;						\
	.rept (COUNT) ;					\
	.quad	(START) + (i << PMD_SHIFT) + (PERM) ;	\
	i = i + 1 ;					\
	.endr

#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PGTABLE_64_H */
56 56 56 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 
2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 
3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 
3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 /* SPDX-License-Identifier: GPL-2.0 * * IO cost model based controller. * * Copyright (C) 2019 Tejun Heo <tj@kernel.org> * Copyright (C) 2019 Andy Newell <newella@fb.com> * Copyright (C) 2019 Facebook * * One challenge of controlling IO resources is the lack of trivially * observable cost metric. This is distinguished from CPU and memory where * wallclock time and the number of bytes can serve as accurate enough * approximations. * * Bandwidth and iops are the most commonly used metrics for IO devices but * depending on the type and specifics of the device, different IO patterns * easily lead to multiple orders of magnitude variations rendering them * useless for the purpose of IO capacity distribution. While on-device * time, with a lot of clutches, could serve as a useful approximation for * non-queued rotational devices, this is no longer viable with modern * devices, even the rotational ones. * * While there is no cost metric we can trivially observe, it isn't a * complete mystery. For example, on a rotational device, seek cost * dominates while a contiguous transfer contributes a smaller amount * proportional to the size. 
If we can characterize at least the relative * costs of these different types of IOs, it should be possible to * implement a reasonable work-conserving proportional IO resource * distribution. * * 1. IO Cost Model * * IO cost model estimates the cost of an IO given its basic parameters and * history (e.g. the end sector of the last IO). The cost is measured in * device time. If a given IO is estimated to cost 10ms, the device should * be able to process ~100 of those IOs in a second. * * Currently, there's only one builtin cost model - linear. Each IO is * classified as sequential or random and given a base cost accordingly. * On top of that, a size cost proportional to the length of the IO is * added. While simple, this model captures the operational * characteristics of a wide variety of devices well enough. Default * parameters for several different classes of devices are provided and the * parameters can be configured from userspace via * /sys/fs/cgroup/io.cost.model. * * If needed, tools/cgroup/iocost_coef_gen.py can be used to generate * device-specific coefficients. * * 2. Control Strategy * * The device virtual time (vtime) is used as the primary control metric. * The control strategy is composed of the following three parts. * * 2-1. Vtime Distribution * * When a cgroup becomes active in terms of IOs, its hierarchical share is * calculated. Please consider the following hierarchy where the numbers * inside parentheses denote the configured weights. * * root * / \ * A (w:100) B (w:300) * / \ * A0 (w:100) A1 (w:100) * * If B is idle and only A0 and A1 are actively issuing IOs, as the two are * of equal weight, each gets 50% share. If then B starts issuing IOs, B * gets 300/(100+300) or 75% share, and A0 and A1 equally split the rest, * 12.5% each. The distribution mechanism only cares about these flattened * shares. They're called hweights (hierarchical weights) and always add * up to 1 (WEIGHT_ONE). 
* * A given cgroup's vtime runs slower in inverse proportion to its hweight. * For example, with 12.5% weight, A0's time runs 8 times slower (100/12.5) * against the device vtime - an IO which takes 10ms on the underlying * device is considered to take 80ms on A0. * * This constitutes the basis of IO capacity distribution. Each cgroup's * vtime is running at a rate determined by its hweight. A cgroup tracks * the vtime consumed by past IOs and can issue a new IO if doing so * wouldn't outrun the current device vtime. Otherwise, the IO is * suspended until the vtime has progressed enough to cover it. * * 2-2. Vrate Adjustment * * It's unrealistic to expect the cost model to be perfect. There are too * many devices and even on the same device the overall performance * fluctuates depending on numerous factors such as IO mixture and device * internal garbage collection. The controller needs to adapt dynamically. * * This is achieved by adjusting the overall IO rate according to how busy * the device is. If the device becomes overloaded, we're sending down too * many IOs and should generally slow down. If there are waiting issuers * but the device isn't saturated, we're issuing too few and should * generally speed up. * * To slow down, we lower the vrate - the rate at which the device vtime * passes compared to the wall clock. For example, if the vtime is running * at the vrate of 75%, all cgroups added up would only be able to issue * 750ms worth of IOs per second, and vice-versa for speeding up. * * Device busyness is determined using two criteria - rq wait and * completion latencies. * * When a device gets saturated, the on-device and then the request queues * fill up and a bio which is ready to be issued has to wait for a request * to become available. When this delay becomes noticeable, it's a clear * indication that the device is saturated and we lower the vrate. 
This * saturation signal is fairly conservative as it only triggers when both * hardware and software queues are filled up, and is used as the default * busy signal. * * As devices can have deep queues and be unfair in how the queued commands * are executed, solely depending on rq wait may not result in satisfactory * control quality. For a better control quality, completion latency QoS * parameters can be configured so that the device is considered saturated * if N'th percentile completion latency rises above the set point. * * The completion latency requirements are a function of both the * underlying device characteristics and the desired IO latency quality of * service. There is an inherent trade-off - the tighter the latency QoS, * the higher the bandwidth lossage. Latency QoS is disabled by default * and can be set through /sys/fs/cgroup/io.cost.qos. * * 2-3. Work Conservation * * Imagine two cgroups A and B with equal weights. A is issuing a small IO * periodically while B is sending out enough parallel IOs to saturate the * device on its own. Let's say A's usage amounts to 100ms worth of IO * cost per second, i.e., 10% of the device capacity. The naive * distribution of half and half would lead to 60% utilization of the * device, a significant reduction in the total amount of work done * compared to free-for-all competition. This is too high a cost to pay * for IO control. * * To conserve the total amount of work done, we keep track of how much * each active cgroup is actually using and yield part of its weight if * there are other cgroups which can make use of it. In the above case, * A's weight will be lowered so that it hovers above the actual usage and * B would be able to use the rest. * * As we don't want to penalize a cgroup for donating its weight, the * surplus weight adjustment factors in a margin and has an immediate * snapback mechanism in case the cgroup needs more IO vtime for itself. 
*
 * Note that adjusting down surplus weights has the same effects as
 * accelerating vtime for other cgroups and work conservation can also be
 * implemented by adjusting vrate dynamically.  However, squaring who can
 * donate and should take back how much requires hweight propagations
 * anyway making it easier to implement and understand as a separate
 * mechanism.
 *
 * 3. Monitoring
 *
 * Instead of debugfs or other clumsy monitoring mechanisms, this
 * controller uses a drgn based monitoring script -
 * tools/cgroup/iocost_monitor.py.  For details on drgn, please see
 * https://github.com/osandov/drgn.  The output looks like the following.
 *
 *  sdb RUN   per=300ms cur_per=234.218:v203.695 busy= +1 vrate= 62.12%
 *                 active      weight      hweight% inflt% dbt  delay usages%
 *  test/a              *    50/   50  33.33/ 33.33  27.65   2  0*041 033:033:033
 *  test/b              *   100/  100  66.67/ 66.67  17.56   0  0*000 066:079:077
 *
 * - per	: Timer period
 * - cur_per	: Internal wall and device vtime clock
 * - vrate	: Device virtual time rate against wall clock
 * - weight	: Surplus-adjusted and configured weights
 * - hweight	: Surplus-adjusted and configured hierarchical weights
 * - inflt	: The percentage of in-flight IO cost at the end of last period
 * - delay	: Deferred issuer delay induction level and duration
 * - usages	: Usage history
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/time64.h>
#include <linux/parser.h>
#include <linux/sched/signal.h>
#include <asm/local.h>
#include <asm/local64.h>
#include "blk-rq-qos.h"
#include "blk-stat.h"
#include "blk-wbt.h"
#include "blk-cgroup.h"

#ifdef CONFIG_TRACEPOINTS

/* copied from TRACE_CGROUP_PATH, see cgroup-internal.h */
#define TRACE_IOCG_PATH_LEN 1024
static DEFINE_SPINLOCK(trace_iocg_path_lock);
static char trace_iocg_path[TRACE_IOCG_PATH_LEN];

/*
 * Emit the iocost tracepoint @type for @iocg if it is enabled.  The cgroup
 * path is rendered into the shared trace_iocg_path buffer, which is
 * serialized by trace_iocg_path_lock.
 */
#define TRACE_IOCG_PATH(type, iocg, ...)					\
	do {									\
		unsigned long flags;						\
		if (trace_iocost_##type##_enabled()) {				\
			spin_lock_irqsave(&trace_iocg_path_lock, flags);	\
			cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup,	\
				    trace_iocg_path, TRACE_IOCG_PATH_LEN);	\
			trace_iocost_##type(iocg, trace_iocg_path,		\
					      ##__VA_ARGS__);			\
			spin_unlock_irqrestore(&trace_iocg_path_lock, flags);	\
		}								\
	} while (0)

#else	/* CONFIG_TRACEPOINTS */
#define TRACE_IOCG_PATH(type, iocg, ...)	do { } while (0)
#endif	/* CONFIG_TRACEPOINTS */

enum {
	MILLION			= 1000000,

	/* timer period is calculated from latency requirements, bound it */
	MIN_PERIOD		= USEC_PER_MSEC,
	MAX_PERIOD		= USEC_PER_SEC,

	/*
	 * iocg->vtime is targeted at 50% behind the device vtime, which
	 * serves as its IO credit buffer.  Surplus weight adjustment is
	 * immediately canceled if the vtime margin runs below 10%.
	 */
	MARGIN_MIN_PCT		= 10,
	MARGIN_LOW_PCT		= 20,
	MARGIN_TARGET_PCT	= 50,

	INUSE_ADJ_STEP_PCT	= 25,

	/* Have some play in timer operations */
	TIMER_SLACK_PCT		= 1,

	/* 1/64k is granular enough and can easily be handled w/ u32 */
	WEIGHT_ONE		= 1 << 16,
};

enum {
	/*
	 * As vtime is used to calculate the cost of each IO, it needs to
	 * be fairly high precision.  For example, it should be able to
	 * represent the cost of a single page worth of discard with
	 * sufficient accuracy.  At the same time, it should be able to
	 * represent reasonably long enough durations to be useful and
	 * convenient during operation.
	 *
	 * 1s worth of vtime is 2^37.  This gives us both sub-nanosecond
	 * granularity and days of wrap-around time even at extreme vrates.
	 */
	VTIME_PER_SEC_SHIFT	= 37,
	VTIME_PER_SEC		= 1LLU << VTIME_PER_SEC_SHIFT,
	VTIME_PER_USEC		= VTIME_PER_SEC / USEC_PER_SEC,
	VTIME_PER_NSEC		= VTIME_PER_SEC / NSEC_PER_SEC,

	/* bound vrate adjustments within two orders of magnitude */
	VRATE_MIN_PPM		= 10000,	/* 1% */
	VRATE_MAX_PPM		= 100000000,	/* 10000% */

	VRATE_MIN		= VTIME_PER_USEC * VRATE_MIN_PPM / MILLION,
	VRATE_CLAMP_ADJ_PCT	= 4,

	/* switch iff the conditions are met for longer than this */
	AUTOP_CYCLE_NSEC	= 10LLU * NSEC_PER_SEC,
};

enum {
	/* if IOs end up waiting for requests, issue less */
	RQ_WAIT_BUSY_PCT	= 5,

	/* unbusy hysteresis */
	UNBUSY_THR_PCT		= 75,

	/*
	 * The effect of delay is indirect and non-linear and a huge amount of
	 * future debt can accumulate abruptly while unthrottled.  Linearly scale
	 * up delay as debt is going up and then let it decay exponentially.
	 * This gives us quick ramp ups while delay is accumulating and long
	 * tails which can help reducing the frequency of debt explosions on
	 * unthrottle.  The parameters are experimentally determined.
	 *
	 * The delay mechanism provides adequate protection and behavior in many
	 * cases.  However, this is far from ideal and falls short on both
	 * fronts.  The debtors are often throttled too harshly costing a
	 * significant level of fairness and possibly total work while the
	 * protection against their impacts on the system can be choppy and
	 * unreliable.
	 *
	 * The shortcoming primarily stems from the fact that, unlike for page
	 * cache, the kernel doesn't have well-defined back-pressure propagation
	 * mechanism and policies for anonymous memory.  Fully addressing this
	 * issue will likely require substantial improvements in the area.
	 */
	MIN_DELAY_THR_PCT	= 500,
	MAX_DELAY_THR_PCT	= 25000,
	MIN_DELAY		= 250,
	MAX_DELAY		= 250 * USEC_PER_MSEC,

	/* halve debts if avg usage over 100ms is under 50% */
	DFGV_USAGE_PCT		= 50,
	DFGV_PERIOD		= 100 * USEC_PER_MSEC,

	/* don't let cmds which take a very long time pin lagging for too long */
	MAX_LAGGING_PERIODS	= 10,

	/*
	 * Count IO size in 4k pages.  The 12bit shift helps keeping
	 * size-proportional components of cost calculation in closer
	 * numbers of digits to per-IO cost components.
	 */
	IOC_PAGE_SHIFT		= 12,
	IOC_PAGE_SIZE		= 1 << IOC_PAGE_SHIFT,
	IOC_SECT_TO_PAGE_SHIFT	= IOC_PAGE_SHIFT - SECTOR_SHIFT,

	/* if apart further than 16M, consider randio for linear model */
	LCOEF_RANDIO_PAGES	= 4096,
};

enum ioc_running {
	IOC_IDLE,
	IOC_RUNNING,
	IOC_STOP,
};

/* io.cost.qos controls including per-dev enable of the whole controller */
enum {
	QOS_ENABLE,
	QOS_CTRL,
	NR_QOS_CTRL_PARAMS,
};

/* io.cost.qos params */
enum {
	QOS_RPPM,
	QOS_RLAT,
	QOS_WPPM,
	QOS_WLAT,
	QOS_MIN,
	QOS_MAX,
	NR_QOS_PARAMS,
};

/* io.cost.model controls */
enum {
	COST_CTRL,
	COST_MODEL,
	NR_COST_CTRL_PARAMS,
};

/* builtin linear cost model coefficients */
enum {
	I_LCOEF_RBPS,
	I_LCOEF_RSEQIOPS,
	I_LCOEF_RRANDIOPS,
	I_LCOEF_WBPS,
	I_LCOEF_WSEQIOPS,
	I_LCOEF_WRANDIOPS,
	NR_I_LCOEFS,
};

/* indices into ioc_params->lcoefs[], the coefficients calc_lcoefs() produces */
enum {
	LCOEF_RPAGE,
	LCOEF_RSEQIO,
	LCOEF_RRANDIO,
	LCOEF_WPAGE,
	LCOEF_WSEQIO,
	LCOEF_WRANDIO,
	NR_LCOEFS,
};

/* indices into autop[], the builtin per-device-class parameter sets */
enum {
	AUTOP_INVALID,
	AUTOP_HDD,
	AUTOP_SSD_QD1,
	AUTOP_SSD_DFL,
	AUTOP_SSD_FAST,
};

struct ioc_params {
	u32				qos[NR_QOS_PARAMS];
	u64				i_lcoefs[NR_I_LCOEFS];
	u64				lcoefs[NR_LCOEFS];
	u32				too_fast_vrate_pct;
	u32				too_slow_vrate_pct;
};

/* vtime margins, see MARGIN_*_PCT and ioc_refresh_margins() */
struct ioc_margins {
	s64				min;
	s64				low;
	s64				target;
};

struct ioc_missed {
	local_t				nr_met;
	local_t				nr_missed;
	u32				last_met;
	u32				last_missed;
};

struct ioc_pcpu_stat {
	struct ioc_missed		missed[2];

	local64_t			rq_wait_ns;
	u64				last_rq_wait_ns;
};

/* per device */
struct ioc {
	struct rq_qos			rqos;

	bool				enabled;

	struct ioc_params		params;
	struct ioc_margins		margins;
	u32				period_us;
	u32				timer_slack_ns;
	u64				vrate_min;
	u64				vrate_max;

	spinlock_t			lock;
	struct timer_list		timer;
	struct list_head		active_iocgs;	/* active cgroups */
	struct ioc_pcpu_stat __percpu	*pcpu_stat;

	enum ioc_running		running;
	atomic64_t			vtime_rate;
	u64				vtime_base_rate;
	s64				vtime_err;

	seqcount_spinlock_t		period_seqcount;
	u64				period_at;	/* wallclock starttime */
	u64				period_at_vtime; /* vtime starttime */

	atomic64_t			cur_period;	/* inc'd each period */
	int				busy_level;	/* saturation history */

	bool				weights_updated;
	atomic_t			hweight_gen;	/* for lazy hweights */

	/* debt forgiveness */
	u64				dfgv_period_at;
	u64				dfgv_period_rem;
	u64				dfgv_usage_us_sum;

	u64				autop_too_fast_at;
	u64				autop_too_slow_at;
	int				autop_idx;
	bool				user_qos_params:1;
	bool				user_cost_model:1;
};

struct iocg_pcpu_stat {
	local64_t			abs_vusage;
};

struct iocg_stat {
	u64				usage_us;
	u64				wait_us;
	u64				indebt_us;
	u64				indelay_us;
};

/* per device-cgroup pair */
struct ioc_gq {
	struct blkg_policy_data		pd;
	struct ioc			*ioc;

	/*
	 * An iocg can get its weight from two sources - an explicit
	 * per-device-cgroup configuration or the default weight of the
	 * cgroup.  `cfg_weight` is the explicit per-device-cgroup
	 * configuration.  `weight` is the effective considering both
	 * sources.
	 *
	 * When an idle cgroup becomes active its `active` goes from 0 to
	 * `weight`.  `inuse` is the surplus adjusted active weight.
	 * `active` and `inuse` are used to calculate `hweight_active` and
	 * `hweight_inuse`.
	 *
	 * `last_inuse` remembers `inuse` while an iocg is idle to persist
	 * surplus adjustments.
	 *
	 * `inuse` may be adjusted dynamically during period. `saved_*` are used
	 * to determine and track adjustments.
	 */
	u32				cfg_weight;
	u32				weight;
	u32				active;
	u32				inuse;

	u32				last_inuse;
	s64				saved_margin;

	sector_t			cursor;		/* to detect randio */

	/*
	 * `vtime` is this iocg's vtime cursor which progresses as IOs are
	 * issued.  If lagging behind device vtime, the delta represents
	 * the currently available IO budget.  If running ahead, the
	 * overage.
	 *
	 * `done_vtime` is the same but progressed on completion rather
	 * than issue.  The delta behind `vtime` represents the cost of
	 * currently in-flight IOs.
	 */
	atomic64_t			vtime;
	atomic64_t			done_vtime;
	u64				abs_vdebt;

	/* current delay in effect and when it started */
	u64				delay;
	u64				delay_at;

	/*
	 * The period this iocg was last active in.  Used for deactivation
	 * and invalidating `vtime`.
	 */
	atomic64_t			active_period;
	struct list_head		active_list;

	/* see __propagate_weights() and current_hweight() for details */
	u64				child_active_sum;
	u64				child_inuse_sum;
	u64				child_adjusted_sum;
	int				hweight_gen;
	u32				hweight_active;
	u32				hweight_inuse;
	u32				hweight_donating;
	u32				hweight_after_donation;

	struct list_head		walk_list;
	struct list_head		surplus_list;

	struct wait_queue_head		waitq;
	struct hrtimer			waitq_timer;

	/* timestamp at the latest activation */
	u64				activated_at;

	/* statistics */
	struct iocg_pcpu_stat __percpu	*pcpu_stat;
	struct iocg_stat		stat;
	struct iocg_stat		last_stat;
	u64				last_stat_abs_vusage;
	u64				usage_delta_us;
	u64				wait_since;
	u64				indebt_since;
	u64				indelay_since;

	/* this iocg's depth in the hierarchy and ancestors including self */
	int				level;
	struct ioc_gq			*ancestors[];
};

/* per cgroup */
struct ioc_cgrp {
	struct blkcg_policy_data	cpd;
	unsigned int			dfl_weight;
};

/* time snapshot filled in by ioc_now() */
struct ioc_now {
	u64				now_ns;
	u64				now;
	u64				vnow;
};

struct iocg_wait {
	struct wait_queue_entry		wait;
	struct bio			*bio;
	u64				abs_cost;
	bool				committed;
};

struct iocg_wake_ctx {
	struct ioc_gq			*iocg;
	u32				hw_inuse;
	s64				vbudget;
};

static const struct ioc_params autop[] = {
	[AUTOP_HDD] = {
		.qos				= {
			[QOS_RLAT]		=        250000, /* 250ms */
			[QOS_WLAT]		=        250000,
			[QOS_MIN]		= VRATE_MIN_PPM,
			[QOS_MAX]		= VRATE_MAX_PPM,
		},
		.i_lcoefs			= {
			[I_LCOEF_RBPS]		=     174019176,
			[I_LCOEF_RSEQIOPS]	=         41708,
			[I_LCOEF_RRANDIOPS]	=           370,
			[I_LCOEF_WBPS]		=     178075866,
			[I_LCOEF_WSEQIOPS]	=         42705,
			[I_LCOEF_WRANDIOPS]	=           378,
		},
	},
	[AUTOP_SSD_QD1] = {
		.qos				= {
			[QOS_RLAT]		=         25000, /* 25ms */
			[QOS_WLAT]		=         25000,
			[QOS_MIN]		= VRATE_MIN_PPM,
			[QOS_MAX]		= VRATE_MAX_PPM,
		},
		.i_lcoefs			= {
			[I_LCOEF_RBPS]		=     245855193,
			[I_LCOEF_RSEQIOPS]	=         61575,
			[I_LCOEF_RRANDIOPS]	=          6946,
			[I_LCOEF_WBPS]		=     141365009,
			[I_LCOEF_WSEQIOPS]	=         33716,
			[I_LCOEF_WRANDIOPS]	=         26796,
		},
	},
	[AUTOP_SSD_DFL] = {
		.qos				= {
			[QOS_RLAT]		=         25000, /* 25ms */
			[QOS_WLAT]		=         25000,
			[QOS_MIN]		= VRATE_MIN_PPM,
			[QOS_MAX]		= VRATE_MAX_PPM,
		},
		.i_lcoefs			= {
			[I_LCOEF_RBPS]		=     488636629,
			[I_LCOEF_RSEQIOPS]	=          8932,
			[I_LCOEF_RRANDIOPS]	=          8518,
			[I_LCOEF_WBPS]		=     427891549,
			[I_LCOEF_WSEQIOPS]	=         28755,
			[I_LCOEF_WRANDIOPS]	=         21940,
		},
		.too_fast_vrate_pct		=           500,
	},
	[AUTOP_SSD_FAST] = {
		.qos				= {
			[QOS_RLAT]		=          5000, /* 5ms */
			[QOS_WLAT]		=          5000,
			[QOS_MIN]		= VRATE_MIN_PPM,
			[QOS_MAX]		= VRATE_MAX_PPM,
		},
		.i_lcoefs			= {
			[I_LCOEF_RBPS]		=    3102524156LLU,
			[I_LCOEF_RSEQIOPS]	=        724816,
			[I_LCOEF_RRANDIOPS]	=        778122,
			[I_LCOEF_WBPS]		=    1742780862LLU,
			[I_LCOEF_WSEQIOPS]	=        425702,
			[I_LCOEF_WRANDIOPS]	=	 443193,
		},
		.too_slow_vrate_pct		=            10,
	},
};

/*
 * vrate adjust percentages indexed by ioc->busy_level.  We adjust up on
 * vtime credit shortage and down on device saturation.
 */
static const u32 vrate_adj_pct[] =
	{ 0, 0, 0, 0,
	  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	  4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 16 };

static struct blkcg_policy blkcg_policy_iocost;

/* accessors and helpers */
static struct ioc *rqos_to_ioc(struct rq_qos *rqos)
{
	return container_of(rqos, struct ioc, rqos);
}

static struct ioc *q_to_ioc(struct request_queue *q)
{
	return rqos_to_ioc(rq_qos_id(q, RQ_QOS_COST));
}

/* Returns the disk name of @ioc, or "<unknown>" if no disk is attached yet. */
static const char __maybe_unused *ioc_name(struct ioc *ioc)
{
	struct gendisk *disk = ioc->rqos.disk;

	if (!disk)
		return "<unknown>";
	return disk->disk_name;
}

/* NULL-tolerant: a NULL @pd maps to a NULL iocg */
static struct ioc_gq *pd_to_iocg(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct ioc_gq, pd) : NULL;
}

static struct ioc_gq *blkg_to_iocg(struct blkcg_gq *blkg)
{
	return pd_to_iocg(blkg_to_pd(blkg, &blkcg_policy_iocost));
}

static struct blkcg_gq *iocg_to_blkg(struct ioc_gq *iocg)
{
	return pd_to_blkg(&iocg->pd);
}

static struct ioc_cgrp *blkcg_to_iocc(struct blkcg *blkcg)
{
	return container_of(blkcg_to_cpd(blkcg, &blkcg_policy_iocost),
			    struct ioc_cgrp, cpd);
}

/*
 * Scale @abs_cost to the inverse of @hw_inuse.  The lower the hierarchical
 * weight, the more expensive each IO.  Must round up.
*/ static u64 abs_cost_to_cost(u64 abs_cost, u32 hw_inuse) { return DIV64_U64_ROUND_UP(abs_cost * WEIGHT_ONE, hw_inuse); } /* * The inverse of abs_cost_to_cost(). Must round up. */ static u64 cost_to_abs_cost(u64 cost, u32 hw_inuse) { return DIV64_U64_ROUND_UP(cost * hw_inuse, WEIGHT_ONE); } static void iocg_commit_bio(struct ioc_gq *iocg, struct bio *bio, u64 abs_cost, u64 cost) { struct iocg_pcpu_stat *gcs; bio->bi_iocost_cost = cost; atomic64_add(cost, &iocg->vtime); gcs = get_cpu_ptr(iocg->pcpu_stat); local64_add(abs_cost, &gcs->abs_vusage); put_cpu_ptr(gcs); } static void iocg_lock(struct ioc_gq *iocg, bool lock_ioc, unsigned long *flags) { if (lock_ioc) { spin_lock_irqsave(&iocg->ioc->lock, *flags); spin_lock(&iocg->waitq.lock); } else { spin_lock_irqsave(&iocg->waitq.lock, *flags); } } static void iocg_unlock(struct ioc_gq *iocg, bool unlock_ioc, unsigned long *flags) { if (unlock_ioc) { spin_unlock(&iocg->waitq.lock); spin_unlock_irqrestore(&iocg->ioc->lock, *flags); } else { spin_unlock_irqrestore(&iocg->waitq.lock, *flags); } } #define CREATE_TRACE_POINTS #include <trace/events/iocost.h> static void ioc_refresh_margins(struct ioc *ioc) { struct ioc_margins *margins = &ioc->margins; u32 period_us = ioc->period_us; u64 vrate = ioc->vtime_base_rate; margins->min = (period_us * MARGIN_MIN_PCT / 100) * vrate; margins->low = (period_us * MARGIN_LOW_PCT / 100) * vrate; margins->target = (period_us * MARGIN_TARGET_PCT / 100) * vrate; } /* latency Qos params changed, update period_us and all the dependent params */ static void ioc_refresh_period_us(struct ioc *ioc) { u32 ppm, lat, multi, period_us; lockdep_assert_held(&ioc->lock); /* pick the higher latency target */ if (ioc->params.qos[QOS_RLAT] >= ioc->params.qos[QOS_WLAT]) { ppm = ioc->params.qos[QOS_RPPM]; lat = ioc->params.qos[QOS_RLAT]; } else { ppm = ioc->params.qos[QOS_WPPM]; lat = ioc->params.qos[QOS_WLAT]; } /* * We want the period to be long enough to contain a healthy number * of IOs while short enough 
for granular control. Define it as a * multiple of the latency target. Ideally, the multiplier should * be scaled according to the percentile so that it would nominally * contain a certain number of requests. Let's be simpler and * scale it linearly so that it's 2x >= pct(90) and 10x at pct(50). */ if (ppm) multi = max_t(u32, (MILLION - ppm) / 50000, 2); else multi = 2; period_us = multi * lat; period_us = clamp_t(u32, period_us, MIN_PERIOD, MAX_PERIOD); /* calculate dependent params */ ioc->period_us = period_us; ioc->timer_slack_ns = div64_u64( (u64)period_us * NSEC_PER_USEC * TIMER_SLACK_PCT, 100); ioc_refresh_margins(ioc); } /* * ioc->rqos.disk isn't initialized when this function is called from * the init path. */ static int ioc_autop_idx(struct ioc *ioc, struct gendisk *disk) { int idx = ioc->autop_idx; const struct ioc_params *p = &autop[idx]; u32 vrate_pct; u64 now_ns; /* rotational? */ if (!blk_queue_nonrot(disk->queue)) return AUTOP_HDD; /* handle SATA SSDs w/ broken NCQ */ if (blk_queue_depth(disk->queue) == 1) return AUTOP_SSD_QD1; /* use one of the normal ssd sets */ if (idx < AUTOP_SSD_DFL) return AUTOP_SSD_DFL; /* if user is overriding anything, maintain what was there */ if (ioc->user_qos_params || ioc->user_cost_model) return idx; /* step up/down based on the vrate */ vrate_pct = div64_u64(ioc->vtime_base_rate * 100, VTIME_PER_USEC); now_ns = blk_time_get_ns(); if (p->too_fast_vrate_pct && p->too_fast_vrate_pct <= vrate_pct) { if (!ioc->autop_too_fast_at) ioc->autop_too_fast_at = now_ns; if (now_ns - ioc->autop_too_fast_at >= AUTOP_CYCLE_NSEC) return idx + 1; } else { ioc->autop_too_fast_at = 0; } if (p->too_slow_vrate_pct && p->too_slow_vrate_pct >= vrate_pct) { if (!ioc->autop_too_slow_at) ioc->autop_too_slow_at = now_ns; if (now_ns - ioc->autop_too_slow_at >= AUTOP_CYCLE_NSEC) return idx - 1; } else { ioc->autop_too_slow_at = 0; } return idx; } /* * Take the followings as input * * @bps maximum sequential throughput * @seqiops maximum sequential 
4k iops * @randiops maximum random 4k iops * * and calculate the linear model cost coefficients. * * *@page per-page cost 1s / (@bps / 4096) * *@seqio base cost of a seq IO max((1s / @seqiops) - *@page, 0) * @randiops base cost of a rand IO max((1s / @randiops) - *@page, 0) */ static void calc_lcoefs(u64 bps, u64 seqiops, u64 randiops, u64 *page, u64 *seqio, u64 *randio) { u64 v; *page = *seqio = *randio = 0; if (bps) { u64 bps_pages = DIV_ROUND_UP_ULL(bps, IOC_PAGE_SIZE); if (bps_pages) *page = DIV64_U64_ROUND_UP(VTIME_PER_SEC, bps_pages); else *page = 1; } if (seqiops) { v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, seqiops); if (v > *page) *seqio = v - *page; } if (randiops) { v = DIV64_U64_ROUND_UP(VTIME_PER_SEC, randiops); if (v > *page) *randio = v - *page; } } static void ioc_refresh_lcoefs(struct ioc *ioc) { u64 *u = ioc->params.i_lcoefs; u64 *c = ioc->params.lcoefs; calc_lcoefs(u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], &c[LCOEF_RPAGE], &c[LCOEF_RSEQIO], &c[LCOEF_RRANDIO]); calc_lcoefs(u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS], &c[LCOEF_WPAGE], &c[LCOEF_WSEQIO], &c[LCOEF_WRANDIO]); } /* * struct gendisk is required as an argument because ioc->rqos.disk * is not properly initialized when called from the init path. 
*/
static bool ioc_refresh_params_disk(struct ioc *ioc, bool force,
				    struct gendisk *disk)
{
	const struct ioc_params *p;
	int idx;

	lockdep_assert_held(&ioc->lock);

	idx = ioc_autop_idx(ioc, disk);
	p = &autop[idx];

	/* nothing to do unless the parameter set changed or @force is set */
	if (idx == ioc->autop_idx && !force)
		return false;

	/* switching parameter sets, restart from the nominal (100%) vrate */
	if (idx != ioc->autop_idx) {
		atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
		ioc->vtime_base_rate = VTIME_PER_USEC;
	}

	ioc->autop_idx = idx;
	ioc->autop_too_fast_at = 0;
	ioc->autop_too_slow_at = 0;

	/* user-configured params are never overwritten by the builtin sets */
	if (!ioc->user_qos_params)
		memcpy(ioc->params.qos, p->qos, sizeof(p->qos));
	if (!ioc->user_cost_model)
		memcpy(ioc->params.i_lcoefs, p->i_lcoefs, sizeof(p->i_lcoefs));

	ioc_refresh_period_us(ioc);
	ioc_refresh_lcoefs(ioc);

	/* convert the ppm vrate bounds to vtime-per-usec rates */
	ioc->vrate_min = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MIN] *
					    VTIME_PER_USEC, MILLION);
	ioc->vrate_max = DIV64_U64_ROUND_UP((u64)ioc->params.qos[QOS_MAX] *
					    VTIME_PER_USEC, MILLION);

	return true;
}

/* ioc_refresh_params_disk() using the disk already attached to @ioc's rqos */
static bool ioc_refresh_params(struct ioc *ioc, bool force)
{
	return ioc_refresh_params_disk(ioc, force, ioc->rqos.disk);
}

/*
 * When an iocg accumulates too much vtime or gets deactivated, we throw away
 * some vtime, which lowers the overall device utilization. As the exact amount
 * which is being thrown away is known, we can compensate by accelerating the
 * vrate accordingly so that the extra vtime generated in the current period
 * matches what got lost.
 */
static void ioc_refresh_vrate(struct ioc *ioc, struct ioc_now *now)
{
	s64 pleft = ioc->period_at + ioc->period_us - now->now;
	s64 vperiod = ioc->period_us * ioc->vtime_base_rate;
	s64 vcomp, vcomp_min, vcomp_max;

	lockdep_assert_held(&ioc->lock);

	/* we need some time left in this period */
	if (pleft <= 0)
		goto done;

	/*
	 * Calculate how much vrate should be adjusted to offset the error.
	 * Limit the amount of adjustment and deduct the adjusted amount from
	 * the error.
	 */
	vcomp = -div64_s64(ioc->vtime_err, pleft);
	vcomp_min = -(ioc->vtime_base_rate >> 1);
	vcomp_max = ioc->vtime_base_rate;
	vcomp = clamp(vcomp, vcomp_min, vcomp_max);

	ioc->vtime_err += vcomp * pleft;

	atomic64_set(&ioc->vtime_rate, ioc->vtime_base_rate + vcomp);
done:
	/* bound how much error can accumulate */
	ioc->vtime_err = clamp(ioc->vtime_err, -vperiod, vperiod);
}

/*
 * Adjust ioc->vtime_base_rate according to ioc->busy_level and refresh the
 * margins.  The rate is left alone when busy_level is zero, or when it is
 * negative while there are lagging iocgs.  The remaining arguments are only
 * passed through to the vrate_adj tracepoint.
 */
static void ioc_adjust_base_vrate(struct ioc *ioc, u32 rq_wait_pct,
				  int nr_lagging, int nr_shortages,
				  int prev_busy_level, u32 *missed_ppm)
{
	u64 vrate = ioc->vtime_base_rate;
	u64 vrate_min = ioc->vrate_min, vrate_max = ioc->vrate_max;

	if (!ioc->busy_level || (ioc->busy_level < 0 && nr_lagging)) {
		if (ioc->busy_level != prev_busy_level || nr_lagging)
			trace_iocost_ioc_vrate_adj(ioc, vrate,
						   missed_ppm, rq_wait_pct,
						   nr_lagging, nr_shortages);

		return;
	}

	/*
	 * If vrate is out of bounds, apply clamp gradually as the
	 * bounds can change abruptly.  Otherwise, apply busy_level
	 * based adjustment.
	 */
	if (vrate < vrate_min) {
		vrate = div64_u64(vrate * (100 + VRATE_CLAMP_ADJ_PCT), 100);
		vrate = min(vrate, vrate_min);
	} else if (vrate > vrate_max) {
		vrate = div64_u64(vrate * (100 - VRATE_CLAMP_ADJ_PCT), 100);
		vrate = max(vrate, vrate_max);
	} else {
		/* busy_level magnitudes past the table end use the last entry */
		int idx = min_t(int, abs(ioc->busy_level),
				ARRAY_SIZE(vrate_adj_pct) - 1);
		u32 adj_pct = vrate_adj_pct[idx];

		/* positive busy_level slows vrate down, negative speeds it up */
		if (ioc->busy_level > 0)
			adj_pct = 100 - adj_pct;
		else
			adj_pct = 100 + adj_pct;

		vrate = clamp(DIV64_U64_ROUND_UP(vrate * adj_pct, 100),
			      vrate_min, vrate_max);
	}

	trace_iocost_ioc_vrate_adj(ioc, vrate, missed_ppm, rq_wait_pct,
				   nr_lagging, nr_shortages);

	ioc->vtime_base_rate = vrate;
	ioc_refresh_margins(ioc);
}

/* take a snapshot of the current [v]time and vrate */
static void ioc_now(struct ioc *ioc, struct ioc_now *now)
{
	unsigned seq;
	u64 vrate;

	now->now_ns = blk_time_get_ns();
	now->now = ktime_to_us(now->now_ns);
	vrate = atomic64_read(&ioc->vtime_rate);

	/*
	 * The current vtime is
	 *
	 *   vtime at period start + (wallclock time since the start) * vrate
	 *
	 * As a consistent snapshot of `period_at_vtime` and `period_at` is
	 * needed, they're seqcount protected.
	 */
	do {
		seq = read_seqcount_begin(&ioc->period_seqcount);
		now->vnow = ioc->period_at_vtime +
			(now->now - ioc->period_at) * vrate;
	} while (read_seqcount_retry(&ioc->period_seqcount, seq));
}

/*
 * Start a new period at @now: publish the period start times under the
 * period seqcount and arm the timer to expire one period_us from now.
 */
static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
{
	WARN_ON_ONCE(ioc->running != IOC_RUNNING);

	write_seqcount_begin(&ioc->period_seqcount);
	ioc->period_at = now->now;
	ioc->period_at_vtime = now->vnow;
	write_seqcount_end(&ioc->period_seqcount);

	ioc->timer.expires = jiffies + usecs_to_jiffies(ioc->period_us);
	add_timer(&ioc->timer);
}

/*
 * Update @iocg's `active` and `inuse` to @active and @inuse, update level
 * weight sums and propagate upwards accordingly. If @save, the current margin
 * is saved to be used as reference for later inuse in-period adjustments.
 */
static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
				bool save, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	int lvl;

	lockdep_assert_held(&ioc->lock);

	/*
	 * For an active leaf node, its inuse shouldn't be zero or exceed
	 * @active. An active internal node's inuse is solely determined by the
	 * inuse to active ratio of its children regardless of @inuse.
	 */
	if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
		inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
					   iocg->child_active_sum);
	} else {
		/*
		 * It may be tempting to turn this into a clamp expression with
		 * a lower limit of 1 but active may be 0, which cannot be used
		 * as an upper limit in that situation. This expression allows
		 * active to clamp inuse unless it is 0, in which case inuse
		 * becomes 1.
		 */
		inuse = min(inuse, active) ?: 1;
	}

	iocg->last_inuse = iocg->inuse;
	if (save)
		iocg->saved_margin = now->vnow - atomic64_read(&iocg->vtime);

	if (active == iocg->active && inuse == iocg->inuse)
		return;

	/* walk from @iocg towards the root, ancestors[0] is the topmost */
	for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
		struct ioc_gq *parent = iocg->ancestors[lvl];
		struct ioc_gq *child = iocg->ancestors[lvl + 1];
		u32 parent_active = 0, parent_inuse = 0;

		/* update the level sums */
		parent->child_active_sum += (s32)(active - child->active);
		parent->child_inuse_sum += (s32)(inuse - child->inuse);
		/* apply the updates */
		child->active = active;
		child->inuse = inuse;

		/*
		 * The delta between inuse and active sums indicates that
		 * much of weight is being given away.  Parent's inuse
		 * and active should reflect the ratio.
		 */
		if (parent->child_active_sum) {
			parent_active = parent->weight;
			parent_inuse = DIV64_U64_ROUND_UP(
				parent_active * parent->child_inuse_sum,
				parent->child_active_sum);
		}

		/* do we need to keep walking up? */
		if (parent_active == parent->active &&
		    parent_inuse == parent->inuse)
			break;

		active = parent_active;
		inuse = parent_inuse;
	}

	ioc->weights_updated = true;
}

/*
 * If weights were changed since the last commit, bump ioc->hweight_gen so
 * that hweights cached against the old generation are no longer considered
 * up to date.
 */
static void commit_weights(struct ioc *ioc)
{
	lockdep_assert_held(&ioc->lock);

	if (ioc->weights_updated) {
		/* paired with rmb in current_hweight(), see there */
		smp_wmb();
		atomic_inc(&ioc->hweight_gen);
		ioc->weights_updated = false;
	}
}

/* __propagate_weights() immediately followed by commit_weights() */
static void propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
			      bool save, struct ioc_now *now)
{
	__propagate_weights(iocg, active, inuse, save, now);
	commit_weights(iocg->ioc);
}

static void current_hweight(struct ioc_gq *iocg, u32 *hw_activep, u32 *hw_inusep)
{
	struct ioc *ioc = iocg->ioc;
	int lvl;
	u32 hwa, hwi;
	int ioc_gen;

	/* hot path - if uptodate, use cached */
	ioc_gen = atomic_read(&ioc->hweight_gen);
	if (ioc_gen == iocg->hweight_gen)
		goto out;

	/*
	 * Paired with wmb in commit_weights().  If we saw the updated
	 * hweight_gen, all the weight updates from __propagate_weights() are
	 * visible too.
* * We can race with weight updates during calculation and get it * wrong. However, hweight_gen would have changed and a future * reader will recalculate and we're guaranteed to discard the * wrong result soon. */ smp_rmb(); hwa = hwi = WEIGHT_ONE; for (lvl = 0; lvl <= iocg->level - 1; lvl++) { struct ioc_gq *parent = iocg->ancestors[lvl]; struct ioc_gq *child = iocg->ancestors[lvl + 1]; u64 active_sum = READ_ONCE(parent->child_active_sum); u64 inuse_sum = READ_ONCE(parent->child_inuse_sum); u32 active = READ_ONCE(child->active); u32 inuse = READ_ONCE(child->inuse); /* we can race with deactivations and either may read as zero */ if (!active_sum || !inuse_sum) continue; active_sum = max_t(u64, active, active_sum); hwa = div64_u64((u64)hwa * active, active_sum); inuse_sum = max_t(u64, inuse, inuse_sum); hwi = div64_u64((u64)hwi * inuse, inuse_sum); } iocg->hweight_active = max_t(u32, hwa, 1); iocg->hweight_inuse = max_t(u32, hwi, 1); iocg->hweight_gen = ioc_gen; out: if (hw_activep) *hw_activep = iocg->hweight_active; if (hw_inusep) *hw_inusep = iocg->hweight_inuse; } /* * Calculate the hweight_inuse @iocg would get with max @inuse assuming all the * other weights stay unchanged. 
*/
static u32 current_hweight_max(struct ioc_gq *iocg)
{
	u32 hwm = WEIGHT_ONE;
	/* start from the leaf using all of its active weight */
	u32 inuse = iocg->active;
	u64 child_inuse_sum;
	int lvl;

	lockdep_assert_held(&iocg->ioc->lock);

	/* walk from the immediate parent up towards ancestors[0] */
	for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
		struct ioc_gq *parent = iocg->ancestors[lvl];
		struct ioc_gq *child = iocg->ancestors[lvl + 1];

		/* sibling inuse sum with this child's inuse replaced by @inuse */
		child_inuse_sum = parent->child_inuse_sum + inuse - child->inuse;
		hwm = div64_u64((u64)hwm * inuse, child_inuse_sum);
		/* the parent's resulting inuse, carried to the next level up */
		inuse = DIV64_U64_ROUND_UP(parent->active * child_inuse_sum,
					   parent->child_active_sum);
	}

	/* never report less than 1 */
	return max_t(u32, hwm, 1);
}

/*
 * Recompute @iocg's effective weight - the configured cfg_weight if non-zero,
 * otherwise the owning cgroup's dfl_weight - and, if it changed while @iocg has
 * a non-zero active weight, propagate it with the current inuse.
 */
static void weight_updated(struct ioc_gq *iocg, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	struct blkcg_gq *blkg = iocg_to_blkg(iocg);
	struct ioc_cgrp *iocc = blkcg_to_iocc(blkg->blkcg);
	u32 weight;

	lockdep_assert_held(&ioc->lock);

	weight = iocg->cfg_weight ?: iocc->dfl_weight;
	if (weight != iocg->weight && iocg->active)
		propagate_weights(iocg, weight, iocg->inuse, true, now);
	iocg->weight = weight;
}

/*
 * Make sure @iocg is on @ioc->active_iocgs and stamped with the current period.
 * @now is filled with a fresh ioc_now() snapshot on every path except the
 * unlocked child_active_sum bail-out.
 *
 * Returns %true if @iocg is active on return. Returns %false if @iocg has
 * active children or one of its ancestors[1..level-1] is itself on the active
 * list, in which case nothing is activated.
 */
static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	u64 __maybe_unused last_period, cur_period;
	u64 vtime, vtarget;
	int i;

	/*
	 * If seem to be already active, just update the stamp to tell the
	 * timer that we're still active. We don't mind occasional races.
	 */
	if (!list_empty(&iocg->active_list)) {
		ioc_now(ioc, now);
		cur_period = atomic64_read(&ioc->cur_period);
		if (atomic64_read(&iocg->active_period) != cur_period)
			atomic64_set(&iocg->active_period, cur_period);
		return true;
	}

	/* racy check on internal node IOs, treat as root level IOs */
	if (iocg->child_active_sum)
		return false;

	spin_lock_irq(&ioc->lock);

	ioc_now(ioc, now);

	/* update period */
	cur_period = atomic64_read(&ioc->cur_period);
	last_period = atomic64_read(&iocg->active_period);
	atomic64_set(&iocg->active_period, cur_period);

	/* already activated or breaking leaf-only constraint? */
	if (!list_empty(&iocg->active_list))
		goto succeed_unlock;
	for (i = iocg->level - 1; i > 0; i--)
		if (!list_empty(&iocg->ancestors[i]->active_list))
			goto fail_unlock;

	/* recheck under the lock what was tested racily above */
	if (iocg->child_active_sum)
		goto fail_unlock;

	/*
	 * Always start with the target budget. On deactivation, we throw away
	 * anything above it.
	 */
	vtarget = now->vnow - ioc->margins.target;
	vtime = atomic64_read(&iocg->vtime);

	/* shift vtime and done_vtime by the same amount so their gap is kept */
	atomic64_add(vtarget - vtime, &iocg->vtime);
	atomic64_add(vtarget - vtime, &iocg->done_vtime);
	vtime = vtarget;

	/*
	 * Activate, propagate weight and start period timer if not
	 * running. Reset hweight_gen to avoid accidental match from
	 * wrapping.
	 */
	iocg->hweight_gen = atomic_read(&ioc->hweight_gen) - 1;
	list_add(&iocg->active_list, &ioc->active_iocgs);

	/* resume from the last inuse if there is one, else the full weight */
	propagate_weights(iocg, iocg->weight,
			  iocg->last_inuse ?: iocg->weight, true, now);

	TRACE_IOCG_PATH(iocg_activate, iocg, now,
			last_period, cur_period, vtime);

	iocg->activated_at = now->now;

	if (ioc->running == IOC_IDLE) {
		ioc->running = IOC_RUNNING;
		ioc->dfgv_period_at = now->now;
		ioc->dfgv_period_rem = 0;
		ioc_start_period(ioc, now);
	}

succeed_unlock:
	spin_unlock_irq(&ioc->lock);
	return true;

fail_unlock:
	spin_unlock_irq(&ioc->lock);
	return false;
}

/*
 * Recompute the delay applied to @iocg's blkg. The delay currently in effect
 * (halved for every full second since delay_at) is compared against a new
 * delay derived from how far vtime plus outstanding debt is ahead of vnow, and
 * the larger one wins.
 *
 * Returns %true if a delay of at least MIN_DELAY is set on the blkg, %false if
 * the delay was cleared or if @now predates @iocg->delay_at.
 */
static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	struct blkcg_gq *blkg = iocg_to_blkg(iocg);
	u64 tdelta, delay, new_delay, shift;
	s64 vover, vover_pct;
	u32 hwa;

	lockdep_assert_held(&iocg->waitq.lock);

	/*
	 * If the delay is set by another CPU, we may be in the past. No need to
	 * change anything if so. This avoids decay calculation underflow.
	 */
	if (time_before64(now->now, iocg->delay_at))
		return false;

	/* calculate the current delay in effect - 1/2 every second */
	tdelta = now->now - iocg->delay_at;
	shift = div64_u64(tdelta, USEC_PER_SEC);
	if (iocg->delay && shift < BITS_PER_LONG)
		delay = iocg->delay >> shift;
	else
		delay = 0;

	/* calculate the new delay from the debt amount */
	current_hweight(iocg, &hwa, NULL);
	vover = atomic64_read(&iocg->vtime) +
		abs_cost_to_cost(iocg->abs_vdebt, hwa) - now->vnow;
	/* overage as a percentage of one period's worth of vtime */
	vover_pct = div64_s64(100 * vover,
			      ioc->period_us * ioc->vtime_base_rate);

	/* linear interpolation between MIN_DELAY and MAX_DELAY inside the thresholds */
	if (vover_pct <= MIN_DELAY_THR_PCT)
		new_delay = 0;
	else if (vover_pct >= MAX_DELAY_THR_PCT)
		new_delay = MAX_DELAY;
	else
		new_delay = MIN_DELAY +
			div_u64((MAX_DELAY - MIN_DELAY) *
				(vover_pct - MIN_DELAY_THR_PCT),
				MAX_DELAY_THR_PCT - MIN_DELAY_THR_PCT);

	/* pick the higher one and apply */
	if (new_delay > delay) {
		iocg->delay = new_delay;
		iocg->delay_at = now->now;
		delay = new_delay;
	}

	if (delay >= MIN_DELAY) {
		/* start the in-delay stat interval if not already running */
		if (!iocg->indelay_since)
			iocg->indelay_since = now->now;
		blkcg_set_delay(blkg, delay * NSEC_PER_USEC);
		return true;
	} else {
		/* close the in-delay stat interval and drop the delay */
		if (iocg->indelay_since) {
			iocg->stat.indelay_us += now->now - iocg->indelay_since;
			iocg->indelay_since = 0;
		}
		iocg->delay = 0;
		blkcg_clear_delay(blkg);
		return false;
	}
}

/*
 * Add @abs_cost to @iocg's debt and to this CPU's abs_vusage counter. Both
 * ioc->lock and iocg->waitq.lock must be held and @iocg is expected to be on
 * the active list.
 */
static void iocg_incur_debt(struct ioc_gq *iocg, u64 abs_cost,
			    struct ioc_now *now)
{
	struct iocg_pcpu_stat *gcs;

	lockdep_assert_held(&iocg->ioc->lock);
	lockdep_assert_held(&iocg->waitq.lock);
	WARN_ON_ONCE(list_empty(&iocg->active_list));

	/*
	 * Once in debt, debt handling owns inuse. @iocg stays at the minimum
	 * inuse donating all of its share to others until its debt is paid off.
	 */
	if (!iocg->abs_vdebt && abs_cost) {
		/* first debt: start the in-debt stat interval */
		iocg->indebt_since = now->now;
		propagate_weights(iocg, iocg->active, 0, false, now);
	}

	iocg->abs_vdebt += abs_cost;

	gcs = get_cpu_ptr(iocg->pcpu_stat);
	local64_add(abs_cost, &gcs->abs_vusage);
	put_cpu_ptr(gcs);
}

/*
 * Reduce @iocg's debt by @abs_vpay, capped at the outstanding amount. When the
 * debt reaches zero, the in-debt stat interval is closed and inuse is restored
 * to @iocg->last_inuse. Both ioc->lock and iocg->waitq.lock must be held.
 */
static void iocg_pay_debt(struct ioc_gq *iocg, u64 abs_vpay,
			  struct ioc_now *now)
{
	lockdep_assert_held(&iocg->ioc->lock);
	lockdep_assert_held(&iocg->waitq.lock);

	/*
	 * make sure that nobody messed with @iocg. Check iocg->pd.online
	 * to avoid warn when removing blkcg or disk.
	 */
	WARN_ON_ONCE(list_empty(&iocg->active_list) && iocg->pd.online);
	WARN_ON_ONCE(iocg->inuse > 1);

	iocg->abs_vdebt -= min(abs_vpay, iocg->abs_vdebt);

	/* if debt is paid in full, restore inuse */
	if (!iocg->abs_vdebt) {
		iocg->stat.indebt_us += now->now - iocg->indebt_since;
		iocg->indebt_since = 0;

		propagate_weights(iocg, iocg->active, iocg->last_inuse,
				  false, now);
	}
}

/*
 * Wake callback for one waiter; @key is the struct iocg_wake_ctx built by the
 * waker. The waiter's cost at ctx->hw_inuse is deducted from ctx->vbudget
 * unconditionally. If the budget goes negative, -1 is returned and the waiter
 * is left queued. Otherwise the bio is committed, the waiter is marked
 * committed, woken and unlinked, and 0 is returned.
 */
static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
			int flags, void *key)
{
	struct iocg_wait *wait = container_of(wq_entry, struct iocg_wait, wait);
	struct iocg_wake_ctx *ctx = key;
	u64 cost = abs_cost_to_cost(wait->abs_cost, ctx->hw_inuse);

	ctx->vbudget -= cost;

	if (ctx->vbudget < 0)
		return -1;

	iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
	wait->committed = true;

	/*
	 * autoremove_wake_function() removes the wait entry only when it
	 * actually changed the task state. We want the wait always removed.
	 * Remove explicitly and use default_wake_function(). Note that the
	 * order of operations is important as finish_wait() tests whether
	 * @wq_entry is removed without grabbing the lock.
	 */
	default_wake_function(wq_entry, mode, flags, key);
	list_del_init_careful(&wq_entry->entry);
	return 0;
}

/*
 * Calculate the accumulated budget, pay debt if @pay_debt and wake up waiters
 * accordingly. When @pay_debt is %true, the caller must be holding ioc->lock in
 * addition to iocg->waitq.lock.
*/
static void iocg_kick_waitq(struct ioc_gq *iocg, bool pay_debt,
			    struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	struct iocg_wake_ctx ctx = { .iocg = iocg };
	u64 vshortage, expires, oexpires;
	s64 vbudget;
	u32 hwa;

	lockdep_assert_held(&iocg->waitq.lock);

	current_hweight(iocg, &hwa, NULL);
	/* budget is how far the device vtime is ahead of this iocg's vtime */
	vbudget = now->vnow - atomic64_read(&iocg->vtime);

	/* pay off debt */
	if (pay_debt && iocg->abs_vdebt && vbudget > 0) {
		u64 abs_vbudget = cost_to_abs_cost(vbudget, hwa);
		u64 abs_vpay = min_t(u64, abs_vbudget, iocg->abs_vdebt);
		u64 vpay = abs_cost_to_cost(abs_vpay, hwa);

		lockdep_assert_held(&ioc->lock);

		/* the payment consumes budget: advance both vtime counters */
		atomic64_add(vpay, &iocg->vtime);
		atomic64_add(vpay, &iocg->done_vtime);
		iocg_pay_debt(iocg, abs_vpay, now);
		vbudget -= vpay;
	}

	if (iocg->abs_vdebt || iocg->delay)
		iocg_kick_delay(iocg, now);

	/*
	 * Debt can still be outstanding if we haven't paid all yet or the
	 * caller raced and called without @pay_debt. Shouldn't wake up waiters
	 * under debt. Make sure @vbudget reflects the outstanding amount and is
	 * not positive.
	 */
	if (iocg->abs_vdebt) {
		s64 vdebt = abs_cost_to_cost(iocg->abs_vdebt, hwa);
		vbudget = min_t(s64, 0, vbudget - vdebt);
	}

	/*
	 * Wake up the ones which are due and see how much vtime we'll need for
	 * the next one. As paying off debt restores hw_inuse, it must be read
	 * after the above debt payment.
	 */
	ctx.vbudget = vbudget;
	current_hweight(iocg, NULL, &ctx.hw_inuse);

	__wake_up_locked_key(&iocg->waitq, TASK_NORMAL, &ctx);

	/* everyone got woken up: close the wait stat interval and finish */
	if (!waitqueue_active(&iocg->waitq)) {
		if (iocg->wait_since) {
			iocg->stat.wait_us += now->now - iocg->wait_since;
			iocg->wait_since = 0;
		}
		return;
	}

	if (!iocg->wait_since)
		iocg->wait_since = now->now;

	/* waiters remain, so the wake pass must have run the budget negative */
	if (WARN_ON_ONCE(ctx.vbudget >= 0))
		return;

	/* determine next wakeup, add a timer margin to guarantee chunking */
	vshortage = -ctx.vbudget;
	expires = now->now_ns +
		DIV64_U64_ROUND_UP(vshortage, ioc->vtime_base_rate) *
		NSEC_PER_USEC;
	expires += ioc->timer_slack_ns;

	/* if already active and close enough, don't bother */
	oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->waitq_timer));
	if (hrtimer_is_queued(&iocg->waitq_timer) &&
	    abs(oexpires - expires) <= ioc->timer_slack_ns)
		return;

	hrtimer_start_range_ns(&iocg->waitq_timer, ns_to_ktime(expires),
			       ioc->timer_slack_ns, HRTIMER_MODE_ABS);
}

/*
 * hrtimer callback for @iocg->waitq_timer: take a time snapshot and kick the
 * waitq. Debt is paid only if abs_vdebt was observed non-zero before locking.
 * The timer is not restarted from here.
 *
 * NOTE(review): iocg_lock()/iocg_unlock() are defined outside this chunk;
 * presumably they also take ioc->lock when @pay_debt is set, as
 * iocg_kick_waitq() asserts - confirm against their definitions.
 */
static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
{
	struct ioc_gq *iocg = container_of(timer, struct ioc_gq, waitq_timer);
	bool pay_debt = READ_ONCE(iocg->abs_vdebt);
	struct ioc_now now;
	unsigned long flags;

	ioc_now(iocg->ioc, &now);

	iocg_lock(iocg, pay_debt, &flags);
	iocg_kick_waitq(iocg, pay_debt, &now);
	iocg_unlock(iocg, pay_debt, &flags);

	return HRTIMER_NORESTART;
}

/*
 * Sum the per-cpu latency counters of all online CPUs since the previous call
 * and update the per-cpu last_* snapshots.
 *
 * @missed_ppm_ar: two-entry array indexed by READ/WRITE; receives the share of
 *		   missed completions in parts per million (rounded up), or 0
 *		   when nothing was counted for that direction.
 * @rq_wait_pct_p: receives the accumulated rq wait time as a percentage of
 *		   @ioc->period_us.
 */
static void ioc_lat_stat(struct ioc *ioc, u32 *missed_ppm_ar, u32 *rq_wait_pct_p)
{
	u32 nr_met[2] = { };
	u32 nr_missed[2] = { };
	u64 rq_wait_ns = 0;
	int cpu, rw;

	for_each_online_cpu(cpu) {
		struct ioc_pcpu_stat *stat = per_cpu_ptr(ioc->pcpu_stat, cpu);
		u64 this_rq_wait_ns;

		for (rw = READ; rw <= WRITE; rw++) {
			u32 this_met = local_read(&stat->missed[rw].nr_met);
			u32 this_missed = local_read(&stat->missed[rw].nr_missed);

			/* accumulate the delta against the previous snapshot */
			nr_met[rw] += this_met - stat->missed[rw].last_met;
			nr_missed[rw] += this_missed - stat->missed[rw].last_missed;
			stat->missed[rw].last_met = this_met;
			stat->missed[rw].last_missed = this_missed;
		}

		this_rq_wait_ns = local64_read(&stat->rq_wait_ns);
		rq_wait_ns += this_rq_wait_ns - stat->last_rq_wait_ns;
		stat->last_rq_wait_ns = this_rq_wait_ns;
	}

	for (rw = READ; rw <= WRITE; rw++) {
		if (nr_met[rw] + nr_missed[rw])
			missed_ppm_ar[rw] =
				DIV64_U64_ROUND_UP((u64)nr_missed[rw] * MILLION,
						   nr_met[rw] + nr_missed[rw]);
		else
			missed_ppm_ar[rw] = 0;
	}

	*rq_wait_pct_p = div64_u64(rq_wait_ns * 100,
				   ioc->period_us * NSEC_PER_USEC);
}

/* was iocg idle this period? */
static bool iocg_is_idle(struct ioc_gq *iocg)
{
	struct ioc *ioc = iocg->ioc;

	/* did something get issued this period? */
	if (atomic64_read(&iocg->active_period) ==
	    atomic64_read(&ioc->cur_period))
		return false;

	/* is something in flight? */
	if (atomic64_read(&iocg->done_vtime) != atomic64_read(&iocg->vtime))
		return false;

	return true;
}

/*
 * Call this function on the target leaf @iocg's to build pre-order traversal
 * list of all the ancestors in @inner_walk. The inner nodes are linked through
 * ->walk_list and the caller is responsible for dissolving the list after use.
 */
static void iocg_build_inner_walk(struct ioc_gq *iocg,
				  struct list_head *inner_walk)
{
	int lvl;

	WARN_ON_ONCE(!list_empty(&iocg->walk_list));

	/*
	 * find the first ancestor which hasn't been visited yet: scan upwards
	 * and stop at the nearest ancestor already on a walk list (lvl ends at
	 * -1 if there is none); the levels below it are the unvisited ones
	 */
	for (lvl = iocg->level - 1; lvl >= 0; lvl--) {
		if (!list_empty(&iocg->ancestors[lvl]->walk_list))
			break;
	}

	/* walk down and visit the inner nodes to get pre-order traversal */
	while (++lvl <= iocg->level - 1) {
		struct ioc_gq *inner = iocg->ancestors[lvl];

		/* record traversal order */
		list_add_tail(&inner->walk_list, inner_walk);
	}
}

/* propagate the deltas to the parent */
static void iocg_flush_stat_upward(struct ioc_gq *iocg)
{
	/* level 0 has no parent to add to */
	if (iocg->level > 0) {
		struct iocg_stat *parent_stat =
			&iocg->ancestors[iocg->level - 1]->stat;

		parent_stat->usage_us +=
			iocg->stat.usage_us - iocg->last_stat.usage_us;
		parent_stat->wait_us +=
			iocg->stat.wait_us - iocg->last_stat.wait_us;
		parent_stat->indebt_us +=
			iocg->stat.indebt_us - iocg->last_stat.indebt_us;
		parent_stat->indelay_us +=
			iocg->stat.indelay_us - iocg->last_stat.indelay_us;
	}

	/* remember what has been propagated so far */
	iocg->last_stat = iocg->stat;
}

/*
 * collect per-cpu counters and propagate the deltas to the parent
 *
 * @now is not used by this function.
 */
static void iocg_flush_stat_leaf(struct ioc_gq *iocg, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	u64 abs_vusage = 0;
	u64 vusage_delta;
	int cpu;

	lockdep_assert_held(&iocg->ioc->lock);

	/* collect per-cpu counters */
	for_each_possible_cpu(cpu) {
		abs_vusage += local64_read(
				per_cpu_ptr(&iocg->pcpu_stat->abs_vusage, cpu));
	}
	vusage_delta = abs_vusage - iocg->last_stat_abs_vusage;
	iocg->last_stat_abs_vusage = abs_vusage;

	/* convert the vtime delta to usecs at the current base vrate */
	iocg->usage_delta_us = div64_u64(vusage_delta, ioc->vtime_base_rate);
	iocg->stat.usage_us += iocg->usage_delta_us;

	iocg_flush_stat_upward(iocg);
}

/* get stat counters ready for reading on all active iocgs */
static void iocg_flush_stat(struct list_head *target_iocgs, struct ioc_now *now)
{
	LIST_HEAD(inner_walk);
	struct ioc_gq *iocg, *tiocg;

	/* flush leaves and build inner node walk list */
	list_for_each_entry(iocg, target_iocgs, active_list) {
		iocg_flush_stat_leaf(iocg, now);
		iocg_build_inner_walk(iocg, &inner_walk);
	}

	/* keep flushing upwards by walking the inner list backwards */
	list_for_each_entry_safe_reverse(iocg, tiocg, &inner_walk, walk_list) {
		iocg_flush_stat_upward(iocg);
		list_del_init(&iocg->walk_list);
	}
}

/*
 * Determine what @iocg's hweight_inuse should be after donating unused
 * capacity. @hwm is the upper bound and used to signal no donation. This
 * function also throws away @iocg's excess budget.
*/
static u32 hweight_after_donation(struct ioc_gq *iocg, u32 old_hwi, u32 hwm,
				  u32 usage, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	u64 vtime = atomic64_read(&iocg->vtime);
	s64 surplus, lag, margin, hwi;

	/* a debtor's inuse belongs to debt handling - report the minimum */
	if (iocg->abs_vdebt)
		return 1;

	/* somebody is waiting for budget, nothing to donate */
	if (waitqueue_active(&iocg->waitq))
		return hwm;

	/* vtime is within the minimum margin of vnow, nothing to donate */
	if (time_after64(vtime, now->vnow - ioc->margins.min))
		return hwm;

	/*
	 * Budget beyond the target margin is discarded. The discarded amount,
	 * scaled by the previous hweight_inuse, is booked against vtime_err.
	 */
	surplus = now->vnow - vtime - ioc->margins.target;
	if (surplus > 0) {
		atomic64_add(surplus, &iocg->vtime);
		atomic64_add(surplus, &iocg->done_vtime);
		vtime += surplus;
		ioc->vtime_err -= div64_u64(surplus * old_hwi, WEIGHT_ONE);
	}

	/*
	 * All fractions below are scaled by WEIGHT_ONE.
	 *
	 * @lag is the remaining budget (vnow - vtime) relative to the vtime
	 * which elapsed in this period and @margin is MARGIN_TARGET. Assuming
	 * the iocg keeps consuming @usage, the budget ends the next period at
	 * MARGIN_TARGET when
	 *
	 *   usage = (1 - MARGIN_TARGET + lag) * hwi
	 *
	 * which gives
	 *
	 *   hwi = usage / (1 - MARGIN_TARGET + lag)
	 */
	margin = WEIGHT_ONE * MARGIN_TARGET_PCT / 100;
	lag = div64_s64(WEIGHT_ONE * (now->vnow - vtime),
			now->vnow - ioc->period_at_vtime);
	hwi = div64_s64(WEIGHT_ONE * usage, WEIGHT_ONE - margin + lag);

	/* never below the minimum and never above the no-donation bound */
	return clamp_t(s64, hwi, 1, hwm);
}

/*
 * For work-conservation, an iocg which isn't using all of its share should
 * donate the leftover to other iocgs. There are two ways to achieve this - 1.
 * bumping up vrate accordingly 2. lowering the donating iocg's inuse weight.
*
 * #1 is mathematically simpler but has the drawback of requiring synchronous
 * global hweight_inuse updates when idle iocg's get activated or inuse weights
 * change due to donation snapbacks as it has the possibility of grossly
 * overshooting what's allowed by the model and vrate.
 *
 * #2 is inherently safe with local operations. The donating iocg can easily
 * snap back to higher weights when needed without worrying about impacts on
 * other nodes as the impacts will be inherently correct. This also makes idle
 * iocg activations safe. The only effect activations have is decreasing
 * hweight_inuse of others, the right solution to which is for those iocgs to
 * snap back to higher weights.
 *
 * So, we go with #2. The challenge is calculating how each donating iocg's
 * inuse should be adjusted to achieve the target donation amounts. This is done
 * using Andy's method described in the following pdf.
 *
 *   https://drive.google.com/file/d/1PsJwxPFtjUnwOY1QJ5AeICCcsL7BM3bo
 *
 * Given the weights and target after-donation hweight_inuse values, Andy's
 * method determines how the proportional distribution should look like at each
 * sibling level to maintain the relative relationship between all non-donating
 * pairs. To roughly summarize, it divides the tree into donating and
 * non-donating parts, calculates global donation rate which is used to
 * determine the target hweight_inuse for each node, and then derives per-level
 * proportions.
 *
 * The following pdf shows that global distribution calculated this way can be
 * achieved by scaling inuse weights of donating leaves and propagating the
 * adjustments upwards proportionally.
 *
 *   https://drive.google.com/file/d/1vONz1-fzVO7oY5DXXsLjSxEtYYQbOvsE
 *
 * Combining the above two, we can determine how each leaf iocg's inuse should
 * be adjusted to achieve the target donation.
 *
 *   https://drive.google.com/file/d/1WcrltBOSPN0qXVdBgnKm4mdp9FhuEFQN
 *
 * The inline comments use symbols from the last pdf.
 *
 *   b is the sum of the absolute budgets in the subtree. 1 for the root node.
 *   f is the sum of the absolute budgets of non-donating nodes in the subtree.
 *   t is the sum of the absolute budgets of donating nodes in the subtree.
 *   w is the weight of the node. w = w_f + w_t
 *   w_f is the non-donating portion of w. w_f = w * f / b
 *   w_b is the donating portion of w. w_t = w * t / b
 *   s is the sum of all sibling weights. s = Sum(w) for siblings
 *   s_f and s_t are the non-donating and donating portions of s.
 *
 * Subscript p denotes the parent's counterpart and ' the adjusted value - e.g.
 * w_pt is the donating portion of the parent's weight and w'_pt the same value
 * after adjustments. Subscript r denotes the root node's values.
 */
/*
 * NOTE: the legend entry reading "w_b is the donating portion of w" defines
 * w_t (w = w_f + w_t); "w_b" there is a typo.
 *
 * @surpluses is the list of donating leaf iocgs, linked through ->surplus_list.
 * Their ->hweight_donating and ->hweight_after_donation are read here and are
 * not set by this function. @now is forwarded to tracing and
 * __propagate_weights(). The weight changes are not committed here -
 * __propagate_weights() is used, not propagate_weights().
 */
static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
{
	LIST_HEAD(over_hwa);
	LIST_HEAD(inner_walk);
	struct ioc_gq *iocg, *tiocg, *root_iocg;
	u32 after_sum, over_sum, over_target, gamma;

	/*
	 * It's pretty unlikely but possible for the total sum of
	 * hweight_after_donation's to be higher than WEIGHT_ONE, which will
	 * confuse the following calculations. If such condition is detected,
	 * scale down everyone over its full share equally to keep the sum below
	 * WEIGHT_ONE.
	 */
	after_sum = 0;
	over_sum = 0;
	list_for_each_entry(iocg, surpluses, surplus_list) {
		u32 hwa;

		current_hweight(iocg, &hwa, NULL);
		after_sum += iocg->hweight_after_donation;

		/* collect the ones whose target exceeds their hweight_active */
		if (iocg->hweight_after_donation > hwa) {
			over_sum += iocg->hweight_after_donation;
			list_add(&iocg->walk_list, &over_hwa);
		}
	}

	if (after_sum >= WEIGHT_ONE) {
		/*
		 * The delta should be deducted from the over_sum, calculate
		 * target over_sum value.
		 */
		u32 over_delta = after_sum - (WEIGHT_ONE - 1);
		WARN_ON_ONCE(over_sum <= over_delta);
		over_target = over_sum - over_delta;
	} else {
		/* zero means no scaling is needed */
		over_target = 0;
	}

	/* scale by over_target / over_sum if needed and dissolve the temp list */
	list_for_each_entry_safe(iocg, tiocg, &over_hwa, walk_list) {
		if (over_target)
			iocg->hweight_after_donation =
				div_u64((u64)iocg->hweight_after_donation *
					over_target, over_sum);
		list_del_init(&iocg->walk_list);
	}

	/*
	 * Build pre-order inner node walk list and prepare for donation
	 * adjustment calculations.
	 */
	list_for_each_entry(iocg, surpluses, surplus_list) {
		iocg_build_inner_walk(iocg, &inner_walk);
	}

	/* pre-order puts the topmost ancestor first; it should be level 0 */
	root_iocg = list_first_entry(&inner_walk, struct ioc_gq, walk_list);
	WARN_ON_ONCE(root_iocg->level > 0);

	list_for_each_entry(iocg, &inner_walk, walk_list) {
		iocg->child_adjusted_sum = 0;
		iocg->hweight_donating = 0;
		iocg->hweight_after_donation = 0;
	}

	/*
	 * Propagate the donating budget (b_t) and after donation budget (b'_t)
	 * up the hierarchy.
	 */
	list_for_each_entry(iocg, surpluses, surplus_list) {
		struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];

		parent->hweight_donating += iocg->hweight_donating;
		parent->hweight_after_donation += iocg->hweight_after_donation;
	}

	/* reverse pre-order: children are summed into parents before parents move up */
	list_for_each_entry_reverse(iocg, &inner_walk, walk_list) {
		if (iocg->level > 0) {
			struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];

			parent->hweight_donating += iocg->hweight_donating;
			parent->hweight_after_donation += iocg->hweight_after_donation;
		}
	}

	/*
	 * Calculate inner hwa's (b) and make sure the donation values are
	 * within the accepted ranges as we're doing low res calculations with
	 * roundups.
	 */
	list_for_each_entry(iocg, &inner_walk, walk_list) {
		if (iocg->level) {
			struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];

			iocg->hweight_active = DIV64_U64_ROUND_UP(
				(u64)parent->hweight_active * iocg->active,
				parent->child_active_sum);

		}

		/* enforce after_donation < donating <= active */
		iocg->hweight_donating = min(iocg->hweight_donating,
					     iocg->hweight_active);
		iocg->hweight_after_donation = min(iocg->hweight_after_donation,
						   iocg->hweight_donating - 1);
		if (WARN_ON_ONCE(iocg->hweight_active <= 1 ||
				 iocg->hweight_donating <= 1 ||
				 iocg->hweight_after_donation == 0)) {
			pr_warn("iocg: invalid donation weights in ");
			pr_cont_cgroup_path(iocg_to_blkg(iocg)->blkcg->css.cgroup);
			pr_cont(": active=%u donating=%u after=%u\n",
				iocg->hweight_active, iocg->hweight_donating,
				iocg->hweight_after_donation);
		}
	}

	/*
	 * Calculate the global donation rate (gamma) - the rate to adjust
	 * non-donating budgets by.
	 *
	 * No need to use 64bit multiplication here as the first operand is
	 * guaranteed to be smaller than WEIGHT_ONE (1<<16).
	 *
	 * We know that there are beneficiary nodes and the sum of the donating
	 * hweights can't be whole; however, due to the round-ups during hweight
	 * calculations, root_iocg->hweight_donating might still end up equal to
	 * or greater than whole. Limit the range when calculating the divider.
	 *
	 * gamma = (1 - t_r') / (1 - t_r)
	 */
	gamma = DIV_ROUND_UP(
		(WEIGHT_ONE - root_iocg->hweight_after_donation) * WEIGHT_ONE,
		WEIGHT_ONE - min_t(u32, root_iocg->hweight_donating, WEIGHT_ONE - 1));

	/*
	 * Calculate adjusted hwi, child_adjusted_sum and inuse for the inner
	 * nodes.
	 */
	list_for_each_entry(iocg, &inner_walk, walk_list) {
		struct ioc_gq *parent;
		u32 inuse, wpt, wptp;
		u64 st, sf;

		if (iocg->level == 0) {
			/* adjusted weight sum for 1st level: s' = s * b_pf / b'_pf */
			iocg->child_adjusted_sum = DIV64_U64_ROUND_UP(
				iocg->child_active_sum * (WEIGHT_ONE - iocg->hweight_donating),
				WEIGHT_ONE - iocg->hweight_after_donation);
			continue;
		}

		parent = iocg->ancestors[iocg->level - 1];

		/* b' = gamma * b_f + b_t' */
		iocg->hweight_inuse = DIV64_U64_ROUND_UP(
			(u64)gamma * (iocg->hweight_active - iocg->hweight_donating),
			WEIGHT_ONE) + iocg->hweight_after_donation;

		/* w' = s' * b' / b'_p */
		inuse = DIV64_U64_ROUND_UP(
			(u64)parent->child_adjusted_sum * iocg->hweight_inuse,
			parent->hweight_inuse);

		/* adjusted weight sum for children: s' = s_f + s_t * w'_pt / w_pt */
		st = DIV64_U64_ROUND_UP(
			iocg->child_active_sum * iocg->hweight_donating,
			iocg->hweight_active);
		sf = iocg->child_active_sum - st;
		wpt = DIV64_U64_ROUND_UP(
			(u64)iocg->active * iocg->hweight_donating,
			iocg->hweight_active);
		wptp = DIV64_U64_ROUND_UP(
			(u64)inuse * iocg->hweight_after_donation,
			iocg->hweight_inuse);

		iocg->child_adjusted_sum = sf + DIV64_U64_ROUND_UP(st * wptp, wpt);
	}

	/*
	 * All inner nodes now have ->hweight_inuse and ->child_adjusted_sum and
	 * we can finally determine leaf adjustments.
	 */
	list_for_each_entry(iocg, surpluses, surplus_list) {
		struct ioc_gq *parent = iocg->ancestors[iocg->level - 1];
		u32 inuse;

		/*
		 * In-debt iocgs participated in the donation calculation with
		 * the minimum target hweight_inuse. Configuring inuse
		 * accordingly would work fine but debt handling expects
		 * @iocg->inuse stay at the minimum and we don't wanna
		 * interfere.
		 */
		if (iocg->abs_vdebt) {
			WARN_ON_ONCE(iocg->inuse > 1);
			continue;
		}

		/* w' = s' * b' / b'_p, note that b' == b'_t for donating leaves */
		inuse = DIV64_U64_ROUND_UP(
			parent->child_adjusted_sum * iocg->hweight_after_donation,
			parent->hweight_inuse);

		TRACE_IOCG_PATH(inuse_transfer, iocg, now,
				iocg->inuse, inuse,
				iocg->hweight_inuse,
				iocg->hweight_after_donation);

		__propagate_weights(iocg, iocg->active, inuse, true, now);
	}

	/* walk list should be dissolved after use */
	list_for_each_entry_safe(iocg, tiocg, &inner_walk, walk_list)
		list_del_init(&iocg->walk_list);
}

/*
 * A low weight iocg can amass a large amount of debt, for example, when
 * anonymous memory gets reclaimed aggressively. If the system has a lot of
 * memory paired with a slow IO device, the debt can span multiple seconds or
 * more. If there are no other subsequent IO issuers, the in-debt iocg may end
 * up blocked paying its debt while the IO device is idle.
 *
 * The following protects against such cases. If the device has been
 * sufficiently idle for a while, the debts are halved and delays are
 * recalculated.
 *
 * @usage_us_sum: usage to add to the running sum for this forgiveness period.
 * @nr_debtors: number of iocgs with debt or delay; zero resets the cycle.
 */
static void ioc_forgive_debts(struct ioc *ioc, u64 usage_us_sum, int nr_debtors,
			      struct ioc_now *now)
{
	struct ioc_gq *iocg;
	u64 dur, usage_pct, nr_cycles, nr_cycles_shift;

	/* if no debtor, reset the cycle */
	if (!nr_debtors) {
		ioc->dfgv_period_at = now->now;
		ioc->dfgv_period_rem = 0;
		ioc->dfgv_usage_us_sum = 0;
		return;
	}

	/*
	 * Debtors can pass through a lot of writes choking the device and we
	 * don't want to be forgiving debts while the device is struggling from
	 * write bursts. If we're missing latency targets, consider the device
	 * fully utilized.
	 */
	if (ioc->busy_level > 0)
		usage_us_sum = max_t(u64, usage_us_sum, ioc->period_us);

	ioc->dfgv_usage_us_sum += usage_us_sum;
	if (time_before64(now->now, ioc->dfgv_period_at + DFGV_PERIOD))
		return;

	/*
	 * At least DFGV_PERIOD has passed since the last period. Calculate the
	 * average usage and reset the period counters.
	 */
	dur = now->now - ioc->dfgv_period_at;
	usage_pct = div64_u64(100 * ioc->dfgv_usage_us_sum, dur);

	ioc->dfgv_period_at = now->now;
	ioc->dfgv_usage_us_sum = 0;

	/* if was too busy, reset everything */
	if (usage_pct > DFGV_USAGE_PCT) {
		ioc->dfgv_period_rem = 0;
		return;
	}

	/*
	 * Usage is lower than threshold. Let's forgive some debts. Debt
	 * forgiveness runs off of the usual ioc timer but its period usually
	 * doesn't match ioc's. Compensate the difference by performing the
	 * reduction as many times as would fit in the duration since the last
	 * run and carrying over the left-over duration in @ioc->dfgv_period_rem
	 * - if ioc period is 75% of DFGV_PERIOD, one out of three consecutive
	 * reductions is doubled.
	 */
	nr_cycles = dur + ioc->dfgv_period_rem;
	/* do_div() leaves the quotient in nr_cycles and returns the remainder */
	ioc->dfgv_period_rem = do_div(nr_cycles, DFGV_PERIOD);

	list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
		u64 __maybe_unused old_debt, __maybe_unused old_delay;

		if (!iocg->abs_vdebt && !iocg->delay)
			continue;

		spin_lock(&iocg->waitq.lock);

		old_debt = iocg->abs_vdebt;
		old_delay = iocg->delay;

		/* halve once per cycle; cap the shift and never drop to zero here */
		nr_cycles_shift = min_t(u64, nr_cycles, BITS_PER_LONG - 1);
		if (iocg->abs_vdebt)
			iocg->abs_vdebt = iocg->abs_vdebt >> nr_cycles_shift ?: 1;

		if (iocg->delay)
			iocg->delay = iocg->delay >> nr_cycles_shift ?: 1;

		iocg_kick_waitq(iocg, true, now);

		TRACE_IOCG_PATH(iocg_forgive_debt, iocg, now, usage_pct,
				old_debt, iocg->abs_vdebt,
				old_delay, iocg->delay);

		spin_unlock(&iocg->waitq.lock);
	}
}

/*
 * Check the active iocgs' state to avoid oversleeping and deactive
 * idle iocgs.
 *
 * Since waiters determine the sleep durations based on the vrate
 * they saw at the time of sleep, if vrate has increased, some
 * waiters could be sleeping for too long. Wake up tardy waiters
 * which should have woken up in the last period and expire idle
 * iocgs.
*/
/*
 * Returns the number of iocgs that still have abs_vdebt or delay set after
 * their waitq has been kicked. Each visited iocg is handled under its
 * waitq.lock; the caller visible in this file (ioc_timer_fn) invokes this
 * with ioc->lock held.
 */
static int ioc_check_iocgs(struct ioc *ioc, struct ioc_now *now)
{
	int nr_debtors = 0;
	struct ioc_gq *iocg, *tiocg;

	/* _safe iteration: idle iocgs are unlinked from active_iocgs below */
	list_for_each_entry_safe(iocg, tiocg, &ioc->active_iocgs, active_list) {
		/* lockless pre-check: nothing to wake, pay off or deactivate */
		if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
		    !iocg->delay && !iocg_is_idle(iocg))
			continue;

		spin_lock(&iocg->waitq.lock);

		/* flush wait and indebt stat deltas */
		if (iocg->wait_since) {
			iocg->stat.wait_us += now->now - iocg->wait_since;
			iocg->wait_since = now->now;
		}
		if (iocg->indebt_since) {
			iocg->stat.indebt_us +=
				now->now - iocg->indebt_since;
			iocg->indebt_since = now->now;
		}
		if (iocg->indelay_since) {
			iocg->stat.indelay_us +=
				now->now - iocg->indelay_since;
			iocg->indelay_since = now->now;
		}

		/* conditions are re-evaluated now that waitq.lock is held */
		if (waitqueue_active(&iocg->waitq) || iocg->abs_vdebt ||
		    iocg->delay) {
			/* might be oversleeping vtime / hweight changes, kick */
			iocg_kick_waitq(iocg, true, now);
			if (iocg->abs_vdebt || iocg->delay)
				nr_debtors++;
		} else if (iocg_is_idle(iocg)) {
			/* no waiter and idle, deactivate */
			u64 vtime = atomic64_read(&iocg->vtime);
			s64 excess;

			/*
			 * @iocg has been inactive for a full duration and will
			 * have a high budget. Account anything above target as
			 * error and throw away. On reactivation, it'll start
			 * with the target budget.
			 */
			excess = now->vnow - vtime - ioc->margins.target;
			if (excess > 0) {
				u32 old_hwi;

				current_hweight(iocg, NULL, &old_hwi);
				ioc->vtime_err -= div64_u64(excess * old_hwi,
							    WEIGHT_ONE);
			}

			TRACE_IOCG_PATH(iocg_idle, iocg, now,
					atomic64_read(&iocg->active_period),
					atomic64_read(&ioc->cur_period), vtime);
			/* zero both active and inuse, then unlink from the active list */
			__propagate_weights(iocg, 0, 0, false, now);
			list_del_init(&iocg->active_list);
		}

		spin_unlock(&iocg->waitq.lock);
	}

	commit_weights(ioc);
	return nr_debtors;
}

/*
 * Timer callback for @ioc->timer. With ioc->lock held it, in order:
 * checks the active iocgs (waking waiters, deactivating idle ones), flushes
 * usage stats, decides per iocg whether it has surplus to donate or is short
 * on budget, transfers surpluses, updates busy_level from the latency and
 * rq-wait numbers gathered by ioc_lat_stat(), adjusts the base vrate,
 * forgives debts, and finally either starts the next period or marks the
 * controller IOC_IDLE when no iocg is active.
 */
static void ioc_timer_fn(struct timer_list *timer)
{
	struct ioc *ioc = container_of(timer, struct ioc, timer);
	struct ioc_gq *iocg, *tiocg;
	struct ioc_now now;
	LIST_HEAD(surpluses);
	int nr_debtors, nr_shortages = 0, nr_lagging = 0;
	u64 usage_us_sum = 0;
	u32 ppm_rthr;
	u32 ppm_wthr;
	u32 missed_ppm[2], rq_wait_pct;
	u64 period_vtime;
	int prev_busy_level;

	/* how were the latencies during the period? */
	ioc_lat_stat(ioc, missed_ppm, &rq_wait_pct);

	/* take care of active iocgs */
	spin_lock_irq(&ioc->lock);

	/* allowed miss thresholds in ppm: MILLION minus the configured qos ppm */
	ppm_rthr = MILLION - ioc->params.qos[QOS_RPPM];
	ppm_wthr = MILLION - ioc->params.qos[QOS_WPPM];
	ioc_now(ioc, &now);

	/* vtime elapsed since the period started; zero is unexpected, bail out */
	period_vtime = now.vnow - ioc->period_at_vtime;
	if (WARN_ON_ONCE(!period_vtime)) {
		spin_unlock_irq(&ioc->lock);
		return;
	}

	nr_debtors = ioc_check_iocgs(ioc, &now);

	/*
	 * Wait and indebt stat are flushed above and the donation calculation
	 * below needs updated usage stat. Let's bring stat up-to-date.
	 */
	iocg_flush_stat(&ioc->active_iocgs, &now);

	/* calc usage and see whether some weights need to be moved around */
	list_for_each_entry(iocg, &ioc->active_iocgs, active_list) {
		u64 vdone, vtime, usage_us;
		u32 hw_active, hw_inuse;

		/*
		 * Collect unused and wind vtime closer to vnow to prevent
		 * iocgs from accumulating a large amount of budget.
		 */
		/* NOTE(review): the statements right below only sample values */
		vdone = atomic64_read(&iocg->done_vtime);
		vtime = atomic64_read(&iocg->vtime);
		current_hweight(iocg, &hw_active, &hw_inuse);

		/*
		 * Latency QoS detection doesn't account for IOs which are
		 * in-flight for longer than a period.  Detect them by
		 * comparing vdone against period start.  If lagging behind
		 * IOs from past periods, don't increase vrate.
		 */
		if ((ppm_rthr != MILLION || ppm_wthr != MILLION) &&
		    !atomic_read(&iocg_to_blkg(iocg)->use_delay) &&
		    time_after64(vtime, vdone) &&
		    time_after64(vtime, now.vnow -
				 MAX_LAGGING_PERIODS * period_vtime) &&
		    time_before64(vdone, now.vnow - period_vtime))
			nr_lagging++;

		/*
		 * Determine absolute usage factoring in in-flight IOs to avoid
		 * high-latency completions appearing as idle.
		 */
		usage_us = iocg->usage_delta_us;
		usage_us_sum += usage_us;

		/* see whether there's surplus vtime */
		WARN_ON_ONCE(!list_empty(&iocg->surplus_list));
		if (hw_inuse < hw_active ||
		    (!waitqueue_active(&iocg->waitq) &&
		     time_before64(vtime, now.vnow - ioc->margins.low))) {
			u32 hwa, old_hwi, hwm, new_hwi, usage;
			u64 usage_dur;

			/* issued-but-not-completed vtime counts as usage too */
			if (vdone != vtime) {
				u64 inflight_us = DIV64_U64_ROUND_UP(
					cost_to_abs_cost(vtime - vdone, hw_inuse),
					ioc->vtime_base_rate);

				usage_us = max(usage_us, inflight_us);
			}

			/* convert to hweight based usage ratio */
			/* measure from the later of activation and period start, >= 1 */
			if (time_after64(iocg->activated_at, ioc->period_at))
				usage_dur = max_t(u64, now.now - iocg->activated_at, 1);
			else
				usage_dur = max_t(u64, now.now - ioc->period_at, 1);

			usage = clamp_t(u32,
				DIV64_U64_ROUND_UP(usage_us * WEIGHT_ONE,
						   usage_dur),
				1, WEIGHT_ONE);

			/*
			 * Already donating or accumulated enough to start.
			 * Determine the donation amount.
			 */
			current_hweight(iocg, &hwa, &old_hwi);
			hwm = current_hweight_max(iocg);
			new_hwi = hweight_after_donation(iocg, old_hwi, hwm,
							 usage, &now);
			/*
			 * Donation calculation assumes hweight_after_donation
			 * to be positive, a condition that a donor w/ hwa < 2
			 * can't meet. Don't bother with donation if hwa is
			 * below 2. It's not gonna make a meaningful difference
			 * anyway.
			 */
			if (new_hwi < hwm && hwa >= 2) {
				iocg->hweight_donating = hwa;
				iocg->hweight_after_donation = new_hwi;
				list_add(&iocg->surplus_list, &surpluses);
			} else if (!iocg->abs_vdebt) {
				/*
				 * @iocg doesn't have enough to donate. Reset
				 * its inuse to active.
				 *
				 * Don't reset debtors as their inuse's are
				 * owned by debt handling. This shouldn't affect
				 * donation calculation in any meaningful way
				 * as @iocg doesn't have a meaningful amount of
				 * share anyway.
				 */
				TRACE_IOCG_PATH(inuse_shortage, iocg, &now,
						iocg->inuse, iocg->active,
						iocg->hweight_inuse, new_hwi);

				__propagate_weights(iocg, iocg->active,
						    iocg->active, true, &now);
				nr_shortages++;
			}
		} else {
			/* genuinely short on vtime */
			nr_shortages++;
		}
	}

	/* donations only make sense when somebody is actually short */
	if (!list_empty(&surpluses) && nr_shortages)
		transfer_surpluses(&surpluses, &now);

	commit_weights(ioc);

	/* surplus list should be dissolved after use */
	list_for_each_entry_safe(iocg, tiocg, &surpluses, surplus_list)
		list_del_init(&iocg->surplus_list);

	/*
	 * If q is getting clogged or we're missing too much, we're issuing
	 * too much IO and should lower vtime rate.  If we're not missing
	 * and experiencing shortages but not surpluses, we're too stingy
	 * and should increase vtime rate.
	 */
	prev_busy_level = ioc->busy_level;
	if (rq_wait_pct > RQ_WAIT_BUSY_PCT ||
	    missed_ppm[READ] > ppm_rthr ||
	    missed_ppm[WRITE] > ppm_wthr) {
		/* clearly missing QoS targets, slow down vrate */
		ioc->busy_level = max(ioc->busy_level, 0);
		ioc->busy_level++;
	} else if (rq_wait_pct <= RQ_WAIT_BUSY_PCT * UNBUSY_THR_PCT / 100 &&
		   missed_ppm[READ] <= ppm_rthr * UNBUSY_THR_PCT / 100 &&
		   missed_ppm[WRITE] <= ppm_wthr * UNBUSY_THR_PCT / 100) {
		/* QoS targets are being met with >25% margin */
		if (nr_shortages) {
			/*
			 * We're throttling while the device has spare
			 * capacity.  If vrate was being slowed down, stop.
			 */
			ioc->busy_level = min(ioc->busy_level, 0);

			/*
			 * If there are IOs spanning multiple periods, wait
			 * them out before pushing the device harder.
			 */
			if (!nr_lagging)
				ioc->busy_level--;
		} else {
			/*
			 * Nobody is being throttled and the users aren't
			 * issuing enough IOs to saturate the device.  We
			 * simply don't know how close the device is to
			 * saturation.  Coast.
			 */
			ioc->busy_level = 0;
		}
	} else {
		/* inside the hysteresis margin, we're good */
		ioc->busy_level = 0;
	}

	ioc->busy_level = clamp(ioc->busy_level, -1000, 1000);

	ioc_adjust_base_vrate(ioc, rq_wait_pct, nr_lagging, nr_shortages,
			      prev_busy_level, missed_ppm);

	ioc_refresh_params(ioc, false);

	ioc_forgive_debts(ioc, usage_us_sum, nr_debtors, &now);

	/*
	 * This period is done.  Move onto the next one.  If nothing's
	 * going on with the device, stop the timer.
	 */
	atomic64_inc(&ioc->cur_period);

	if (ioc->running != IOC_STOP) {
		if (!list_empty(&ioc->active_iocgs)) {
			ioc_start_period(ioc, &now);
		} else {
			/* nothing active: clear busy/err state and go idle */
			ioc->busy_level = 0;
			ioc->vtime_err = 0;
			ioc->running = IOC_IDLE;
		}

		ioc_refresh_vrate(ioc, &now);
	}

	spin_unlock_irq(&ioc->lock);
}

/*
 * Convert @abs_cost into a cost scaled by @iocg's current hweight_inuse and
 * return it. When the margin (@now->vnow - @vtime - cost) has deteriorated
 * and @iocg isn't already using its full active weight, inuse is bumped up
 * under ioc->lock and the cost is recomputed with the new hweight.
 */
static u64 adjust_inuse_and_calc_cost(struct ioc_gq *iocg, u64 vtime,
				      u64 abs_cost, struct ioc_now *now)
{
	struct ioc *ioc = iocg->ioc;
	struct ioc_margins *margins = &ioc->margins;
	u32 __maybe_unused old_inuse = iocg->inuse, __maybe_unused old_hwi;
	u32 hwi, adj_step;
	s64 margin;
	u64 cost, new_inuse;
	unsigned long flags;

	current_hweight(iocg, NULL, &hwi);
	old_hwi = hwi;
	cost = abs_cost_to_cost(abs_cost, hwi);
	margin = now->vnow - vtime - cost;

	/* debt handling owns inuse for debtors */
	if (iocg->abs_vdebt)
		return cost;

	/*
	 * We only increase inuse during period and do so if the margin has
	 * deteriorated since the previous adjustment.
	 */
	if (margin >= iocg->saved_margin || margin >= margins->low ||
	    iocg->inuse == iocg->active)
		return cost;

	spin_lock_irqsave(&ioc->lock, flags);

	/* we own inuse only when @iocg is in the normal active state */
	if (iocg->abs_vdebt || list_empty(&iocg->active_list)) {
		spin_unlock_irqrestore(&ioc->lock, flags);
		return cost;
	}

	/*
	 * Bump up inuse till @abs_cost fits in the existing budget.
	 * adj_step must be determined after acquiring ioc->lock - we might
	 * have raced and lost to another thread for activation and could
	 * be reading 0 iocg->active before ioc->lock which will lead to
	 * infinite loop.
*/
	new_inuse = iocg->inuse;
	adj_step = DIV_ROUND_UP(iocg->active * INUSE_ADJ_STEP_PCT, 100);
	do {
		new_inuse = new_inuse + adj_step;
		propagate_weights(iocg, iocg->active, new_inuse, true, now);
		current_hweight(iocg, NULL, &hwi);
		cost = abs_cost_to_cost(abs_cost, hwi);
		/* stop once the cost fits in vnow or inuse has reached active */
	} while (time_after64(vtime + cost, now->vnow) &&
		 iocg->inuse != iocg->active);

	spin_unlock_irqrestore(&ioc->lock, flags);

	TRACE_IOCG_PATH(inuse_adjust, iocg, now,
			old_inuse, iocg->inuse, old_hwi, hwi);

	return cost;
}

/*
 * Compute the absolute cost of @bio using the linear model coefficients in
 * @iocg->ioc->params.lcoefs and store it in *@costp.
 *
 * The cost is a per-IO component (random or sequential coefficient, chosen
 * by the distance from @iocg->cursor; skipped entirely when @is_merge) plus
 * a per-page component. Empty bios and operations other than
 * REQ_OP_READ / REQ_OP_WRITE cost 0.
 */
static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
				    bool is_merge, u64 *costp)
{
	struct ioc *ioc = iocg->ioc;
	u64 coef_seqio, coef_randio, coef_page;
	/* size in pages, never less than one */
	u64 pages = max_t(u64, bio_sectors(bio) >> IOC_SECT_TO_PAGE_SHIFT, 1);
	u64 seek_pages = 0;
	u64 cost = 0;

	/* Can't calculate cost for empty bio */
	if (!bio->bi_iter.bi_size)
		goto out;

	switch (bio_op(bio)) {
	case REQ_OP_READ:
		coef_seqio	= ioc->params.lcoefs[LCOEF_RSEQIO];
		coef_randio	= ioc->params.lcoefs[LCOEF_RRANDIO];
		coef_page	= ioc->params.lcoefs[LCOEF_RPAGE];
		break;
	case REQ_OP_WRITE:
		coef_seqio	= ioc->params.lcoefs[LCOEF_WSEQIO];
		coef_randio	= ioc->params.lcoefs[LCOEF_WRANDIO];
		coef_page	= ioc->params.lcoefs[LCOEF_WPAGE];
		break;
	default:
		goto out;
	}

	/* a zero cursor leaves seek_pages at 0, i.e. treated as sequential */
	if (iocg->cursor) {
		seek_pages = abs(bio->bi_iter.bi_sector - iocg->cursor);
		seek_pages >>= IOC_SECT_TO_PAGE_SHIFT;
	}

	if (!is_merge) {
		if (seek_pages > LCOEF_RANDIO_PAGES) {
			cost += coef_randio;
		} else {
			cost += coef_seqio;
		}
	}
	cost += pages * coef_page;
out:
	*costp = cost;
}

/* Value-returning wrapper around calc_vtime_cost_builtin(). */
static u64 calc_vtime_cost(struct bio *bio, struct ioc_gq *iocg, bool is_merge)
{
	u64 cost;

	calc_vtime_cost_builtin(bio, iocg, is_merge, &cost);
	return cost;
}

/*
 * Store in *@costp the size-only (per-page) cost of @rq: the page count from
 * blk_rq_stats_sectors() times the read or write page coefficient, or 0 for
 * other operations.
 */
static void calc_size_vtime_cost_builtin(struct request *rq, struct ioc *ioc,
					 u64 *costp)
{
	unsigned int pages = blk_rq_stats_sectors(rq) >> IOC_SECT_TO_PAGE_SHIFT;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		*costp = pages * ioc->params.lcoefs[LCOEF_RPAGE];
		break;
	case REQ_OP_WRITE:
		*costp = pages * ioc->params.lcoefs[LCOEF_WPAGE];
		break;
	default:
		*costp = 0;
	}
}

/* Value-returning wrapper around calc_size_vtime_cost_builtin(). */
static u64 calc_size_vtime_cost(struct request *rq, struct ioc *ioc)
{
	u64 cost;

	calc_size_vtime_cost_builtin(rq, ioc, &cost);
	return cost;
}

/*
 * rq_qos ->throttle hook (wired up in ioc_rqos_ops). Charges @bio to its
 * iocg: issues right away when within budget, records the cost as debt when
 * the bio must not be blocked, and otherwise sleeps on @iocg->waitq until a
 * waker marks the wait entry committed.
 */
static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
{
	struct blkcg_gq *blkg = bio->bi_blkg;
	struct ioc *ioc = rqos_to_ioc(rqos);
	struct ioc_gq *iocg = blkg_to_iocg(blkg);
	struct ioc_now now;
	struct iocg_wait wait;
	u64 abs_cost, cost, vtime;
	bool use_debt, ioc_locked;
	unsigned long flags;

	/* bypass IOs if disabled, still initializing, or for root cgroup */
	if (!ioc->enabled || !iocg || !iocg->level)
		return;

	/* calculate the absolute vtime cost */
	abs_cost = calc_vtime_cost(bio, iocg, false);
	if (!abs_cost)
		return;

	/* @now is passed to iocg_activate() and is used below only if it succeeds */
	if (!iocg_activate(iocg, &now))
		return;

	/* remember where this bio ends for the next seek-distance estimate */
	iocg->cursor = bio_end_sector(bio);
	vtime = atomic64_read(&iocg->vtime);
	cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now);

	/*
	 * If no one's waiting and within budget, issue right away.  The
	 * tests are racy but the races aren't systemic - we only miss once
	 * in a while which is fine.
	 */
	if (!waitqueue_active(&iocg->waitq) && !iocg->abs_vdebt &&
	    time_before_eq64(vtime + cost, now.vnow)) {
		iocg_commit_bio(iocg, bio, abs_cost, cost);
		return;
	}

	/*
	 * We're over budget. This can be handled in two ways. IOs which may
	 * cause priority inversions are punted to @ioc->aux_iocg and charged as
	 * debt. Otherwise, the issuer is blocked on @iocg->waitq. Debt handling
	 * requires @ioc->lock, waitq handling @iocg->waitq.lock. Determine
	 * whether debt handling is needed and acquire locks accordingly.
	 */
	use_debt = bio_issue_as_root_blkg(bio) || fatal_signal_pending(current);
	ioc_locked = use_debt || READ_ONCE(iocg->abs_vdebt);
retry_lock:
	iocg_lock(iocg, ioc_locked, &flags);

	/*
	 * @iocg must stay activated for debt and waitq handling. Deactivation
	 * is synchronized against both ioc->lock and waitq.lock and we won't
	 * get deactivated as long as we're waiting or has debt, so we're good
	 * if we're activated here. In the unlikely cases that we aren't, just
	 * issue the IO.
	 */
	if (unlikely(list_empty(&iocg->active_list))) {
		iocg_unlock(iocg, ioc_locked, &flags);
		iocg_commit_bio(iocg, bio, abs_cost, cost);
		return;
	}

	/*
	 * We're over budget. If @bio has to be issued regardless, remember
	 * the abs_cost instead of advancing vtime. iocg_kick_waitq() will pay
	 * off the debt before waking more IOs.
	 *
	 * This way, the debt is continuously paid off each period with the
	 * actual budget available to the cgroup. If we just wound vtime, we
	 * would incorrectly use the current hw_inuse for the entire amount
	 * which, for example, can lead to the cgroup staying blocked for a
	 * long time even with substantially raised hw_inuse.
	 *
	 * An iocg with vdebt should stay online so that the timer can keep
	 * deducting its vdebt and [de]activate use_delay mechanism
	 * accordingly. We don't want to race against the timer trying to
	 * clear them and leave @iocg inactive w/ dangling use_delay heavily
	 * penalizing the cgroup and its descendants.
	 */
	if (use_debt) {
		iocg_incur_debt(iocg, abs_cost, &now);
		if (iocg_kick_delay(iocg, &now))
			blkcg_schedule_throttle(rqos->disk,
					(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
		iocg_unlock(iocg, ioc_locked, &flags);
		return;
	}

	/* guarantee that iocgs w/ waiters have maximum inuse */
	if (!iocg->abs_vdebt && iocg->inuse != iocg->active) {
		/* drop the locks and retake them with ioc_locked set first */
		if (!ioc_locked) {
			iocg_unlock(iocg, false, &flags);
			ioc_locked = true;
			goto retry_lock;
		}
		propagate_weights(iocg, iocg->active, iocg->active, true,
				  &now);
	}

	/*
	 * Append self to the waitq and schedule the wakeup timer if we're
	 * the first waiter.  The timer duration is calculated based on the
	 * current vrate.  vtime and hweight changes can make it too short
	 * or too long.  Each wait entry records the absolute cost it's
	 * waiting for to allow re-evaluation using a custom wait entry.
	 *
	 * If too short, the timer simply reschedules itself.  If too long,
	 * the period timer will notice and trigger wakeups.
	 *
	 * All waiters are on iocg->waitq and the wait states are
	 * synchronized using waitq.lock.
	 */
	init_waitqueue_func_entry(&wait.wait, iocg_wake_fn);
	wait.wait.private = current;
	wait.bio = bio;
	wait.abs_cost = abs_cost;
	wait.committed = false;	/* will be set true by waker */

	__add_wait_queue_entry_tail(&iocg->waitq, &wait.wait);
	iocg_kick_waitq(iocg, ioc_locked, &now);

	iocg_unlock(iocg, ioc_locked, &flags);

	/* state is set before the check so a wakeup in between isn't lost */
	while (true) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (wait.committed)
			break;
		io_schedule();
	}

	/* waker already committed us, proceed */
	finish_wait(&iocg->waitq, &wait.wait);
}

/*
 * rq_qos ->merge hook (wired up in ioc_rqos_ops). Charges the cost of @bio
 * being merged into @rq: against vtime when there is budget and @rq's first
 * bio already carries a cost, otherwise as debt.
 */
static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
			   struct bio *bio)
{
	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);
	struct ioc *ioc = rqos_to_ioc(rqos);
	sector_t bio_end = bio_end_sector(bio);
	struct ioc_now now;
	u64 vtime, abs_cost, cost;
	unsigned long flags;

	/* bypass if disabled, still initializing, or for root cgroup */
	if (!ioc->enabled || !iocg || !iocg->level)
		return;

	/* is_merge == true: only the per-page component is charged */
	abs_cost = calc_vtime_cost(bio, iocg, true);
	if (!abs_cost)
		return;

	ioc_now(ioc, &now);

	vtime = atomic64_read(&iocg->vtime);
	cost = adjust_inuse_and_calc_cost(iocg, vtime, abs_cost, &now);

	/* update cursor if backmerging into the request at the cursor */
	if (blk_rq_pos(rq) < bio_end &&
	    blk_rq_pos(rq) + blk_rq_sectors(rq) == iocg->cursor)
		iocg->cursor = bio_end;

	/*
	 * Charge if there's enough vtime budget and the existing request has
	 * cost assigned.
	 */
	if (rq->bio && rq->bio->bi_iocost_cost &&
	    time_before_eq64(atomic64_read(&iocg->vtime) + cost, now.vnow)) {
		iocg_commit_bio(iocg, bio, abs_cost, cost);
		return;
	}

	/*
	 * Otherwise, account it as debt if @iocg is online, which it should
	 * be for the vast majority of cases. See debt handling in
	 * ioc_rqos_throttle() for details.
*/
	/* lock order: ioc->lock first, then iocg->waitq.lock */
	spin_lock_irqsave(&ioc->lock, flags);
	spin_lock(&iocg->waitq.lock);

	if (likely(!list_empty(&iocg->active_list))) {
		iocg_incur_debt(iocg, abs_cost, &now);
		if (iocg_kick_delay(iocg, &now))
			blkcg_schedule_throttle(rqos->disk,
					(bio->bi_opf & REQ_SWAP) == REQ_SWAP);
	} else {
		/* not on the active list: charge directly instead of as debt */
		iocg_commit_bio(iocg, bio, abs_cost, cost);
	}

	spin_unlock(&iocg->waitq.lock);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/*
 * rq_qos ->done_bio hook: add the cost recorded in @bio->bi_iocost_cost to
 * the iocg's done_vtime. Bios without an iocg or without a cost are ignored.
 */
static void ioc_rqos_done_bio(struct rq_qos *rqos, struct bio *bio)
{
	struct ioc_gq *iocg = blkg_to_iocg(bio->bi_blkg);

	if (iocg && bio->bi_iocost_cost)
		atomic64_add(bio->bi_iocost_cost, &iocg->done_vtime);
}

/*
 * rq_qos ->done hook: update the per-cpu latency statistics for a completed
 * read or write request. A request counts as "met" when its time since
 * allocation, less the size-proportional time, is within the configured
 * latency target (qos[QOS_RLAT] / qos[QOS_WLAT], scaled by NSEC_PER_USEC);
 * otherwise it counts as "missed". The alloc-to-start wait is accumulated
 * in rq_wait_ns.
 */
static void ioc_rqos_done(struct rq_qos *rqos, struct request *rq)
{
	struct ioc *ioc = rqos_to_ioc(rqos);
	struct ioc_pcpu_stat *ccs;
	u64 on_q_ns, rq_wait_ns, size_nsec;
	int pidx, rw;

	/* nothing to measure when disabled or when timestamps are missing */
	if (!ioc->enabled || !rq->alloc_time_ns || !rq->start_time_ns)
		return;

	switch (req_op(rq)) {
	case REQ_OP_READ:
		pidx = QOS_RLAT;
		rw = READ;
		break;
	case REQ_OP_WRITE:
		pidx = QOS_WLAT;
		rw = WRITE;
		break;
	default:
		return;
	}

	on_q_ns = blk_time_get_ns() - rq->alloc_time_ns;
	rq_wait_ns = rq->start_time_ns - rq->alloc_time_ns;
	size_nsec = div64_u64(calc_size_vtime_cost(rq, ioc), VTIME_PER_NSEC);

	ccs = get_cpu_ptr(ioc->pcpu_stat);

	/* first test also guards the unsigned subtraction in the second */
	if (on_q_ns <= size_nsec ||
	    on_q_ns - size_nsec <= ioc->params.qos[pidx] * NSEC_PER_USEC)
		local_inc(&ccs->missed[rw].nr_met);
	else
		local_inc(&ccs->missed[rw].nr_missed);

	local64_add(rq_wait_ns, &ccs->rq_wait_ns);

	put_cpu_ptr(ccs);
}

/* rq_qos ->queue_depth_changed hook: refresh parameters under ioc->lock. */
static void ioc_rqos_queue_depth_changed(struct rq_qos *rqos)
{
	struct ioc *ioc = rqos_to_ioc(rqos);

	spin_lock_irq(&ioc->lock);
	ioc_refresh_params(ioc, false);
	spin_unlock_irq(&ioc->lock);
}

/*
 * rq_qos ->exit hook: deactivate the blkcg policy, mark the controller
 * IOC_STOP under ioc->lock, shut the timer down, then free the per-cpu
 * stats and the ioc itself.
 */
static void ioc_rqos_exit(struct rq_qos *rqos)
{
	struct ioc *ioc = rqos_to_ioc(rqos);

	blkcg_deactivate_policy(rqos->disk, &blkcg_policy_iocost);

	/* ioc_timer_fn checks for IOC_STOP before starting another period */
	spin_lock_irq(&ioc->lock);
	ioc->running = IOC_STOP;
	spin_unlock_irq(&ioc->lock);

	timer_shutdown_sync(&ioc->timer);
	free_percpu(ioc->pcpu_stat);
	kfree(ioc);
}

/* rq_qos callbacks implemented by this controller */
static const struct rq_qos_ops ioc_rqos_ops = {
	.throttle = ioc_rqos_throttle,
	.merge = ioc_rqos_merge,
	.done_bio = ioc_rqos_done_bio,
	.done = ioc_rqos_done,
	.queue_depth_changed = ioc_rqos_queue_depth_changed,
	.exit = ioc_rqos_exit,
};

/*
 * Allocate and initialize an ioc for @disk, register it as an rq_qos and
 * activate the iocost blkcg policy.
 *
 * Returns 0 on success, -ENOMEM if an allocation fails, or the error
 * returned by rq_qos_add() / blkcg_activate_policy(); on failure everything
 * allocated here is released.
 */
static int blk_iocost_init(struct gendisk *disk)
{
	struct ioc *ioc;
	int i, cpu, ret;

	ioc = kzalloc(sizeof(*ioc), GFP_KERNEL);
	if (!ioc)
		return -ENOMEM;

	ioc->pcpu_stat = alloc_percpu(struct ioc_pcpu_stat);
	if (!ioc->pcpu_stat) {
		kfree(ioc);
		return -ENOMEM;
	}

	/* zero the met/missed counters and rq_wait_ns on every possible cpu */
	for_each_possible_cpu(cpu) {
		struct ioc_pcpu_stat *ccs = per_cpu_ptr(ioc->pcpu_stat, cpu);

		for (i = 0; i < ARRAY_SIZE(ccs->missed); i++) {
			local_set(&ccs->missed[i].nr_met, 0);
			local_set(&ccs->missed[i].nr_missed, 0);
		}
		local64_set(&ccs->rq_wait_ns, 0);
	}

	spin_lock_init(&ioc->lock);
	timer_setup(&ioc->timer, ioc_timer_fn, 0);
	INIT_LIST_HEAD(&ioc->active_iocgs);

	/* start idle with both base and current vrate at VTIME_PER_USEC */
	ioc->running = IOC_IDLE;
	ioc->vtime_base_rate = VTIME_PER_USEC;
	atomic64_set(&ioc->vtime_rate, VTIME_PER_USEC);
	seqcount_spinlock_init(&ioc->period_seqcount, &ioc->lock);
	ioc->period_at = ktime_to_us(blk_time_get());
	atomic64_set(&ioc->cur_period, 0);
	atomic_set(&ioc->hweight_gen, 0);

	spin_lock_irq(&ioc->lock);
	ioc->autop_idx = AUTOP_INVALID;
	ioc_refresh_params_disk(ioc, true, disk);
	spin_unlock_irq(&ioc->lock);

	/*
	 * rqos must be added before activation to allow ioc_pd_init() to
	 * lookup the ioc from q. This means that the rqos methods may get
	 * called before policy activation completion, can't assume that the
	 * target bio has an iocg associated and need to test for NULL iocg.
*/
	ret = rq_qos_add(&ioc->rqos, disk, RQ_QOS_COST, &ioc_rqos_ops);
	if (ret)
		goto err_free_ioc;

	ret = blkcg_activate_policy(disk, &blkcg_policy_iocost);
	if (ret)
		goto err_del_qos;
	return 0;

	/* unwind in reverse order of setup */
err_del_qos:
	rq_qos_del(&ioc->rqos);
err_free_ioc:
	free_percpu(ioc->pcpu_stat);
	kfree(ioc);
	return ret;
}

/*
 * Allocate the per-blkcg policy data with the default weight preset.
 * Returns NULL on allocation failure.
 */
static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp)
{
	struct ioc_cgrp *iocc;

	iocc = kzalloc(sizeof(struct ioc_cgrp), gfp);
	if (!iocc)
		return NULL;

	/* weights are stored scaled by WEIGHT_ONE */
	iocc->dfl_weight = CGROUP_WEIGHT_DFL * WEIGHT_ONE;
	return &iocc->cpd;
}

/* Free the ioc_cgrp that embeds @cpd. */
static void ioc_cpd_free(struct blkcg_policy_data *cpd)
{
	kfree(container_of(cpd, struct ioc_cgrp, cpd));
}

/*
 * Allocate an iocg, sized to hold one ancestors[] slot per cgroup level
 * (the cgroup's level + 1), together with its per-cpu stats. Returns NULL
 * if either allocation fails.
 */
static struct blkg_policy_data *ioc_pd_alloc(struct gendisk *disk,
		struct blkcg *blkcg, gfp_t gfp)
{
	int levels = blkcg->css.cgroup->level + 1;
	struct ioc_gq *iocg;

	iocg = kzalloc_node(struct_size(iocg, ancestors, levels), gfp,
			    disk->node_id);
	if (!iocg)
		return NULL;

	iocg->pcpu_stat = alloc_percpu_gfp(struct iocg_pcpu_stat, gfp);
	if (!iocg->pcpu_stat) {
		kfree(iocg);
		return NULL;
	}

	return &iocg->pd;
}

/*
 * Initialize a freshly allocated iocg: bind it to the queue's ioc, start
 * vtime/done_vtime at the current vnow, set up its lists, waitq and hrtimer,
 * fill ancestors[] by walking up the blkg parents, and apply its weight
 * under ioc->lock.
 */
static void ioc_pd_init(struct blkg_policy_data *pd)
{
	struct ioc_gq *iocg = pd_to_iocg(pd);
	struct blkcg_gq *blkg = pd_to_blkg(&iocg->pd);
	struct ioc *ioc = q_to_ioc(blkg->q);
	struct ioc_now now;
	struct blkcg_gq *tblkg;
	unsigned long flags;

	ioc_now(ioc, &now);

	iocg->ioc = ioc;
	atomic64_set(&iocg->vtime, now.vnow);
	atomic64_set(&iocg->done_vtime, now.vnow);
	atomic64_set(&iocg->active_period, atomic64_read(&ioc->cur_period));
	INIT_LIST_HEAD(&iocg->active_list);
	INIT_LIST_HEAD(&iocg->walk_list);
	INIT_LIST_HEAD(&iocg->surplus_list);
	iocg->hweight_active = WEIGHT_ONE;
	iocg->hweight_inuse = WEIGHT_ONE;

	init_waitqueue_head(&iocg->waitq);
	hrtimer_init(&iocg->waitq_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	iocg->waitq_timer.function = iocg_waitq_timer_fn;

	iocg->level = blkg->blkcg->css.cgroup->level;

	/* each ancestor (and @iocg itself) is stored at its own level */
	for (tblkg = blkg; tblkg; tblkg = tblkg->parent) {
		struct ioc_gq *tiocg = blkg_to_iocg(tblkg);
		iocg->ancestors[tiocg->level] = tiocg;
	}

	spin_lock_irqsave(&ioc->lock, flags);
	weight_updated(iocg, &now);
	spin_unlock_irqrestore(&ioc->lock, flags);
}

/*
 * Release an iocg. If it was bound to an ioc, it is first taken off the
 * active list (zeroing its weights) under ioc->lock and its waitq timer is
 * cancelled; then the per-cpu stats and the iocg are freed.
 */
static void ioc_pd_free(struct blkg_policy_data *pd)
{
	struct ioc_gq *iocg = pd_to_iocg(pd);
	struct ioc *ioc = iocg->ioc;
	unsigned long flags;

	/* iocg->ioc is only assigned in ioc_pd_init() */
	if (ioc) {
		spin_lock_irqsave(&ioc->lock, flags);

		if (!list_empty(&iocg->active_list)) {
			struct ioc_now now;

			ioc_now(ioc, &now);
			propagate_weights(iocg, 0, 0, false, &now);
			list_del_init(&iocg->active_list);
		}

		WARN_ON_ONCE(!list_empty(&iocg->walk_list));
		WARN_ON_ONCE(!list_empty(&iocg->surplus_list));

		spin_unlock_irqrestore(&ioc->lock, flags);

		hrtimer_cancel(&iocg->waitq_timer);
	}
	free_percpu(iocg->pcpu_stat);
	kfree(iocg);
}

/*
 * Append this iocg's cost statistics to @s. Prints nothing when the
 * controller is disabled. cost.vrate is emitted only for the level-0 iocg
 * and is the base vrate as a percentage of VTIME_PER_USEC with two decimals;
 * the wait/indebt/indelay fields require blkcg_debug_stats.
 */
static void ioc_pd_stat(struct blkg_policy_data *pd, struct seq_file *s)
{
	struct ioc_gq *iocg = pd_to_iocg(pd);
	struct ioc *ioc = iocg->ioc;

	if (!ioc->enabled)
		return;

	if (iocg->level == 0) {
		/* vrate in units of 1/10000 of VTIME_PER_USEC */
		unsigned vp10k = DIV64_U64_ROUND_CLOSEST(
			ioc->vtime_base_rate * 10000,
			VTIME_PER_USEC);
		seq_printf(s, " cost.vrate=%u.%02u", vp10k / 100, vp10k % 100);
	}

	seq_printf(s, " cost.usage=%llu", iocg->last_stat.usage_us);

	if (blkcg_debug_stats)
		seq_printf(s, " cost.wait=%llu cost.indebt=%llu cost.indelay=%llu",
			iocg->last_stat.wait_us,
			iocg->last_stat.indebt_us,
			iocg->last_stat.indelay_us);
}

/*
 * Per-blkg printer for the weight file: emits "<dev> <weight>" only for
 * blkgs that have a device name and a non-zero configured weight.
 */
static u64 ioc_weight_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
			     int off)
{
	const char *dname = blkg_dev_name(pd->blkg);
	struct ioc_gq *iocg = pd_to_iocg(pd);

	if (dname && iocg->cfg_weight)
		seq_printf(sf, "%s %u\n", dname, iocg->cfg_weight / WEIGHT_ONE);
	return 0;
}


/* seq_show for the weight file: the default weight, then per-device lines. */
static int ioc_weight_show(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
	struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg);

	seq_printf(sf, "default %u\n", iocc->dfl_weight / WEIGHT_ONE);
	blkcg_print_blkgs(sf, blkcg, ioc_weight_prfill,
			  &blkcg_policy_iocost, seq_cft(sf)->private, false);
	return 0;
}

/*
 * Write handler for the weight file. Input without a ':' sets the cgroup's
 * default weight ("default <n>" or "<n>"); otherwise the input is handed to
 * blkg_conf_prep() to select a device and the remainder sets that device's
 * weight ("default" stores 0). Returns @nbytes on success or a negative
 * errno.
 */
static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
				size_t nbytes, loff_t off)
{
	struct blkcg *blkcg =
css_to_blkcg(of_css(of));
	struct ioc_cgrp *iocc = blkcg_to_iocc(blkcg);
	struct blkg_conf_ctx ctx;
	struct ioc_now now;
	struct ioc_gq *iocg;
	u32 v;
	int ret;

	/* no ':' in the input: this sets the cgroup-wide default weight */
	if (!strchr(buf, ':')) {
		struct blkcg_gq *blkg;

		if (!sscanf(buf, "default %u", &v) && !sscanf(buf, "%u", &v))
			return -EINVAL;

		if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
			return -EINVAL;

		spin_lock_irq(&blkcg->lock);
		iocc->dfl_weight = v * WEIGHT_ONE;
		/* re-evaluate the weight of every iocg of this blkcg */
		hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
			struct ioc_gq *iocg = blkg_to_iocg(blkg);

			if (iocg) {
				spin_lock(&iocg->ioc->lock);
				ioc_now(iocg->ioc, &now);
				weight_updated(iocg, &now);
				spin_unlock(&iocg->ioc->lock);
			}
		}
		spin_unlock_irq(&blkcg->lock);

		return nbytes;
	}

	blkg_conf_init(&ctx, buf);

	ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx);
	if (ret)
		goto err;

	iocg = blkg_to_iocg(ctx.blkg);

	/* "default" is stored as a configured weight of 0 */
	if (!strncmp(ctx.body, "default", 7)) {
		v = 0;
	} else {
		if (!sscanf(ctx.body, "%u", &v))
			goto einval;
		if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX)
			goto einval;
	}

	/*
	 * NOTE(review): plain spin_lock() here - presumably blkg_conf_prep()
	 * returns with interrupts already disabled; confirm against its
	 * implementation.
	 */
	spin_lock(&iocg->ioc->lock);
	iocg->cfg_weight = v * WEIGHT_ONE;
	ioc_now(iocg->ioc, &now);
	weight_updated(iocg, &now);
	spin_unlock(&iocg->ioc->lock);

	blkg_conf_exit(&ctx);
	return nbytes;

einval:
	ret = -EINVAL;
err:
	blkg_conf_exit(&ctx);
	return ret;
}

/*
 * Per-blkg printer for the cost.qos file: one line with the device name,
 * enable flag, ctrl mode and the qos parameters. The rpct/wpct/min/max
 * values are stored in ppm and printed as percentages with two decimals.
 */
static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
			  int off)
{
	const char *dname = blkg_dev_name(pd->blkg);
	struct ioc *ioc = pd_to_iocg(pd)->ioc;

	if (!dname)
		return 0;

	/* hold ioc->lock so the printed parameters are a consistent set */
	spin_lock(&ioc->lock);
	seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
		   dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
		   ioc->params.qos[QOS_RPPM] / 10000,
		   ioc->params.qos[QOS_RPPM] % 10000 / 100,
		   ioc->params.qos[QOS_RLAT],
		   ioc->params.qos[QOS_WPPM] / 10000,
		   ioc->params.qos[QOS_WPPM] % 10000 / 100,
		   ioc->params.qos[QOS_WLAT],
		   ioc->params.qos[QOS_MIN] / 10000,
		   ioc->params.qos[QOS_MIN] % 10000 / 100,
		   ioc->params.qos[QOS_MAX] / 10000,
		   ioc->params.qos[QOS_MAX] % 10000 / 100);
	spin_unlock(&ioc->lock);
	return 0;
}

/* seq_show for the cost.qos file: one ioc_qos_prfill() line per blkg. */
static int ioc_qos_show(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));

	blkcg_print_blkgs(sf, blkcg, ioc_qos_prfill,
			  &blkcg_policy_iocost, seq_cft(sf)->private, false);
	return 0;
}

/* control tokens accepted by ioc_qos_write() */
static const match_table_t qos_ctrl_tokens = {
	{ QOS_ENABLE,		"enable=%u"	},
	{ QOS_CTRL,		"ctrl=%s"	},
	{ NR_QOS_CTRL_PARAMS,	NULL		},
};

/* parameter tokens accepted by ioc_qos_write(); %s ones are parsed as floats */
static const match_table_t qos_tokens = {
	{ QOS_RPPM,		"rpct=%s"	},
	{ QOS_RLAT,		"rlat=%u"	},
	{ QOS_WPPM,		"wpct=%s"	},
	{ QOS_WLAT,		"wlat=%u"	},
	{ QOS_MIN,		"min=%s"	},
	{ QOS_MAX,		"max=%s"	},
	{ NR_QOS_PARAMS,	NULL		},
};

/*
 * Write handler for the cost.qos file. Opens the target device, creates the
 * ioc on first use, then parses whitespace-separated "key=value" tokens into
 * a local copy of the qos parameters with the queue frozen and quiesced and
 * ioc->lock held. The copy is only committed if every token parses and
 * min <= max. Returns @nbytes on success; -EOPNOTSUPP for non-mq queues,
 * -EINVAL for bad input, or the error from device open / ioc init.
 */
static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
			     size_t nbytes, loff_t off)
{
	struct blkg_conf_ctx ctx;
	struct gendisk *disk;
	struct ioc *ioc;
	u32 qos[NR_QOS_PARAMS];
	bool enable, user;
	char *body, *p;
	unsigned int memflags;
	int ret;

	blkg_conf_init(&ctx, input);

	ret = blkg_conf_open_bdev(&ctx);
	if (ret)
		goto err;

	body = ctx.body;
	disk = ctx.bdev->bd_disk;
	if (!queue_is_mq(disk->queue)) {
		ret = -EOPNOTSUPP;
		goto err;
	}

	/* the ioc is created lazily on the first write for this disk */
	ioc = q_to_ioc(disk->queue);
	if (!ioc) {
		ret = blk_iocost_init(disk);
		if (ret)
			goto err;
		ioc = q_to_ioc(disk->queue);
	}

	memflags = blk_mq_freeze_queue(disk->queue);
	blk_mq_quiesce_queue(disk->queue);

	spin_lock_irq(&ioc->lock);
	/* work on a copy; it is committed only after all tokens validate */
	memcpy(qos, ioc->params.qos, sizeof(qos));
	enable = ioc->enabled;
	user = ioc->user_qos_params;

	while ((p = strsep(&body, " \t\n"))) {
		substring_t args[MAX_OPT_ARGS];
		char buf[32];
		int tok;
		/* NOTE(review): v is s64 but is also passed to match_u64() below */
		s64 v;

		/* skip empty tokens produced by consecutive separators */
		if (!*p)
			continue;

		switch (match_token(p, qos_ctrl_tokens, args)) {
		case QOS_ENABLE:
			if (match_u64(&args[0], &v))
				goto einval;
			enable = v;
			continue;
		case QOS_CTRL:
			match_strlcpy(buf, &args[0], sizeof(buf));
			if (!strcmp(buf, "auto"))
				user = false;
			else if (!strcmp(buf, "user"))
				user = true;
			else
				goto einval;
			continue;
		}

		tok = match_token(p, qos_tokens, args);
		switch (tok) {
		case QOS_RPPM:
		case QOS_WPPM:
			if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
			    sizeof(buf))
				goto einval;
			/* two decimal places: v is in 1/100 of a percent */
			if (cgroup_parse_float(buf, 2, &v))
				goto einval;
			if (v < 0 || v > 10000)
				goto einval;
			/* 1/100 percent -> ppm */
			qos[tok] = v * 100;
			break;
		case QOS_RLAT:
		case QOS_WLAT:
			if (match_u64(&args[0], &v))
				goto einval;
			/* NOTE(review): value is narrowed to u32 without a range check */
			qos[tok] = v;
			break;
		case QOS_MIN:
		case QOS_MAX:
			if (match_strlcpy(buf, &args[0], sizeof(buf)) >=
			    sizeof(buf))
				goto einval;
			if (cgroup_parse_float(buf, 2, &v))
				goto einval;
			if (v < 0)
				goto einval;
			qos[tok] = clamp_t(s64, v * 100,
					   VRATE_MIN_PPM, VRATE_MAX_PPM);
			break;
		default:
			goto einval;
		}
		/* any explicit parameter switches to user-controlled mode */
		user = true;
	}

	if (qos[QOS_MIN] > qos[QOS_MAX])
		goto einval;

	/* only touch accounting and the queue flag on an actual state change */
	if (enable && !ioc->enabled) {
		blk_stat_enable_accounting(disk->queue);
		blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
		ioc->enabled = true;
	} else if (!enable && ioc->enabled) {
		blk_stat_disable_accounting(disk->queue);
		blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
		ioc->enabled = false;
	}

	if (user) {
		memcpy(ioc->params.qos, qos, sizeof(qos));
		ioc->user_qos_params = true;
	} else {
		ioc->user_qos_params = false;
	}

	ioc_refresh_params(ioc, true);
	spin_unlock_irq(&ioc->lock);

	/* wbt's default is switched off while iocost is enabled and back on otherwise */
	if (enable)
		wbt_disable_default(disk);
	else
		wbt_enable_default(disk);

	blk_mq_unquiesce_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue, memflags);

	blkg_conf_exit(&ctx);
	return nbytes;
einval:
	/* reached with ioc->lock held and the queue frozen and quiesced */
	spin_unlock_irq(&ioc->lock);

	blk_mq_unquiesce_queue(disk->queue);
	blk_mq_unfreeze_queue(disk->queue, memflags);

	ret = -EINVAL;
err:
	blkg_conf_exit(&ctx);
	return ret;
}

/*
 * Per-blkg printer for the cost.model file: one line with the device name,
 * ctrl mode and the linear model's input coefficients.
 */
static u64 ioc_cost_model_prfill(struct seq_file *sf,
				 struct blkg_policy_data *pd, int off)
{
	const char *dname = blkg_dev_name(pd->blkg);
	struct ioc *ioc = pd_to_iocg(pd)->ioc;
	u64 *u = ioc->params.i_lcoefs;

	if (!dname)
		return 0;

	spin_lock(&ioc->lock);
seq_printf(sf, "%s ctrl=%s model=linear "
		   "rbps=%llu rseqiops=%llu rrandiops=%llu "
		   "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
		   dname, ioc->user_cost_model ? "user" : "auto",
		   u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
		   u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
	spin_unlock(&ioc->lock);
	return 0;
}

/* seq_show for the cost.model file: one ioc_cost_model_prfill() line per blkg. */
static int ioc_cost_model_show(struct seq_file *sf, void *v)
{
	struct blkcg *blkcg = css_to_blkcg(seq_css(sf));

	blkcg_print_blkgs(sf, blkcg, ioc_cost_model_prfill,
			  &blkcg_policy_iocost, seq_cft(sf)->private, false);
	return 0;
}

/* control tokens accepted by ioc_cost_model_write() */
static const match_table_t cost_ctrl_tokens = {
	{ COST_CTRL,		"ctrl=%s"	},
	{ COST_MODEL,		"model=%s"	},
	{ NR_COST_CTRL_PARAMS,	NULL		},
};

/* coefficient tokens accepted by ioc_cost_model_write() */
static const match_table_t i_lcoef_tokens = {
	{ I_LCOEF_RBPS,		"rbps=%u"	},
	{ I_LCOEF_RSEQIOPS,	"rseqiops=%u"	},
	{ I_LCOEF_RRANDIOPS,	"rrandiops=%u"	},
	{ I_LCOEF_WBPS,		"wbps=%u"	},
	{ I_LCOEF_WSEQIOPS,	"wseqiops=%u"	},
	{ I_LCOEF_WRANDIOPS,	"wrandiops=%u"	},
	{ NR_I_LCOEFS,		NULL		},
};

/*
 * Write handler for the cost.model file. Mirrors ioc_qos_write(): opens the
 * device, creates the ioc on first use, parses "key=value" tokens into a
 * local copy of the input coefficients with the queue frozen and quiesced
 * and ioc->lock held, and commits the copy only if every token parses.
 * Only model "linear" is accepted. Returns @nbytes on success; -EOPNOTSUPP
 * for non-mq queues, -EINVAL for bad input, or the error from device open /
 * ioc init.
 */
static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input,
				    size_t nbytes, loff_t off)
{
	struct blkg_conf_ctx ctx;
	struct request_queue *q;
	unsigned int memflags;
	struct ioc *ioc;
	u64 u[NR_I_LCOEFS];
	bool user;
	char *body, *p;
	int ret;

	blkg_conf_init(&ctx, input);

	ret = blkg_conf_open_bdev(&ctx);
	if (ret)
		goto err;

	body = ctx.body;
	q = bdev_get_queue(ctx.bdev);
	if (!queue_is_mq(q)) {
		ret = -EOPNOTSUPP;
		goto err;
	}

	/* the ioc is created lazily on the first write for this disk */
	ioc = q_to_ioc(q);
	if (!ioc) {
		ret = blk_iocost_init(ctx.bdev->bd_disk);
		if (ret)
			goto err;
		ioc = q_to_ioc(q);
	}

	memflags = blk_mq_freeze_queue(q);
	blk_mq_quiesce_queue(q);

	spin_lock_irq(&ioc->lock);
	/* work on a copy; it is committed only after all tokens validate */
	memcpy(u, ioc->params.i_lcoefs, sizeof(u));
	user = ioc->user_cost_model;

	while ((p = strsep(&body, " \t\n"))) {
		substring_t args[MAX_OPT_ARGS];
		char buf[32];
		int tok;
		u64 v;

		/* skip empty tokens produced by consecutive separators */
		if (!*p)
			continue;

		switch (match_token(p, cost_ctrl_tokens, args)) {
		case COST_CTRL:
			match_strlcpy(buf, &args[0], sizeof(buf));
			if (!strcmp(buf, "auto"))
				user = false;
			else if (!strcmp(buf, "user"))
				user = true;
			else
				goto einval;
			continue;
		case COST_MODEL:
			match_strlcpy(buf, &args[0], sizeof(buf));
			if (strcmp(buf, "linear"))
				goto einval;
			continue;
		}

		/* the table's terminating entry doubles as the "no match" result */
		tok = match_token(p, i_lcoef_tokens, args);
		if (tok == NR_I_LCOEFS)
			goto einval;
		if (match_u64(&args[0], &v))
			goto einval;
		u[tok] = v;
		/* any explicit coefficient switches to user-controlled mode */
		user = true;
	}

	if (user) {
		memcpy(ioc->params.i_lcoefs, u, sizeof(u));
		ioc->user_cost_model = true;
	} else {
		ioc->user_cost_model = false;
	}
	ioc_refresh_params(ioc, true);
	spin_unlock_irq(&ioc->lock);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q, memflags);

	blkg_conf_exit(&ctx);
	return nbytes;

einval:
	/* reached with ioc->lock held and the queue frozen and quiesced */
	spin_unlock_irq(&ioc->lock);

	blk_mq_unquiesce_queue(q);
	blk_mq_unfreeze_queue(q, memflags);

	ret = -EINVAL;
err:
	blkg_conf_exit(&ctx);
	return ret;
}

/* cgroup files: "weight" on non-root cgroups, "cost.*" on the root only */
static struct cftype ioc_files[] = {
	{
		.name = "weight",
		.flags = CFTYPE_NOT_ON_ROOT,
		.seq_show = ioc_weight_show,
		.write = ioc_weight_write,
	},
	{
		.name = "cost.qos",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = ioc_qos_show,
		.write = ioc_qos_write,
	},
	{
		.name = "cost.model",
		.flags = CFTYPE_ONLY_ON_ROOT,
		.seq_show = ioc_cost_model_show,
		.write = ioc_cost_model_write,
	},
	{}
};

/* blkcg policy descriptor tying the files and the cpd/pd callbacks together */
static struct blkcg_policy blkcg_policy_iocost = {
	.dfl_cftypes	= ioc_files,
	.cpd_alloc_fn	= ioc_cpd_alloc,
	.cpd_free_fn	= ioc_cpd_free,
	.pd_alloc_fn	= ioc_pd_alloc,
	.pd_init_fn	= ioc_pd_init,
	.pd_free_fn	= ioc_pd_free,
	.pd_stat_fn	= ioc_pd_stat,
};

/* Module init: register the iocost blkcg policy. */
static int __init ioc_init(void)
{
	return blkcg_policy_register(&blkcg_policy_iocost);
}

/* Module exit: unregister the iocost blkcg policy. */
static void __exit ioc_exit(void)
{
	blkcg_policy_unregister(&blkcg_policy_iocost);
}

module_init(ioc_init);
module_exit(ioc_exit);
42 42 3 3 1 1 3 41 37 2 2
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88
/*
 * NOTE(review): the numeric run above is not C.  It looks like hit counters
 * and a line-number gutter left over from a coverage report; it is kept
 * unchanged.  The header proper starts below.
 */
/* SPDX-License-Identifier: GPL-2.0 */
/*
 *	Definitions for the UDP-Lite (RFC 3828) code.
 */
#ifndef _UDPLITE_H
#define _UDPLITE_H

#include <net/ip6_checksum.h>
#include <net/udp.h>

/* UDP-Lite socket options */
#define UDPLITE_SEND_CSCOV   10 /* sender partial coverage (as sent)      */
#define UDPLITE_RECV_CSCOV   11 /* receiver partial coverage (threshold)  */

extern struct proto 		udplite_prot;
extern struct udp_table		udplite_table;

/*
 *	Checksum computation is all in software, hence simpler getfrag.
 *
 *	@from is used as a struct msghdr; @len bytes are copied from its
 *	msg_iter into @to.  Returns 0 when the full copy succeeded and
 *	-EFAULT otherwise.  @offset, @odd and @skb are not used.
 */
static __inline__ int udplite_getfrag(void *from, char *to, int offset,
				      int len, int odd, struct sk_buff *skb)
{
	struct msghdr *msg = from;
	return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT;
}

/*
 * 	Checksumming routines
 */

/*
 * Validate the checksum-related header fields of a received packet.
 *
 * The header's length field (uh->len) is read as the checksum coverage.
 * Returns 1 when the packet must be rejected (zero checksum field, or a
 * non-zero coverage below 8 or above skb->len) and 0 otherwise.
 * When the coverage is non-zero and shorter than skb->len, the skb control
 * block is marked as partially covered and the coverage is recorded there.
 */
static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
{
	u16 cscov;

        /* In UDPv4 a zero checksum means that the transmitter generated no
         * checksum. UDP-Lite (like IPv6) mandates checksums, hence packets
         * with a zero checksum field are illegal.                            */
	if (uh->check == 0) {
		net_dbg_ratelimited("UDPLite: zeroed checksum field\n");
		return 1;
	}

	cscov = ntohs(uh->len);

	if (cscov == 0)		 /* Indicates that full coverage is required. */
		;
	else if (cscov < 8  || cscov > skb->len) {
		/*
		 * Coverage length violates RFC 3828: log and discard silently.
		 */
		net_dbg_ratelimited("UDPLite: bad csum coverage %d/%d\n",
				    cscov, skb->len);
		return 1;

	} else if (cscov <  skb->len) {
		/*
		 * Partial coverage: record it in the control block, downgrade
		 * CHECKSUM_COMPLETE to CHECKSUM_NONE and clear csum_valid.
		 */
		UDP_SKB_CB(skb)->partial_cov = 1;
		UDP_SKB_CB(skb)->cscov = cscov;
		if (skb->ip_summed == CHECKSUM_COMPLETE)
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum_valid = 0;
        }
	/* cscov == skb->len falls through all branches: nothing is marked. */

	return 0;
}

/*
 * Fast-path computation of checksum. Socket may not be locked.
 *
 * By default the checksum covers everything from the transport header to
 * the end of the skb.  If the socket has UDPLITE_SEND_CC set and its pcslen
 * is smaller than that length, the header's len field is overwritten with
 * pcslen (also when pcslen is 0), and the checksummed length is shortened
 * to pcslen only when pcslen is non-zero.
 * Sets skb->ip_summed to CHECKSUM_NONE and returns the skb_checksum() result.
 */
static inline __wsum udplite_csum(struct sk_buff *skb)
{
	const int off = skb_transport_offset(skb);
	const struct sock *sk = skb->sk;
	int len = skb->len - off;

	if (udp_test_bit(UDPLITE_SEND_CC, sk)) {
		u16 pcslen = READ_ONCE(udp_sk(sk)->pcslen);

		if (pcslen < len) {
			if (pcslen > 0)
				len = pcslen;
			udp_hdr(skb)->len = htons(pcslen);
		}
	}
	skb->ip_summed = CHECKSUM_NONE;     /* no HW support for checksumming */

	return skb_checksum(skb, off, len, 0);
}

void udplite4_register(void);
#endif	/* _UDPLITE_H */
8 8 1 74 611 73 9 4 14 2 19 17
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
201 202 203 204
/*
 * NOTE(review): the numeric run above is not C.  It looks like hit counters
 * and a line-number gutter left over from a coverage report; it is kept
 * unchanged.  The header proper starts below.
 */
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_FIB_RULES_H
#define __NET_FIB_RULES_H

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/fib_rules.h>
#include <linux/refcount.h>
#include <net/flow.h>
#include <net/rtnetlink.h>
#include <net/fib_notifier.h>
#include <linux/indirect_call_wrapper.h>

/* A pair of kuid_t bounds; embedded in struct fib_rule as uid_range. */
struct fib_kuid_range {
	kuid_t start;
	kuid_t end;
};

/*
 * One rule.  Only the members whose use is visible in this header are
 * annotated; the rest are consumed by code outside this excerpt.
 */
struct fib_rule {
	struct list_head	list;
	int			iifindex;	/* compared against LOOPBACK_IFINDEX in fib_rule_requires_fldissect() */
	int			oifindex;
	u32			mark;
	u32			mark_mask;
	u32			flags;
	u32			table;		/* returned by fib_rule_get_table() unless l3mdev applies */
	u8			action;
	u8			l3mdev;		/* non-zero: fib_rule_get_table() uses arg->table (CONFIG_NET_L3_MASTER_DEV) */
	u8                      proto;
	u8			ip_proto;	/* non-zero counts towards fib_rule_requires_fldissect() */
	u32			target;
	__be64			tun_id;
	struct fib_rule __rcu	*ctarget;
	struct net		*fr_net;

	refcount_t		refcnt;		/* taken by fib_rule_get(), dropped by fib_rule_put() */
	u32			pref;
	int			suppress_ifgroup;
	int			suppress_prefixlen;
	char			iifname[IFNAMSIZ];
	char			oifname[IFNAMSIZ];
	struct fib_kuid_range	uid_range;
	struct fib_rule_port_range	sport_range;
	struct fib_rule_port_range	dport_range;
	struct rcu_head		rcu;		/* handed to kfree_rcu() in fib_rule_put() */
};

/*
 * Lookup argument block; its table member is what fib_rule_get_table()
 * returns for l3mdev rules.
 */
struct fib_lookup_arg {
	void			*lookup_ptr;
	const void		*lookup_data;
	void			*result;
	struct fib_rule		*rule;
	u32			table;
	int			flags;
	/* NOTE(review): presumably the values carried in `flags` above; confirm against users. */
#define FIB_LOOKUP_NOREF		1
#define FIB_LOOKUP_IGNORE_LINKSTATE	2
};

/* Callback table and bookkeeping for one set of rules. */
struct fib_rules_ops {
	int			family;
	struct list_head	list;
	int			rule_size;
	int			addr_size;
	int			unresolved_rules;
	int			nr_goto_rules;
	unsigned int		fib_rules_seq;

	int			(*action)(struct fib_rule *,
					  struct flowi *, int,
					  struct fib_lookup_arg *);
	bool			(*suppress)(struct fib_rule *, int,
					    struct fib_lookup_arg *);
	int			(*match)(struct fib_rule *,
					 struct flowi *, int);
	int			(*configure)(struct fib_rule *,
					     struct sk_buff *,
					     struct fib_rule_hdr *,
					     struct nlattr **,
					     struct netlink_ext_ack *);
	int			(*delete)(struct fib_rule *);
	int			(*compare)(struct fib_rule *,
					   struct fib_rule_hdr *,
					   struct nlattr **);
	int			(*fill)(struct fib_rule *, struct sk_buff *,
					struct fib_rule_hdr *);
	size_t			(*nlmsg_payload)(struct fib_rule *);

	/* Called after modifications to the rules set, must flush
	 * the route cache if one exists. */
	void			(*flush_cache)(struct fib_rules_ops *ops);

	int			nlgroup;
	struct list_head	rules_list;
	struct module		*owner;
	struct net		*fro_net;
	struct rcu_head		rcu;
};

struct fib_rule_notifier_info {
	struct fib_notifier_info info; /* must be first */
	struct fib_rule *rule;
};

/* Take a reference on @rule. */
static inline void fib_rule_get(struct fib_rule *rule)
{
	refcount_inc(&rule->refcnt);
}

/* Drop a reference on @rule; the final put frees it through kfree_rcu(). */
static inline void fib_rule_put(struct fib_rule *rule)
{
	if (refcount_dec_and_test(&rule->refcnt))
		kfree_rcu(rule, rcu);
}

/*
 * Table to use for @rule: with CONFIG_NET_L3_MASTER_DEV, rules that have
 * l3mdev set take the table from @arg; in every other case it is
 * rule->table.
 */
#ifdef CONFIG_NET_L3_MASTER_DEV
static inline u32 fib_rule_get_table(struct fib_rule *rule,
				     struct fib_lookup_arg *arg)
{
	return rule->l3mdev ? arg->table : rule->table;
}
#else
static inline u32 fib_rule_get_table(struct fib_rule *rule,
				     struct fib_lookup_arg *arg)
{
	return rule->table;
}
#endif

/* The FRA_TABLE attribute, when present, takes precedence over frh->table. */
static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
{
	if (nla[FRA_TABLE])
		return nla_get_u32(nla[FRA_TABLE]);
	return frh->table;
}

/* A port range counts as set only when both bounds are non-zero. */
static inline bool fib_rule_port_range_set(const struct fib_rule_port_range *range)
{
	return range->start != 0 && range->end != 0;
}

/*
 * Inclusive membership test.  @port is converted with ntohs() before being
 * compared against the bounds.
 */
static inline bool fib_rule_port_inrange(const struct fib_rule_port_range *a,
					 __be16 port)
{
	return ntohs(port) >= a->start &&
		ntohs(port) <= a->end;
}

/*
 * Both bounds non-zero, start <= end, and end strictly below 0xffff.
 * NOTE(review): the strict comparison rejects end == 0xffff; confirm that
 * this is intended before relying on it.
 */
static inline bool fib_rule_port_range_valid(const struct fib_rule_port_range *a)
{
	return a->start != 0 && a->end != 0 && a->end < 0xffff &&
		a->start <= a->end;
}

/* True when both ranges have identical bounds. */
static inline bool fib_rule_port_range_compare(struct fib_rule_port_range *a,
					       struct fib_rule_port_range *b)
{
	return a->start == b->start &&
		a->end == b->end;
}

/*
 * True when @rule's iifindex is not LOOPBACK_IFINDEX and the rule has
 * ip_proto set or either port range set.
 */
static inline bool fib_rule_requires_fldissect(struct fib_rule *rule)
{
	return rule->iifindex != LOOPBACK_IFINDEX && (rule->ip_proto ||
		fib_rule_port_range_set(&rule->sport_range) ||
		fib_rule_port_range_set(&rule->dport_range));
}

/* Declarations only; the definitions are outside this header. */
struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *,
					 struct net *);
void fib_rules_unregister(struct fib_rules_ops *);

int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags,
		     struct fib_lookup_arg *);
int fib_default_rule_add(struct fib_rules_ops *, u32 pref, u32 table);
bool fib_rule_matchall(const struct fib_rule *rule);
int fib_rules_dump(struct net *net, struct notifier_block *nb, int family,
		   struct netlink_ext_ack *extack);
unsigned int fib_rules_seq_read(const struct net *net, int family);

int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack);
int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
		   struct netlink_ext_ack *extack);

/* Declarations of the fib4/fib6 match, action and suppress functions. */
INDIRECT_CALLABLE_DECLARE(int fib6_rule_match(struct fib_rule *rule,
					    struct flowi *fl, int flags));
INDIRECT_CALLABLE_DECLARE(int fib4_rule_match(struct fib_rule *rule,
					    struct flowi *fl, int flags));

INDIRECT_CALLABLE_DECLARE(int fib6_rule_action(struct fib_rule *rule,
			    struct flowi *flp, int flags,
			    struct fib_lookup_arg *arg));
INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule,
			    struct flowi *flp, int flags,
			    struct fib_lookup_arg *arg));

INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule,
						int flags,
						struct fib_lookup_arg *arg));
INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule,
						int flags,
						struct fib_lookup_arg *arg));
#endif
2286 2285 2284 2294 252 2279 2282 2290 2285 2293 2285 2215 2214 2215 2221 2214 25 2215 2214 2213 25 2214 2213 2216 223 2293 254 34 35 233 230 2285 2288 2281 2295 2285 14 2286 86 2217 2295 2279 2295 2285 2293 2281 2295 2284 15 2282 51 2288 2292 2 13 13 13 2288 2274 86 2211 2295 2283 2286 2287 2288 2290 2286 264 2293 2290 2276 2274 2284 250 2284 2288 2290 2288 2282 2226 2216 2289 2288 35 2214 2218 2213 2223 2286 2290 2286 2287 2294 2294 222 2285 2286 2284 2222 2280 2223 2280 223 2273 222 2283 2286 2293 2292 808 2278 2289 260 2286 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 
392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 
892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 
1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 
1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 
2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 
2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 
2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 
3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 
3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 
4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 
4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 
4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 // SPDX-License-Identifier: GPL-2.0-only /* * linux/kernel/printk.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Modified to make sys_syslog() more flexible: added commands to * return the last 4k of kernel messages, regardless of whether * they've been read or not. Added option to suppress kernel printk's * to the console. Added hook for sending the console messages * elsewhere, in preparation for a serial line console (someday). * Ted Ts'o, 2/11/93. * Modified for sysctl support, 1/8/97, Chris Horn. 
* Fixed SMP synchronization, 08/08/99, Manfred Spraul * manfred@colorfullife.com * Rewrote bits to get rid of console_lock * 01Mar01 Andrew Morton */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/mm.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/console.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/nmi.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/delay.h> #include <linux/smp.h> #include <linux/security.h> #include <linux/memblock.h> #include <linux/syscalls.h> #include <linux/syscore_ops.h> #include <linux/vmcore_info.h> #include <linux/ratelimit.h> #include <linux/kmsg_dump.h> #include <linux/syslog.h> #include <linux/cpu.h> #include <linux/rculist.h> #include <linux/poll.h> #include <linux/irq_work.h> #include <linux/ctype.h> #include <linux/uio.h> #include <linux/sched/clock.h> #include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/uaccess.h> #include <asm/sections.h> #include <trace/events/initcall.h> #define CREATE_TRACE_POINTS #include <trace/events/printk.h> #include "printk_ringbuffer.h" #include "console_cmdline.h" #include "braille.h" #include "internal.h" int console_printk[4] = { CONSOLE_LOGLEVEL_DEFAULT, /* console_loglevel */ MESSAGE_LOGLEVEL_DEFAULT, /* default_message_loglevel */ CONSOLE_LOGLEVEL_MIN, /* minimum_console_loglevel */ CONSOLE_LOGLEVEL_DEFAULT, /* default_console_loglevel */ }; EXPORT_SYMBOL_GPL(console_printk); atomic_t ignore_console_lock_warning __read_mostly = ATOMIC_INIT(0); EXPORT_SYMBOL(ignore_console_lock_warning); EXPORT_TRACEPOINT_SYMBOL_GPL(console); /* * Low level drivers may need that to know if they can schedule in * their unblank() callback or not. So let's export it. */ int oops_in_progress; EXPORT_SYMBOL(oops_in_progress); /* * console_mutex protects console_list updates and console->flags updates. * The flags are synchronized only for consoles that are registered, i.e. 
* accessible via the console list.
 */
static DEFINE_MUTEX(console_mutex);

/*
 * console_sem protects updates to console->seq
 * and also provides serialization for console printing.
 */
static DEFINE_SEMAPHORE(console_sem, 1);

/* Head of the console list; exported GPL-only. */
HLIST_HEAD(console_list);
EXPORT_SYMBOL_GPL(console_list);

/*
 * SRCU domain for the console list. Readers enter and leave it through
 * console_srcu_read_lock()/console_srcu_read_unlock().
 */
DEFINE_STATIC_SRCU(console_srcu);

/*
 * System may need to suppress printk message under certain
 * circumstances, like after kernel panic happens.
 */
int __read_mostly suppress_printk;

#ifdef CONFIG_LOCKDEP
/*
 * Lockdep map named "console_lock". The console_sem helpers
 * (down_console_sem() and friends) annotate acquire/release on this map.
 */
static struct lockdep_map console_lock_dep_map = {
	.name = "console_lock"
};

/* Lockdep assertion that the caller holds console_mutex. */
void lockdep_assert_console_list_lock_held(void)
{
	lockdep_assert_held(&console_mutex);
}
EXPORT_SYMBOL(lockdep_assert_console_list_lock_held);
#endif

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/* Return the result of srcu_read_lock_held() for console_srcu. */
bool console_srcu_read_lock_is_held(void)
{
	return srcu_read_lock_held(&console_srcu);
}
EXPORT_SYMBOL(console_srcu_read_lock_is_held);
#endif

/* Bit positions used to build the DEVKMSG_LOG_MASK_* values. */
enum devkmsg_log_bits {
	__DEVKMSG_LOG_BIT_ON = 0,
	__DEVKMSG_LOG_BIT_OFF,
	__DEVKMSG_LOG_BIT_LOCK,
};

/*
 * Mask values stored in devkmsg_log:
 *   ON   - selected by the "on" keyword
 *   OFF  - selected by the "off" keyword
 *   LOCK - set after the mode was given on the kernel command line; the
 *          sysctl handler refuses writes while it is set
 */
enum devkmsg_log_masks {
	DEVKMSG_LOG_MASK_ON		= BIT(__DEVKMSG_LOG_BIT_ON),
	DEVKMSG_LOG_MASK_OFF		= BIT(__DEVKMSG_LOG_BIT_OFF),
	DEVKMSG_LOG_MASK_LOCK		= BIT(__DEVKMSG_LOG_BIT_LOCK),
};

/* Keep both the 'on' and 'off' bits clear, i.e.
ratelimit by default: */
#define DEVKMSG_LOG_MASK_DEFAULT	0

/* Current /dev/kmsg logging mode: a combination of DEVKMSG_LOG_MASK_* bits. */
static unsigned int __read_mostly devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;

/*
 * Parse a devkmsg mode keyword and update devkmsg_log.
 *
 * @str is matched by prefix against "on", "off" and "ratelimit" (in that
 * order); whatever follows the keyword is not examined here. devkmsg_log
 * is overwritten only when a keyword matches.
 *
 * Return: the non-zero str_has_prefix() result for the matched keyword,
 * or -EINVAL if @str is NULL or matches none of the keywords.
 */
static int __control_devkmsg(char *str)
{
	size_t len;

	if (!str)
		return -EINVAL;

	len = str_has_prefix(str, "on");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_ON;
		return len;
	}

	len = str_has_prefix(str, "off");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_OFF;
		return len;
	}

	len = str_has_prefix(str, "ratelimit");
	if (len) {
		devkmsg_log = DEVKMSG_LOG_MASK_DEFAULT;
		return len;
	}

	return -EINVAL;
}

/*
 * Handler for the "printk.devkmsg=" kernel command line parameter.
 *
 * A bad option string is only warned about. A valid one is mirrored into
 * devkmsg_log_str and the LOCK bit is set in devkmsg_log. Always returns 1.
 */
static int __init control_devkmsg(char *str)
{
	if (__control_devkmsg(str) < 0) {
		pr_warn("printk.devkmsg: bad option string '%s'\n", str);
		return 1;
	}

	/*
	 * Set sysctl string accordingly:
	 */
	if (devkmsg_log == DEVKMSG_LOG_MASK_ON)
		strscpy(devkmsg_log_str, "on");
	else if (devkmsg_log == DEVKMSG_LOG_MASK_OFF)
		strscpy(devkmsg_log_str, "off");
	/* else "ratelimit" which is set by default. */

	/*
	 * Sysctl cannot change it anymore. The kernel command line setting of
	 * this parameter is to force the setting to be permanent throughout the
	 * runtime of the system. This is a precaution measure against userspace
	 * trying to be a smarta** and attempting to change it up on us.
	 */
	devkmsg_log |= DEVKMSG_LOG_MASK_LOCK;

	return 1;
}
__setup("printk.devkmsg=", control_devkmsg);

/* String form of the current devkmsg mode; "ratelimit" unless changed. */
char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
#if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL)
/*
 * Sysctl handler for the devkmsg mode string.
 *
 * Reads are passed straight to proc_dostring(). A write is refused with
 * -EINVAL while the LOCK bit is set. Otherwise the previous mode and string
 * are saved, proc_dostring() performs the write, and devkmsg_log_str is
 * re-parsed. If parsing fails, or the parsed keyword length plus one does
 * not equal *lenp, the saved mode and string are restored and -EINVAL is
 * returned.
 *
 * NOTE(review): this relies on @table pointing proc_dostring() at
 * devkmsg_log_str; the table definition is not visible here - confirm.
 *
 * Return: 0 on success, the proc_dostring() error, or -EINVAL.
 */
int devkmsg_sysctl_set_loglvl(const struct ctl_table *table, int write,
			      void *buffer, size_t *lenp, loff_t *ppos)
{
	char old_str[DEVKMSG_STR_MAX_SIZE];
	unsigned int old;
	int err;

	if (write) {
		if (devkmsg_log & DEVKMSG_LOG_MASK_LOCK)
			return -EINVAL;

		old = devkmsg_log;
		strscpy(old_str, devkmsg_log_str);
	}

	err = proc_dostring(table, write, buffer, lenp, ppos);
	if (err)
		return err;

	if (write) {
		err = __control_devkmsg(devkmsg_log_str);

		/*
		 * Do not accept an unknown string OR a known string with
		 * trailing crap...
		 */
		if (err < 0 || (err + 1 != *lenp)) {

			/* ... and restore old setting. */
			devkmsg_log = old;
			strscpy(devkmsg_log_str, old_str);

			return -EINVAL;
		}
	}

	return 0;
}
#endif /* CONFIG_PRINTK && CONFIG_SYSCTL */

/**
 * console_list_lock - Lock the console list
 *
 * For console list or console->flags updates
 */
void console_list_lock(void)
{
	/*
	 * In unregister_console() and console_force_preferred_locked(),
	 * synchronize_srcu() is called with the console_list_lock held.
	 * Therefore it is not allowed that the console_list_lock is taken
	 * with the srcu_lock held.
	 *
	 * Detecting if this context is really in the read-side critical
	 * section is only possible if the appropriate debug options are
	 * enabled.
	 */
	WARN_ON_ONCE(debug_lockdep_rcu_enabled() &&
		     srcu_read_lock_held(&console_srcu));

	mutex_lock(&console_mutex);
}
EXPORT_SYMBOL(console_list_lock);

/**
 * console_list_unlock - Unlock the console list
 *
 * Counterpart to console_list_lock()
 */
void console_list_unlock(void)
{
	mutex_unlock(&console_mutex);
}
EXPORT_SYMBOL(console_list_unlock);

/**
 * console_srcu_read_lock - Register a new reader for the
 *	SRCU-protected console list
 *
 * Use for_each_console_srcu() to iterate the console list
 *
 * Context: Any context.
 * Return: A cookie to pass to console_srcu_read_unlock().
 */
int console_srcu_read_lock(void)
	__acquires(&console_srcu)
{
	return srcu_read_lock_nmisafe(&console_srcu);
}
EXPORT_SYMBOL(console_srcu_read_lock);

/**
 * console_srcu_read_unlock - Unregister an old reader from
 *	the SRCU-protected console list
 * @cookie: cookie returned from console_srcu_read_lock()
 *
 * Counterpart to console_srcu_read_lock()
 */
void console_srcu_read_unlock(int cookie)
	__releases(&console_srcu)
{
	srcu_read_unlock_nmisafe(&console_srcu, cookie);
}
EXPORT_SYMBOL(console_srcu_read_unlock);

/*
 * Helper macros to handle lockdep when locking/unlocking console_sem. We use
 * macros instead of functions so that _RET_IP_ contains useful information.
*/
#define down_console_sem() do { \
	down(&console_sem);\
	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\
} while (0)

/*
 * Try to take console_sem without blocking.
 *
 * @ip: caller address recorded in the lockdep annotation.
 *
 * Return: 0 if the semaphore was taken (lockdep is annotated as a trylock
 * acquire), 1 if it was not.
 */
static int __down_trylock_console_sem(unsigned long ip)
{
	int lock_failed;
	unsigned long flags;

	/*
	 * Here and in __up_console_sem() we need to be in safe mode,
	 * because spindump/WARN/etc from under console ->lock will
	 * deadlock in printk()->down_trylock_console_sem() otherwise.
	 */
	printk_safe_enter_irqsave(flags);
	lock_failed = down_trylock(&console_sem);
	printk_safe_exit_irqrestore(flags);

	if (lock_failed)
		return 1;
	mutex_acquire(&console_lock_dep_map, 0, 1, ip);
	return 0;
}
#define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_)

/*
 * Release console_sem. The lockdep release is annotated first, then up()
 * runs inside a printk-safe section (see __down_trylock_console_sem()).
 *
 * @ip: caller address recorded in the lockdep annotation.
 */
static void __up_console_sem(unsigned long ip)
{
	unsigned long flags;

	mutex_release(&console_lock_dep_map, ip);

	printk_safe_enter_irqsave(flags);
	up(&console_sem);
	printk_safe_exit_irqrestore(flags);
}
#define up_console_sem() __up_console_sem(_RET_IP_)

/* Return true if panic_cpu holds anything other than PANIC_CPU_INVALID. */
static bool panic_in_progress(void)
{
	return unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID);
}

/* Return true if a panic is in progress on the current CPU. */
bool this_cpu_in_panic(void)
{
	/*
	 * We can use raw_smp_processor_id() here because it is impossible for
	 * the task to be migrated to the panic_cpu, or away from it. If
	 * panic_cpu has already been set, and we're not currently executing on
	 * that CPU, then we never will be.
	 */
	return unlikely(atomic_read(&panic_cpu) == raw_smp_processor_id());
}

/*
 * Return true if a panic is in progress on a remote CPU.
 *
 * On true, the local CPU should immediately release any printing resources
 * that may be needed by the panic CPU.
 */
bool other_cpu_in_panic(void)
{
	return (panic_in_progress() && !this_cpu_in_panic());
}

/*
 * This is used for debugging the mess that is the VT code by
 * keeping track if we have the console semaphore held.
It's
 * definitely not the perfect debug tool (we don't know if _WE_
 * hold it and are racing, but it helps tracking those weird code
 * paths in the console code where we end up in places I want
 * locked without the console semaphore held).
 */
static int console_locked;

/*
 *	Array of consoles built from command line options (console=)
 */

#define MAX_CMDLINECONSOLES 8

static struct console_cmdline console_cmdline[MAX_CMDLINECONSOLES];

/* Starts at -1. NOTE(review): presumably an index into console_cmdline[] - confirm. */
static int preferred_console = -1;
int console_set_on_cmdline;
EXPORT_SYMBOL(console_set_on_cmdline);

/* Flag: console code may call schedule() */
static int console_may_schedule;

/* Values for console_msg_format. */
enum con_msg_format_flags {
	MSG_FORMAT_DEFAULT	= 0,
	MSG_FORMAT_SYSLOG	= (1 << 0),
};

static int console_msg_format = MSG_FORMAT_DEFAULT;

/*
 * The printk log buffer consists of a sequenced collection of records, each
 * containing variable length message text. Every record also contains its
 * own meta-data (@info).
 *
 * Every record meta-data carries the timestamp in microseconds, as well as
 * the standard userspace syslog level and syslog facility. The usual kernel
 * messages use LOG_KERN; userspace-injected messages always carry a matching
 * syslog facility, by default LOG_USER. The origin of every message can be
 * reliably determined that way.
 *
 * The human readable log message of a record is available in @text, the
 * length of the message text in @text_len. The stored message is not
 * terminated.
 *
 * Optionally, a record can carry a dictionary of properties (key/value
 * pairs), to provide userspace with a machine-readable message context.
 *
 * Examples for well-defined, commonly used property names are:
 *   DEVICE=b12:8               device identifier
 *                                b12:8         block dev_t
 *                                c127:3        char dev_t
 *                                n8            netdev ifindex
 *                                +sound:card0  subsystem:devname
 *   SUBSYSTEM=pci              driver-core subsystem name
 *
 * Valid characters in property names are [a-zA-Z0-9.-_]. Property names
 * and values are terminated by a '\0' character.
 *
 * Example of record values:
 *   record.text_buf                = "it's a line" (unterminated)
 *   record.info.seq                = 56
 *   record.info.ts_nsec            = 36863
 *   record.info.text_len           = 11
 *   record.info.facility           = 0 (LOG_KERN)
 *   record.info.flags              = 0
 *   record.info.level              = 3 (LOG_ERR)
 *   record.info.caller_id          = 299 (task 299)
 *   record.info.dev_info.subsystem = "pci" (terminated)
 *   record.info.dev_info.device    = "+pci:0000:00:01.0" (terminated)
 *
 * The 'struct printk_info' buffer must never be directly exported to
 * userspace, it is a kernel-private implementation detail that might
 * need to be changed in the future, when the requirements change.
 *
 * /dev/kmsg exports the structured data in the following line format:
 *   "<level>,<sequnum>,<timestamp>,<contflag>[,additional_values, ... ];<message text>\n"
 *
 * Users of the export format should ignore possible additional values
 * separated by ',', and find the message after the ';' character.
 *
 * The optional key/value pairs are attached as continuation lines starting
 * with a space character and terminated by a newline. All possible
 * non-printable characters are escaped in the "\xff" notation.
 */

/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);

/*
 * Specifies if a legacy console is registered. If legacy consoles are
 * present, it is necessary to perform the console lock/unlock dance
 * whenever console flushing should occur.
 */
bool have_legacy_console;

/*
 * Specifies if an nbcon console is registered. If nbcon consoles are present,
 * synchronous printing of legacy consoles will not occur during panic until
 * the backtrace has been stored to the ringbuffer.
 */
bool have_nbcon_console;

/*
 * Specifies if a boot console is registered. If boot consoles are present,
 * nbcon consoles cannot print simultaneously and must be synchronized by
 * the console lock. This is because boot consoles and nbcon consoles may
 * have mapped the same hardware.
*/
bool have_boot_console;

/* See printk_legacy_allow_panic_sync() for details. */
bool legacy_allow_panic_sync;

#ifdef CONFIG_PRINTK
/* Wait queue that /dev/kmsg readers sleep and poll on. */
DECLARE_WAIT_QUEUE_HEAD(log_wait);
static DECLARE_WAIT_QUEUE_HEAD(legacy_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static size_t syslog_partial;
static bool syslog_time;

/* True when _all_ printer threads are available for printing. */
bool printk_kthreads_running;

/*
 * A u64 kept in two copies under a seqcount latch, written with
 * latched_seq_write() and read with latched_seq_read_nolock().
 */
struct latched_seq {
	seqcount_latch_t	latch;
	u64			val[2];
};

/*
 * The next printk record to read after the last 'clear' command. There are
 * two copies (updated with seqcount_latch) so that reads can locklessly
 * access a valid value. Writers are synchronized by @syslog_lock.
 */
static struct latched_seq clear_seq = {
	.latch		= SEQCNT_LATCH_ZERO(clear_seq.latch),
	.val[0]		= 0,
	.val[1]		= 0,
};

/* Syslog prefix decoding: the level is the low 3 bits, the facility the 8 bits above. */
#define LOG_LEVEL(v)		((v) & 0x07)
#define LOG_FACILITY(v)		((v) >> 3 & 0xff)

/* record buffer */
#define LOG_ALIGN __alignof__(unsigned long)
#define __LOG_BUF_LEN (1 << CONFIG_LOG_BUF_SHIFT)
#define LOG_BUF_LEN_MAX ((u32)1 << 31)
/* Statically sized buffer; log_buf/log_buf_len start out describing it. */
static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;

/*
 * Define the average message size. This only affects the number of
 * descriptors that will be available. Underestimating is better than
 * overestimating (too many available descriptors is better than not enough).
 */
#define PRB_AVGBITS 5	/* 32 character average length */

#if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS
#error CONFIG_LOG_BUF_SHIFT value too small.
#endif
_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
		 PRB_AVGBITS, &__log_buf[0]);

/* Ringbuffer that setup_log_buf() initialises when a larger buffer is set up. */
static struct printk_ringbuffer printk_rb_dynamic;

/* The ringbuffer currently in use; initially the static one. */
struct printk_ringbuffer *prb = &printk_rb_static;

/*
 * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before
 * per_cpu_areas are initialised. This variable is set to true when
 * it's safe to access per-CPU data.
*/
static bool __printk_percpu_data_ready __ro_after_init;

/* Accessor for __printk_percpu_data_ready. */
bool printk_percpu_data_ready(void)
{
	return __printk_percpu_data_ready;
}

/*
 * Store @val into both copies of @ls using the seqcount latch write
 * sequence.
 */
/* Must be called under syslog_lock. */
static void latched_seq_write(struct latched_seq *ls, u64 val)
{
	write_seqcount_latch_begin(&ls->latch);
	ls->val[0] = val;
	write_seqcount_latch(&ls->latch);
	ls->val[1] = val;
	write_seqcount_latch_end(&ls->latch);
}

/*
 * Read the copy of @ls selected by the low bit of the latch sequence,
 * retrying while read_seqcount_latch_retry() reports a concurrent update.
 */
/* Can be called from any context. */
static u64 latched_seq_read_nolock(struct latched_seq *ls)
{
	unsigned int seq;
	unsigned int idx;
	u64 val;

	do {
		seq = read_seqcount_latch(&ls->latch);
		idx = seq & 0x1;
		val = ls->val[idx];
	} while (read_seqcount_latch_retry(&ls->latch, seq));

	return val;
}

/* Return log buffer address */
char *log_buf_addr_get(void)
{
	return log_buf;
}

/* Return log buffer size */
u32 log_buf_len_get(void)
{
	return log_buf_len;
}

/*
 * Define how much of the log buffer we could take at maximum. The value
 * must be greater than two. Note that only half of the buffer is available
 * when the index points to the middle.
 */
#define MAX_LOG_TAKE_PART 4
static const char trunc_msg[] = "<truncated>";

/*
 * Limit a message length and make room for the truncation marker.
 *
 * *@text_len is capped at log_buf_len / MAX_LOG_TAKE_PART. If the capped
 * length is at least strlen(trunc_msg), that many bytes are taken off
 * *@text_len and *@trunc_msg_len is set to the marker length; otherwise
 * *@trunc_msg_len is set to 0 and *@text_len keeps the capped value.
 */
static void truncate_msg(u16 *text_len, u16 *trunc_msg_len)
{
	/*
	 * The message should not take the whole buffer. Otherwise, it might
	 * get removed too soon.
	 */
	u32 max_text_len = log_buf_len / MAX_LOG_TAKE_PART;

	if (*text_len > max_text_len)
		*text_len = max_text_len;

	/* enable the warning message (if there is room) */
	*trunc_msg_len = strlen(trunc_msg);
	if (*text_len >= *trunc_msg_len)
		*text_len -= *trunc_msg_len;
	else
		*trunc_msg_len = 0;
}

int dmesg_restrict = IS_ENABLED(CONFIG_SECURITY_DMESG_RESTRICT);

/*
 * Return non-zero if syslog action @type needs a capability check: always
 * when dmesg_restrict is set, otherwise for every action except
 * SYSLOG_ACTION_READ_ALL and SYSLOG_ACTION_SIZE_BUFFER.
 */
static int syslog_action_restricted(int type)
{
	if (dmesg_restrict)
		return 1;
	/*
	 * Unless restricted, we allow "read all" and "get buffer size"
	 * for everybody.
	 */
	return type != SYSLOG_ACTION_READ_ALL &&
	       type != SYSLOG_ACTION_SIZE_BUFFER;
}

/*
 * Check whether the caller may perform syslog action @type coming from
 * @source.
 *
 * Return: -EPERM if the action is restricted and the caller lacks
 * CAP_SYSLOG, otherwise the result of security_syslog(@type).
 */
static int check_syslog_permissions(int type, int source)
{
	/*
	 * If this is from /proc/kmsg and we've already opened it, then we've
	 * already done the capabilities checks at open time.
	 */
	if (source == SYSLOG_FROM_PROC && type != SYSLOG_ACTION_OPEN)
		goto ok;

	if (syslog_action_restricted(type)) {
		if (capable(CAP_SYSLOG))
			goto ok;
		return -EPERM;
	}
ok:
	return security_syslog(type);
}

/* Store @c at *@pp and advance *@pp, unless *@pp has reached the end @e. */
static void append_char(char **pp, char *e, char c)
{
	if (*pp < e)
		*(*pp)++ = c;
}

/*
 * Format the extended (/dev/kmsg) record header into @buf.
 *
 * The header is "<facility << 3 | level>,<seq>,<timestamp>,<flag>" followed
 * by ",caller=..." when CONFIG_PRINTK_CALLER is enabled, then ';'. The
 * timestamp is @info->ts_nsec divided by 1000; the flag is 'c' when
 * LOG_CONT is set in @info->flags and '-' otherwise.
 *
 * Return: the scnprintf() result.
 */
static ssize_t info_print_ext_header(char *buf, size_t size,
				     struct printk_info *info)
{
	u64 ts_usec = info->ts_nsec;
	char caller[20];
#ifdef CONFIG_PRINTK_CALLER
	u32 id = info->caller_id;

	/* The top bit of the id selects the 'C' prefix, otherwise 'T'. */
	snprintf(caller, sizeof(caller), ",caller=%c%u",
		 id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
#else
	caller[0] = '\0';
#endif

	do_div(ts_usec, 1000);

	return scnprintf(buf, size, "%u,%llu,%llu,%c%s;",
			 (info->facility << 3) | info->level, info->seq,
			 ts_usec, info->flags & LOG_CONT ? 'c' : '-', caller);
}

/*
 * Copy @text_len bytes of @text into @buf, writing bytes below ' ', bytes
 * of 127 and above, and '\\' as "\xNN" escapes, then append @endc. Output
 * that does not fit in @size is dropped.
 *
 * Return: number of bytes stored in @buf.
 */
static ssize_t msg_add_ext_text(char *buf, size_t size,
				const char *text, size_t text_len,
				unsigned char endc)
{
	char *p = buf, *e = buf + size;
	size_t i;

	/* escape non-printable characters */
	for (i = 0; i < text_len; i++) {
		unsigned char c = text[i];

		if (c < ' ' || c >= 127 || c == '\\')
			p += scnprintf(p, e - p, "\\x%02x", c);
		else
			append_char(&p, e, c);
	}
	append_char(&p, e, endc);

	return p - buf;
}

/*
 * Emit one " KEY=value\n" continuation line into @buf, with @key and @val
 * escaped by msg_add_ext_text().
 *
 * Return: number of bytes stored, or 0 (nothing written) if @val is empty.
 */
static ssize_t msg_add_dict_text(char *buf, size_t size,
				 const char *key, const char *val)
{
	size_t val_len = strlen(val);
	ssize_t len;

	if (!val_len)
		return 0;

	len = msg_add_ext_text(buf, size, "", 0, ' ');	/* dict prefix */
	len += msg_add_ext_text(buf + len, size - len, key, strlen(key), '=');
	len += msg_add_ext_text(buf + len, size - len, val, val_len, '\n');

	return len;
}

/*
 * Emit the escaped message text terminated by '\n' and, when @dev_info is
 * not NULL, the SUBSYSTEM and DEVICE dictionary lines.
 *
 * Return: number of bytes stored in @buf.
 */
static ssize_t msg_print_ext_body(char *buf, size_t size,
				  char *text, size_t text_len,
				  struct dev_printk_info *dev_info)
{
	ssize_t len;

	len = msg_add_ext_text(buf, size, text, text_len, '\n');

	if (!dev_info)
		goto out;

	len += msg_add_dict_text(buf + len, size - len, "SUBSYSTEM",
				 dev_info->subsystem);
	len += msg_add_dict_text(buf + len, size - len, "DEVICE",
				 dev_info->device);
out:
	return len;
}

/* /dev/kmsg - userspace message inject/listen interface */
struct devkmsg_user {
	/* sequence number of the next record this reader will fetch */
	atomic64_t seq;
	/* ratelimit state applied to writes when not explicitly enabled */
	struct ratelimit_state rs;
	/* taken by devkmsg_read() around fetching and copying a record */
	struct mutex lock;
	/* buffers handed to printk_get_next_message() */
	struct printk_buffers pbufs;
};

/* Varargs wrapper that forwards to vprintk_emit() with no dev_info. */
static __printf(3, 4) __cold
int devkmsg_emit(int facility, int level, const char *fmt, ...)
{
	va_list args;
	int r;

	va_start(args, fmt);
	r = vprintk_emit(facility, level, NULL, fmt, args);
	va_end(args);

	return r;
}

/*
 * Write handler for /dev/kmsg: inject one userspace message into the log.
 *
 * Return: the full write length on success, and also when the message is
 * dropped because logging is off or ratelimited; -EINVAL if the write is
 * longer than PRINTKRB_RECORD_MAX; -ENOMEM or -EFAULT on allocation or
 * copy failure.
 */
static ssize_t devkmsg_write(struct kiocb *iocb, struct iov_iter *from)
{
	char *buf, *line;
	int level = default_message_loglevel;
	int facility = 1;	/* LOG_USER */
	struct file *file = iocb->ki_filp;
	struct devkmsg_user *user = file->private_data;
	size_t len = iov_iter_count(from);
	ssize_t ret = len;

	if (len > PRINTKRB_RECORD_MAX)
		return -EINVAL;

	/* Ignore when user logging is disabled. */
	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
		return len;

	/* Ratelimit when not explicitly enabled. */
	if (!(devkmsg_log & DEVKMSG_LOG_MASK_ON)) {
		if (!___ratelimit(&user->rs, current->comm))
			return ret;
	}

	buf = kmalloc(len+1, GFP_KERNEL);
	if (buf == NULL)
		return -ENOMEM;

	/* Terminate up front; the copy below fills exactly len bytes. */
	buf[len] = '\0';
	if (!copy_from_iter_full(buf, len, from)) {
		kfree(buf);
		return -EFAULT;
	}

	/*
	 * Extract and skip the syslog prefix <[0-9]*>. Coming from userspace
	 * the decimal value represents 32bit, the lower 3 bit are the log
	 * level, the rest are the log facility.
	 *
	 * If no prefix or no userspace facility is specified, we
	 * enforce LOG_USER, to be able to reliably distinguish
	 * kernel-generated messages from userspace-injected ones.
	 */
	line = buf;
	if (line[0] == '<') {
		char *endp = NULL;
		unsigned int u;

		u = simple_strtoul(line + 1, &endp, 10);
		if (endp && endp[0] == '>') {
			level = LOG_LEVEL(u);
			if (LOG_FACILITY(u) != 0)
				facility = LOG_FACILITY(u);
			endp++;
			line = endp;
		}
	}

	devkmsg_emit(facility, level, "%s", line);
	kfree(buf);
	return ret;
}

/*
 * Read handler for /dev/kmsg: return the next record for this reader.
 *
 * Return: the formatted record length on success; -EAGAIN if no record is
 * available and the file is O_NONBLOCK; -EPIPE if records were dropped
 * (the reader position is reset to the record that was found); -EINVAL
 * if @count is smaller than the record (the position has already been
 * advanced past it); -EFAULT on copy failure; or the error from the
 * interruptible lock/wait.
 */
static ssize_t devkmsg_read(struct file *file, char __user *buf,
			    size_t count, loff_t *ppos)
{
	struct devkmsg_user *user = file->private_data;
	char *outbuf = &user->pbufs.outbuf[0];
	struct printk_message pmsg = {
		.pbufs = &user->pbufs,
	};
	ssize_t ret;

	ret = mutex_lock_interruptible(&user->lock);
	if (ret)
		return ret;

	if (!printk_get_next_message(&pmsg, atomic64_read(&user->seq), true, false)) {
		if (file->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			goto out;
		}

		/*
		 * Guarantee this task is visible on the waitqueue before
		 * checking the wake condition.
		 *
		 * The full memory barrier within set_current_state() of
		 * prepare_to_wait_event() pairs with the full memory barrier
		 * within wq_has_sleeper().
		 *
		 * This pairs with __wake_up_klogd:A.
		 */
		ret = wait_event_interruptible(log_wait,
				printk_get_next_message(&pmsg, atomic64_read(&user->seq), true,
							false)); /* LMM(devkmsg_read:A) */
		if (ret)
			goto out;
	}

	if (pmsg.dropped) {
		/* our last seen message is gone, return error and reset */
		atomic64_set(&user->seq, pmsg.seq);
		ret = -EPIPE;
		goto out;
	}

	atomic64_set(&user->seq, pmsg.seq + 1);

	if (pmsg.outbuf_len > count) {
		ret = -EINVAL;
		goto out;
	}

	if (copy_to_user(buf, outbuf, pmsg.outbuf_len)) {
		ret = -EFAULT;
		goto out;
	}
	ret = pmsg.outbuf_len;
out:
	mutex_unlock(&user->lock);
	return ret;
}

/*
 * Be careful when modifying this function!!!
 *
 * Only few operations are supported because the device works only with the
 * entire variable length messages (records). Non-standard values are
 * returned in the other cases and has been this way for quite some time.
 * User space applications might depend on this behavior.
 */
static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence)
{
	struct devkmsg_user *user = file->private_data;
	loff_t ret = 0;

	/* Only a zero offset is accepted; @whence alone selects the position. */
	if (offset)
		return -ESPIPE;

	switch (whence) {
	case SEEK_SET:
		/* the first record */
		atomic64_set(&user->seq, prb_first_valid_seq(prb));
		break;
	case SEEK_DATA:
		/*
		 * The first record after the last SYSLOG_ACTION_CLEAR,
		 * like issued by 'dmesg -c'. Reading /dev/kmsg itself
		 * changes no global state, and does not clear anything.
		 */
		atomic64_set(&user->seq, latched_seq_read_nolock(&clear_seq));
		break;
	case SEEK_END:
		/* after the last record */
		atomic64_set(&user->seq, prb_next_seq(prb));
		break;
	default:
		ret = -EINVAL;
	}
	return ret;
}

/*
 * Poll handler for /dev/kmsg. Reports readable when a record at or after
 * the reader position exists, and additionally EPOLLERR|EPOLLPRI when the
 * record found is not the one the reader expected.
 */
static __poll_t devkmsg_poll(struct file *file, poll_table *wait)
{
	struct devkmsg_user *user = file->private_data;
	struct printk_info info;
	__poll_t ret = 0;

	poll_wait(file, &log_wait, wait);

	if (prb_read_valid_info(prb, atomic64_read(&user->seq), &info, NULL)) {
		/* return error when data has vanished underneath us */
		if (info.seq != atomic64_read(&user->seq))
			ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI;
		else
			ret = EPOLLIN|EPOLLRDNORM;
	}

	return ret;
}

/*
 * Open handler for /dev/kmsg.
 *
 * Fails with -EPERM when devkmsg logging is off. Opens that are not
 * write-only must pass check_syslog_permissions(). The per-file
 * devkmsg_user is allocated for every open and starts at the first valid
 * record.
 */
static int devkmsg_open(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user;
	int err;

	if (devkmsg_log & DEVKMSG_LOG_MASK_OFF)
		return -EPERM;

	/* write-only does not need any file context */
	if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
		err = check_syslog_permissions(SYSLOG_ACTION_READ_ALL,
					       SYSLOG_FROM_READER);
		if (err)
			return err;
	}

	user = kvmalloc(sizeof(struct devkmsg_user), GFP_KERNEL);
	if (!user)
		return -ENOMEM;

	ratelimit_default_init(&user->rs);
	ratelimit_set_flags(&user->rs, RATELIMIT_MSG_ON_RELEASE);

	mutex_init(&user->lock);

	atomic64_set(&user->seq, prb_first_valid_seq(prb));

	file->private_data = user;
	return 0;
}

/* Release handler for /dev/kmsg: tear down and free the devkmsg_user. */
static int devkmsg_release(struct inode *inode, struct file *file)
{
	struct devkmsg_user *user = file->private_data;

	ratelimit_state_exit(&user->rs);

	mutex_destroy(&user->lock);
	kvfree(user);
	return 0;
}

/* File operations backing /dev/kmsg. */
const struct file_operations kmsg_fops = {
	.open = devkmsg_open,
	.read = devkmsg_read,
	.write_iter = devkmsg_write,
	.llseek = devkmsg_llseek,
	.poll = devkmsg_poll,
	.release = devkmsg_release,
};

#ifdef CONFIG_VMCORE_INFO
/*
 * This appends the listed symbols to /proc/vmcore
 *
 * /proc/vmcore is used by various utilities, like crash and makedumpfile to
 * obtain access to symbols that are otherwise very difficult to locate.
These * symbols are specifically used so that utilities can access and extract the * dmesg log from a vmcore file after a crash. */ void log_buf_vmcoreinfo_setup(void) { struct dev_printk_info *dev_info = NULL; VMCOREINFO_SYMBOL(prb); VMCOREINFO_SYMBOL(printk_rb_static); VMCOREINFO_SYMBOL(clear_seq); /* * Export struct size and field offsets. User space tools can * parse it and detect any changes to structure down the line. */ VMCOREINFO_STRUCT_SIZE(printk_ringbuffer); VMCOREINFO_OFFSET(printk_ringbuffer, desc_ring); VMCOREINFO_OFFSET(printk_ringbuffer, text_data_ring); VMCOREINFO_OFFSET(printk_ringbuffer, fail); VMCOREINFO_STRUCT_SIZE(prb_desc_ring); VMCOREINFO_OFFSET(prb_desc_ring, count_bits); VMCOREINFO_OFFSET(prb_desc_ring, descs); VMCOREINFO_OFFSET(prb_desc_ring, infos); VMCOREINFO_OFFSET(prb_desc_ring, head_id); VMCOREINFO_OFFSET(prb_desc_ring, tail_id); VMCOREINFO_STRUCT_SIZE(prb_desc); VMCOREINFO_OFFSET(prb_desc, state_var); VMCOREINFO_OFFSET(prb_desc, text_blk_lpos); VMCOREINFO_STRUCT_SIZE(prb_data_blk_lpos); VMCOREINFO_OFFSET(prb_data_blk_lpos, begin); VMCOREINFO_OFFSET(prb_data_blk_lpos, next); VMCOREINFO_STRUCT_SIZE(printk_info); VMCOREINFO_OFFSET(printk_info, seq); VMCOREINFO_OFFSET(printk_info, ts_nsec); VMCOREINFO_OFFSET(printk_info, text_len); VMCOREINFO_OFFSET(printk_info, caller_id); VMCOREINFO_OFFSET(printk_info, dev_info); VMCOREINFO_STRUCT_SIZE(dev_printk_info); VMCOREINFO_OFFSET(dev_printk_info, subsystem); VMCOREINFO_LENGTH(printk_info_subsystem, sizeof(dev_info->subsystem)); VMCOREINFO_OFFSET(dev_printk_info, device); VMCOREINFO_LENGTH(printk_info_device, sizeof(dev_info->device)); VMCOREINFO_STRUCT_SIZE(prb_data_ring); VMCOREINFO_OFFSET(prb_data_ring, size_bits); VMCOREINFO_OFFSET(prb_data_ring, data); VMCOREINFO_OFFSET(prb_data_ring, head_lpos); VMCOREINFO_OFFSET(prb_data_ring, tail_lpos); VMCOREINFO_SIZE(atomic_long_t); VMCOREINFO_TYPE_OFFSET(atomic_long_t, counter); VMCOREINFO_STRUCT_SIZE(latched_seq); VMCOREINFO_OFFSET(latched_seq, 
val); } #endif /* requested log_buf_len from kernel cmdline */ static unsigned long __initdata new_log_buf_len; /* we practice scaling the ring buffer by powers of 2 */ static void __init log_buf_len_update(u64 size) { if (size > (u64)LOG_BUF_LEN_MAX) { size = (u64)LOG_BUF_LEN_MAX; pr_err("log_buf over 2G is not supported.\n"); } if (size) size = roundup_pow_of_two(size); if (size > log_buf_len) new_log_buf_len = (unsigned long)size; } /* save requested log_buf_len since it's too early to process it */ static int __init log_buf_len_setup(char *str) { u64 size; if (!str) return -EINVAL; size = memparse(str, &str); log_buf_len_update(size); return 0; } early_param("log_buf_len", log_buf_len_setup); #ifdef CONFIG_SMP #define __LOG_CPU_MAX_BUF_LEN (1 << CONFIG_LOG_CPU_MAX_BUF_SHIFT) static void __init log_buf_add_cpu(void) { unsigned int cpu_extra; /* * archs should set up cpu_possible_bits properly with * set_cpu_possible() after setup_arch() but just in * case lets ensure this is valid. */ if (num_possible_cpus() == 1) return; cpu_extra = (num_possible_cpus() - 1) * __LOG_CPU_MAX_BUF_LEN; /* by default this will only continue through for large > 64 CPUs */ if (cpu_extra <= __LOG_BUF_LEN / 2) return; pr_info("log_buf_len individual max cpu contribution: %d bytes\n", __LOG_CPU_MAX_BUF_LEN); pr_info("log_buf_len total cpu_extra contributions: %d bytes\n", cpu_extra); pr_info("log_buf_len min size: %d bytes\n", __LOG_BUF_LEN); log_buf_len_update(cpu_extra + __LOG_BUF_LEN); } #else /* !CONFIG_SMP */ static inline void log_buf_add_cpu(void) {} #endif /* CONFIG_SMP */ static void __init set_percpu_data_ready(void) { __printk_percpu_data_ready = true; } static unsigned int __init add_to_rb(struct printk_ringbuffer *rb, struct printk_record *r) { struct prb_reserved_entry e; struct printk_record dest_r; prb_rec_init_wr(&dest_r, r->info->text_len); if (!prb_reserve(&e, rb, &dest_r)) return 0; memcpy(&dest_r.text_buf[0], &r->text_buf[0], r->info->text_len); 
dest_r.info->text_len = r->info->text_len; dest_r.info->facility = r->info->facility; dest_r.info->level = r->info->level; dest_r.info->flags = r->info->flags; dest_r.info->ts_nsec = r->info->ts_nsec; dest_r.info->caller_id = r->info->caller_id; memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info)); prb_final_commit(&e); return prb_record_text_space(&e); } static char setup_text_buf[PRINTKRB_RECORD_MAX] __initdata; static void print_log_buf_usage_stats(void) { unsigned int descs_count = log_buf_len >> PRB_AVGBITS; size_t meta_data_size; meta_data_size = descs_count * (sizeof(struct prb_desc) + sizeof(struct printk_info)); pr_info("log buffer data + meta data: %u + %zu = %zu bytes\n", log_buf_len, meta_data_size, log_buf_len + meta_data_size); } void __init setup_log_buf(int early) { struct printk_info *new_infos; unsigned int new_descs_count; struct prb_desc *new_descs; struct printk_info info; struct printk_record r; unsigned int text_size; size_t new_descs_size; size_t new_infos_size; unsigned long flags; char *new_log_buf; unsigned int free; u64 seq; /* * Some archs call setup_log_buf() multiple times - first is very * early, e.g. from setup_arch(), and second - when percpu_areas * are initialised. */ if (!early) set_percpu_data_ready(); if (log_buf != __log_buf) return; if (!early && !new_log_buf_len) log_buf_add_cpu(); if (!new_log_buf_len) { /* Show the memory stats only once. 
*/ if (!early) goto out; return; } new_descs_count = new_log_buf_len >> PRB_AVGBITS; if (new_descs_count == 0) { pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len); goto out; } new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN); if (unlikely(!new_log_buf)) { pr_err("log_buf_len: %lu text bytes not available\n", new_log_buf_len); goto out; } new_descs_size = new_descs_count * sizeof(struct prb_desc); new_descs = memblock_alloc(new_descs_size, LOG_ALIGN); if (unlikely(!new_descs)) { pr_err("log_buf_len: %zu desc bytes not available\n", new_descs_size); goto err_free_log_buf; } new_infos_size = new_descs_count * sizeof(struct printk_info); new_infos = memblock_alloc(new_infos_size, LOG_ALIGN); if (unlikely(!new_infos)) { pr_err("log_buf_len: %zu info bytes not available\n", new_infos_size); goto err_free_descs; } prb_rec_init_rd(&r, &info, &setup_text_buf[0], sizeof(setup_text_buf)); prb_init(&printk_rb_dynamic, new_log_buf, ilog2(new_log_buf_len), new_descs, ilog2(new_descs_count), new_infos); local_irq_save(flags); log_buf_len = new_log_buf_len; log_buf = new_log_buf; new_log_buf_len = 0; free = __LOG_BUF_LEN; prb_for_each_record(0, &printk_rb_static, seq, &r) { text_size = add_to_rb(&printk_rb_dynamic, &r); if (text_size > free) free = 0; else free -= text_size; } prb = &printk_rb_dynamic; local_irq_restore(flags); /* * Copy any remaining messages that might have appeared from * NMI context after copying but before switching to the * dynamic buffer. 
*/ prb_for_each_record(seq, &printk_rb_static, seq, &r) { text_size = add_to_rb(&printk_rb_dynamic, &r); if (text_size > free) free = 0; else free -= text_size; } if (seq != prb_next_seq(&printk_rb_static)) { pr_err("dropped %llu messages\n", prb_next_seq(&printk_rb_static) - seq); } print_log_buf_usage_stats(); pr_info("early log buf free: %u(%u%%)\n", free, (free * 100) / __LOG_BUF_LEN); return; err_free_descs: memblock_free(new_descs, new_descs_size); err_free_log_buf: memblock_free(new_log_buf, new_log_buf_len); out: print_log_buf_usage_stats(); } static bool __read_mostly ignore_loglevel; static int __init ignore_loglevel_setup(char *str) { ignore_loglevel = true; pr_info("debug: ignoring loglevel setting.\n"); return 0; } early_param("ignore_loglevel", ignore_loglevel_setup); module_param(ignore_loglevel, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ignore_loglevel, "ignore loglevel setting (prints all kernel messages to the console)"); static bool suppress_message_printing(int level) { return (level >= console_loglevel && !ignore_loglevel); } #ifdef CONFIG_BOOT_PRINTK_DELAY static int boot_delay; /* msecs delay after each printk during bootup */ static unsigned long long loops_per_msec; /* based on boot_delay */ static int __init boot_delay_setup(char *str) { unsigned long lpj; lpj = preset_lpj ? 
preset_lpj : 1000000; /* some guess */ loops_per_msec = (unsigned long long)lpj / 1000 * HZ; get_option(&str, &boot_delay); if (boot_delay > 10 * 1000) boot_delay = 0; pr_debug("boot_delay: %u, preset_lpj: %ld, lpj: %lu, " "HZ: %d, loops_per_msec: %llu\n", boot_delay, preset_lpj, lpj, HZ, loops_per_msec); return 0; } early_param("boot_delay", boot_delay_setup); static void boot_delay_msec(int level) { unsigned long long k; unsigned long timeout; bool suppress = !is_printk_force_console() && suppress_message_printing(level); if ((boot_delay == 0 || system_state >= SYSTEM_RUNNING) || suppress) return; k = (unsigned long long)loops_per_msec * boot_delay; timeout = jiffies + msecs_to_jiffies(boot_delay); while (k) { k--; cpu_relax(); /* * use (volatile) jiffies to prevent * compiler reduction; loop termination via jiffies * is secondary and may or may not happen. */ if (time_after(jiffies, timeout)) break; touch_nmi_watchdog(); } } #else static inline void boot_delay_msec(int level) { } #endif static bool printk_time = IS_ENABLED(CONFIG_PRINTK_TIME); module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR); static size_t print_syslog(unsigned int level, char *buf) { return sprintf(buf, "<%u>", level); } static size_t print_time(u64 ts, char *buf) { unsigned long rem_nsec = do_div(ts, 1000000000); return sprintf(buf, "[%5lu.%06lu]", (unsigned long)ts, rem_nsec / 1000); } #ifdef CONFIG_PRINTK_CALLER static size_t print_caller(u32 id, char *buf) { char caller[12]; snprintf(caller, sizeof(caller), "%c%u", id & 0x80000000 ? 
'C' : 'T', id & ~0x80000000); return sprintf(buf, "[%6s]", caller); } #else #define print_caller(id, buf) 0 #endif static size_t info_print_prefix(const struct printk_info *info, bool syslog, bool time, char *buf) { size_t len = 0; if (syslog) len = print_syslog((info->facility << 3) | info->level, buf); if (time) len += print_time(info->ts_nsec, buf + len); len += print_caller(info->caller_id, buf + len); if (IS_ENABLED(CONFIG_PRINTK_CALLER) || time) { buf[len++] = ' '; buf[len] = '\0'; } return len; } /* * Prepare the record for printing. The text is shifted within the given * buffer to avoid a need for another one. The following operations are * done: * * - Add prefix for each line. * - Drop truncated lines that no longer fit into the buffer. * - Add the trailing newline that has been removed in vprintk_store(). * - Add a string terminator. * * Since the produced string is always terminated, the maximum possible * return value is @r->text_buf_size - 1; * * Return: The length of the updated/prepared text, including the added * prefixes and the newline. The terminator is not counted. The dropped * line(s) are not counted. */ static size_t record_print_text(struct printk_record *r, bool syslog, bool time) { size_t text_len = r->info->text_len; size_t buf_size = r->text_buf_size; char *text = r->text_buf; char prefix[PRINTK_PREFIX_MAX]; bool truncated = false; size_t prefix_len; size_t line_len; size_t len = 0; char *next; /* * If the message was truncated because the buffer was not large * enough, treat the available text as if it were the full text. 
*/ if (text_len > buf_size) text_len = buf_size; prefix_len = info_print_prefix(r->info, syslog, time, prefix); /* * @text_len: bytes of unprocessed text * @line_len: bytes of current line _without_ newline * @text: pointer to beginning of current line * @len: number of bytes prepared in r->text_buf */ for (;;) { next = memchr(text, '\n', text_len); if (next) { line_len = next - text; } else { /* Drop truncated line(s). */ if (truncated) break; line_len = text_len; } /* * Truncate the text if there is not enough space to add the * prefix and a trailing newline and a terminator. */ if (len + prefix_len + text_len + 1 + 1 > buf_size) { /* Drop even the current line if no space. */ if (len + prefix_len + line_len + 1 + 1 > buf_size) break; text_len = buf_size - len - prefix_len - 1 - 1; truncated = true; } memmove(text + prefix_len, text, text_len); memcpy(text, prefix, prefix_len); /* * Increment the prepared length to include the text and * prefix that were just moved+copied. Also increment for the * newline at the end of this line. If this is the last line, * there is no newline, but it will be added immediately below. */ len += prefix_len + line_len + 1; if (text_len == line_len) { /* * This is the last line. Add the trailing newline * removed in vprintk_store(). */ text[prefix_len + line_len] = '\n'; break; } /* * Advance beyond the added prefix and the related line with * its newline. */ text += prefix_len + line_len + 1; /* * The remaining text has only decreased by the line with its * newline. * * Note that @text_len can become zero. It happens when @text * ended with a newline (either due to truncation or the * original string ending with "\n\n"). The loop is correctly * repeated and (if not truncated) an empty line with a prefix * will be prepared. */ text_len -= line_len + 1; } /* * If a buffer was provided, it will be terminated. Space for the * string terminator is guaranteed to be available. The terminator is * not counted in the return value. 
*/ if (buf_size > 0) r->text_buf[len] = 0; return len; } static size_t get_record_print_text_size(struct printk_info *info, unsigned int line_count, bool syslog, bool time) { char prefix[PRINTK_PREFIX_MAX]; size_t prefix_len; prefix_len = info_print_prefix(info, syslog, time, prefix); /* * Each line will be preceded with a prefix. The intermediate * newlines are already within the text, but a final trailing * newline will be added. */ return ((prefix_len * line_count) + info->text_len + 1); } /* * Beginning with @start_seq, find the first record where it and all following * records up to (but not including) @max_seq fit into @size. * * @max_seq is simply an upper bound and does not need to exist. If the caller * does not require an upper bound, -1 can be used for @max_seq. */ static u64 find_first_fitting_seq(u64 start_seq, u64 max_seq, size_t size, bool syslog, bool time) { struct printk_info info; unsigned int line_count; size_t len = 0; u64 seq; /* Determine the size of the records up to @max_seq. */ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { if (info.seq >= max_seq) break; len += get_record_print_text_size(&info, line_count, syslog, time); } /* * Adjust the upper bound for the next loop to avoid subtracting * lengths that were never added. */ if (seq < max_seq) max_seq = seq; /* * Move first record forward until length fits into the buffer. Ignore * newest messages that were not counted in the above cycle. Messages * might appear and get lost in the meantime. This is a best effort * that prevents an infinite loop that could occur with a retry. */ prb_for_each_info(start_seq, prb, seq, &info, &line_count) { if (len <= size || info.seq >= max_seq) break; len -= get_record_print_text_size(&info, line_count, syslog, time); } return seq; } /* The caller is responsible for making sure @size is greater than 0. 
*/
/*
 * Copy formatted records, starting at @syslog_seq, into the user buffer
 * @buf of @size bytes. Waits until the record at @syslog_seq can be read.
 *
 * Return: number of bytes copied, -ENOMEM when the temporary text buffer
 * cannot be allocated, -EFAULT when the very first copy to user space
 * fails, or the non-zero result of the interruptible wait.
 */
static int syslog_print(char __user *buf, int size)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;

	text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);

	mutex_lock(&syslog_lock);

	/*
	 * Wait for the @syslog_seq record to be available. @syslog_seq may
	 * change while waiting.
	 */
	do {
		seq = syslog_seq;

		mutex_unlock(&syslog_lock);
		/*
		 * Guarantee this task is visible on the waitqueue before
		 * checking the wake condition.
		 *
		 * The full memory barrier within set_current_state() of
		 * prepare_to_wait_event() pairs with the full memory barrier
		 * within wq_has_sleeper().
		 *
		 * This pairs with __wake_up_klogd:A.
		 */
		len = wait_event_interruptible(log_wait,
				prb_read_valid(prb, seq, NULL)); /* LMM(syslog_print:A) */
		mutex_lock(&syslog_lock);

		if (len)
			goto out;
	} while (syslog_seq != seq);

	/*
	 * Copy records that fit into the buffer. The above cycle makes sure
	 * that the first record is always available.
	 */
	do {
		size_t n;
		size_t skip;
		int err;

		if (!prb_read_valid(prb, syslog_seq, &r))
			break;

		if (r.info->seq != syslog_seq) {
			/* message is gone, move to next valid one */
			syslog_seq = r.info->seq;
			syslog_partial = 0;
		}

		/*
		 * To keep reading/counting partial line consistent,
		 * use printk_time value as of the beginning of a line.
		 */
		if (!syslog_partial)
			syslog_time = printk_time;

		skip = syslog_partial;
		n = record_print_text(&r, true, syslog_time);
		if (n - syslog_partial <= size) {
			/* message fits into buffer, move forward */
			syslog_seq = r.info->seq + 1;
			n -= syslog_partial;
			syslog_partial = 0;
		} else if (!len){
			/* partial read(), remember position */
			n = size;
			syslog_partial += n;
		} else
			n = 0;

		if (!n)
			break;

		/* syslog_lock is not held across the copy to user space. */
		mutex_unlock(&syslog_lock);
		err = copy_to_user(buf, text + skip, n);
		mutex_lock(&syslog_lock);

		if (err) {
			/* Report the fault only if nothing was copied so far. */
			if (!len)
				len = -EFAULT;
			break;
		}

		len += n;
		size -= n;
		buf += n;
	} while (size);
out:
	mutex_unlock(&syslog_lock);
	kfree(text);
	return len;
}

/*
 * Copy as many of the newest records as fit into @size bytes to the user
 * buffer @buf, starting no earlier than @clear_seq. When @clear is set,
 * @clear_seq is updated afterwards with the sequence number at which the
 * copy stopped.
 *
 * Return: number of bytes copied, -ENOMEM or -EFAULT.
 */
static int syslog_print_all(char __user *buf, int size, bool clear)
{
	struct printk_info info;
	struct printk_record r;
	char *text;
	int len = 0;
	u64 seq;
	bool time;

	text = kmalloc(PRINTK_MESSAGE_MAX, GFP_KERNEL);
	if (!text)
		return -ENOMEM;

	time = printk_time;
	/*
	 * Find first record that fits, including all following records,
	 * into the user-provided buffer for this dump.
	 */
	seq = find_first_fitting_seq(latched_seq_read_nolock(&clear_seq), -1,
				     size, true, time);

	prb_rec_init_rd(&r, &info, text, PRINTK_MESSAGE_MAX);

	prb_for_each_record(seq, prb, seq, &r) {
		int textlen;

		textlen = record_print_text(&r, true, time);

		if (len + textlen > size) {
			seq--;
			break;
		}

		if (copy_to_user(buf + len, text, textlen))
			len = -EFAULT;
		else
			len += textlen;

		if (len < 0)
			break;
	}

	if (clear) {
		mutex_lock(&syslog_lock);
		latched_seq_write(&clear_seq, seq);
		mutex_unlock(&syslog_lock);
	}

	kfree(text);
	return len;
}

/* Set @clear_seq to the next sequence number of the ringbuffer. */
static void syslog_clear(void)
{
	mutex_lock(&syslog_lock);
	latched_seq_write(&clear_seq, prb_next_seq(prb));
	mutex_unlock(&syslog_lock);
}

/*
 * Execute the syslog action @type.
 *
 * @type:   SYSLOG_ACTION_* value selecting the operation
 * @buf:    user buffer for the read actions
 * @len:    buffer length for the read actions; the requested level for
 *          SYSLOG_ACTION_CONSOLE_LEVEL
 * @source: passed to check_syslog_permissions(); SYSLOG_FROM_PROC selects
 *          the record-count short-cut in SYSLOG_ACTION_SIZE_UNREAD
 *
 * Return: a negative error code, 0, or an action specific non-negative
 * value (bytes read, unread size, buffer size).
 */
int do_syslog(int type, char __user *buf, int len, int source)
{
	struct printk_info info;
	bool clear = false;
	static int saved_console_loglevel = LOGLEVEL_DEFAULT;
	int error;

	error = check_syslog_permissions(type, source);
	if (error)
		return error;

	switch (type) {
	case SYSLOG_ACTION_CLOSE:	/* Close log */
		break;
	case SYSLOG_ACTION_OPEN:	/* Open log */
		break;
	case SYSLOG_ACTION_READ:	/* Read from log */
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print(buf, len);
		break;
	/* Read/clear last kernel messages */
	case SYSLOG_ACTION_READ_CLEAR:
		clear = true;
		fallthrough;
	/* Read last kernel messages */
	case SYSLOG_ACTION_READ_ALL:
		if (!buf || len < 0)
			return -EINVAL;
		if (!len)
			return 0;
		if (!access_ok(buf, len))
			return -EFAULT;
		error = syslog_print_all(buf, len, clear);
		break;
	/* Clear ring buffer */
	case SYSLOG_ACTION_CLEAR:
		syslog_clear();
		break;
	/* Disable logging to console */
	case SYSLOG_ACTION_CONSOLE_OFF:
		if (saved_console_loglevel == LOGLEVEL_DEFAULT)
			saved_console_loglevel = console_loglevel;
		console_loglevel = minimum_console_loglevel;
		break;
	/* Enable logging to console */
	case SYSLOG_ACTION_CONSOLE_ON:
		if (saved_console_loglevel != LOGLEVEL_DEFAULT) {
			console_loglevel = saved_console_loglevel;
			saved_console_loglevel = LOGLEVEL_DEFAULT;
		}
		break;
	/* Set level of messages printed to console */
	case SYSLOG_ACTION_CONSOLE_LEVEL:
		if (len < 1 || len > 8)
			return -EINVAL;
		if (len < minimum_console_loglevel)
			len = minimum_console_loglevel;
		console_loglevel = len;
		/* Implicitly re-enable logging to console */
		saved_console_loglevel = LOGLEVEL_DEFAULT;
		break;
	/* Number of chars in the log buffer */
	case SYSLOG_ACTION_SIZE_UNREAD:
		mutex_lock(&syslog_lock);
		if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) {
			/* No unread messages. */
			mutex_unlock(&syslog_lock);
			return 0;
		}
		if (info.seq != syslog_seq) {
			/* messages are gone, move to first one */
			syslog_seq = info.seq;
			syslog_partial = 0;
		}
		if (source == SYSLOG_FROM_PROC) {
			/*
			 * Short-cut for poll(/"proc/kmsg") which simply checks
			 * for pending data, not the size; return the count of
			 * records, not the length.
			 */
			error = prb_next_seq(prb) - syslog_seq;
		} else {
			bool time = syslog_partial ? syslog_time : printk_time;
			unsigned int line_count;
			u64 seq;

			/* @error is 0 here, so it can be used as accumulator. */
			prb_for_each_info(syslog_seq, prb, seq, &info,
					  &line_count) {
				error += get_record_print_text_size(&info, line_count,
								    true, time);
				time = printk_time;
			}
			error -= syslog_partial;
		}
		mutex_unlock(&syslog_lock);
		break;
	/* Size of the log buffer */
	case SYSLOG_ACTION_SIZE_BUFFER:
		error = log_buf_len;
		break;
	default:
		error = -EINVAL;
		break;
	}

	return error;
}

/* syslog(2) entry point: forwards to do_syslog() with SYSLOG_FROM_READER. */
SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len)
{
	return do_syslog(type, buf, len, SYSLOG_FROM_READER);
}

/*
 * Special console_lock variants that help to reduce the risk of soft-lockups.
 * They allow to pass console_lock to another printk() call using a busy wait.
 */

#ifdef CONFIG_LOCKDEP
static struct lockdep_map console_owner_dep_map = {
	.name = "console_owner"
};
#endif

static DEFINE_RAW_SPINLOCK(console_owner_lock);
static struct task_struct *console_owner;
static bool console_waiter;

/** * console_lock_spinning_enable - mark beginning of code where another * thread might safely busy wait * * This basically converts console_lock into a spinlock. 
This marks * the section where the console_lock owner can not sleep, because * there may be a waiter spinning (like a spinlock). Also it must be * ready to hand over the lock at the end of the section. */
void console_lock_spinning_enable(void)
{
	/*
	 * Do not use spinning in panic(). The panic CPU wants to keep the lock.
	 * Non-panic CPUs abandon the flush anyway.
	 *
	 * Just keep the lockdep annotation. The panic-CPU should avoid
	 * taking console_owner_lock because it might cause a deadlock.
	 * This looks like the easiest way how to prevent false lockdep
	 * reports without handling races a lockless way.
	 */
	if (panic_in_progress())
		goto lockdep;

	raw_spin_lock(&console_owner_lock);
	console_owner = current;
	raw_spin_unlock(&console_owner_lock);

lockdep:
	/* The waiter may spin on us after setting console_owner */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
}

/**
 * console_lock_spinning_disable_and_check - mark end of code where another
 *	thread was able to busy wait and check if there is a waiter
 * @cookie: cookie returned from console_srcu_read_lock()
 *
 * This is called at the end of the section where spinning is allowed.
 * It has two functions. First, it is a signal that it is no longer
 * safe to start busy waiting for the lock. Second, it checks if
 * there is a busy waiter and passes the lock rights to her.
 *
 * Important: Callers lose both the console_lock and the SRCU read lock if
 *	there was a busy waiter. They must not touch items synchronized by
 *	console_lock or SRCU read lock in this case.
 *
 * Return: 1 if the lock rights were passed, 0 otherwise.
 */
int console_lock_spinning_disable_and_check(int cookie)
{
	int waiter;

	/*
	 * Ignore spinning waiters during panic() because they might get stopped
	 * or blocked at any time.
	 *
	 * It is safe because nobody is allowed to start spinning during panic
	 * in the first place. If there has been a waiter then non panic CPUs
	 * might stay spinning. They would get stopped anyway. The panic context
	 * will never start spinning and an interrupted spin on panic CPU will
	 * never continue.
	 */
	if (panic_in_progress()) {
		/* Keep lockdep happy. */
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* Sample the waiter flag and drop ownership under the same lock. */
	raw_spin_lock(&console_owner_lock);
	waiter = READ_ONCE(console_waiter);
	console_owner = NULL;
	raw_spin_unlock(&console_owner_lock);

	if (!waiter) {
		spin_release(&console_owner_dep_map, _THIS_IP_);
		return 0;
	}

	/* The waiter is now free to continue */
	WRITE_ONCE(console_waiter, false);

	spin_release(&console_owner_dep_map, _THIS_IP_);

	/*
	 * Preserve lockdep lock ordering. Release the SRCU read lock before
	 * releasing the console_lock.
	 */
	console_srcu_read_unlock(cookie);

	/*
	 * Hand off console_lock to waiter. The waiter will perform
	 * the up(). After this, the waiter is the console_lock owner.
	 */
	mutex_release(&console_lock_dep_map, _THIS_IP_);
	return 1;
}

/**
 * console_trylock_spinning - try to get console_lock by busy waiting
 *
 * This allows to busy wait for the console_lock when the current
 * owner is running in specially marked sections. It means that
 * the current owner is running and cannot reschedule until it
 * is ready to lose the lock.
 *
 * Return: 1 if we got the lock, 0 otherwise
 */
static int console_trylock_spinning(void)
{
	struct task_struct *owner = NULL;
	bool waiter;
	bool spin = false;
	unsigned long flags;

	if (console_trylock())
		return 1;

	/*
	 * It's unsafe to spin once a panic has begun. If we are the
	 * panic CPU, we may have already halted the owner of the
	 * console_sem. If we are not the panic CPU, then we should
	 * avoid taking console_sem, so the panic CPU has a better
	 * chance of cleanly acquiring it later.
	 */
	if (panic_in_progress())
		return 0;

	printk_safe_enter_irqsave(flags);

	raw_spin_lock(&console_owner_lock);
	owner = READ_ONCE(console_owner);
	waiter = READ_ONCE(console_waiter);
	if (!waiter && owner && owner != current) {
		WRITE_ONCE(console_waiter, true);
		spin = true;
	}
	raw_spin_unlock(&console_owner_lock);

	/*
	 * If there is an active printk() writing to the
	 * consoles, instead of having it write our data too,
	 * see if we can offload that load from the active
	 * printer, and do some printing ourselves.
	 * Go into a spin only if there isn't already a waiter
	 * spinning, and there is an active printer, and
	 * that active printer isn't us (recursive printk?).
	 */
	if (!spin) {
		printk_safe_exit_irqrestore(flags);
		return 0;
	}

	/* We spin waiting for the owner to release us */
	spin_acquire(&console_owner_dep_map, 0, 0, _THIS_IP_);
	/* Owner will clear console_waiter on hand off */
	while (READ_ONCE(console_waiter))
		cpu_relax();
	spin_release(&console_owner_dep_map, _THIS_IP_);

	printk_safe_exit_irqrestore(flags);
	/*
	 * The owner passed the console lock to us.
	 * Since we did not spin on console lock, annotate
	 * this as a trylock. Otherwise lockdep will
	 * complain.
	 */
	mutex_acquire(&console_lock_dep_map, 0, 1, _THIS_IP_);

	/*
	 * Update @console_may_schedule for trylock because the previous
	 * owner may have been schedulable.
	 */
	console_may_schedule = 0;

	return 1;
}

/*
 * Recursion is tracked separately on each CPU. If NMIs are supported, an
 * additional NMI context per CPU is also separately tracked. Until per-CPU
 * is available, a separate "early tracking" is performed.
 */
static DEFINE_PER_CPU(u8, printk_count);
static u8 printk_count_early;
#ifdef CONFIG_HAVE_NMI
static DEFINE_PER_CPU(u8, printk_count_nmi);
static u8 printk_count_nmi_early;
#endif

/* * Recursion is limited to keep the output sane. 
printk() should not require * more than 1 level of recursion (allowing, for example, printk() to trigger * a WARN), but a higher value is used in case some printk-internal errors * exist, such as the ringbuffer validation checks failing. */
#define PRINTK_MAX_RECURSION 3

/*
 * Return a pointer to the dedicated counter for the CPU+context of the
 * caller.
 *
 * The per-CPU counters are used once printk_percpu_data_ready() reports
 * true; before that the single "early" counters are returned.
 */
static u8 *__printk_recursion_counter(void)
{
#ifdef CONFIG_HAVE_NMI
	if (in_nmi()) {
		if (printk_percpu_data_ready())
			return this_cpu_ptr(&printk_count_nmi);
		return &printk_count_nmi_early;
	}
#endif
	if (printk_percpu_data_ready())
		return this_cpu_ptr(&printk_count);
	return &printk_count_early;
}

/*
 * Enter recursion tracking. Interrupts are disabled to simplify tracking.
 * The caller must check the boolean return value to see if the recursion is
 * allowed. On failure, interrupts are not disabled.
 *
 * @recursion_ptr must be a variable of type (u8 *) and is the same variable
 * that is passed to printk_exit_irqrestore().
 */
#define printk_enter_irqsave(recursion_ptr, flags)	\
({							\
	bool success = true;				\
							\
	typecheck(u8 *, recursion_ptr);			\
	local_irq_save(flags);				\
	(recursion_ptr) = __printk_recursion_counter();	\
	if (*(recursion_ptr) > PRINTK_MAX_RECURSION) {	\
		local_irq_restore(flags);		\
		success = false;			\
	} else {					\
		(*(recursion_ptr))++;			\
	}						\
	success;					\
})

/* Exit recursion tracking, restoring interrupts. */
#define printk_exit_irqrestore(recursion_ptr, flags)	\
	do {						\
		typecheck(u8 *, recursion_ptr);		\
		(*(recursion_ptr))--;			\
		local_irq_restore(flags);		\
	} while (0)

/* Number of 1 ms delay steps printk_delay() performs; 0 skips them. */
int printk_delay_msec __read_mostly;

/*
 * Apply the boot-time delay for @level and then, when printk_delay_msec is
 * non-zero, delay in 1 ms steps while touching the NMI watchdog after each
 * step.
 */
static inline void printk_delay(int level)
{
	boot_delay_msec(level);

	if (unlikely(printk_delay_msec)) {
		int m = printk_delay_msec;

		while (m--) {
			mdelay(1);
			touch_nmi_watchdog();
		}
	}
}

/*
 * Return: the PID of the current task when called in task context,
 * otherwise 0x80000000 plus the id of the executing CPU.
 */
static inline u32 printk_caller_id(void)
{
	return in_task() ? task_pid_nr(current) :
		0x80000000 + smp_processor_id();
}

/** * printk_parse_prefix - Parse level and control flags. * * @text: The terminated text message. 
* @level: A pointer to the current level value, will be updated. * @flags: A pointer to the current printk_info flags, will be updated. * * @level may be NULL if the caller is not interested in the parsed value. * Otherwise the variable pointed to by @level must be set to * LOGLEVEL_DEFAULT in order to be updated with the parsed value. * * @flags may be NULL if the caller is not interested in the parsed value. * Otherwise the variable pointed to by @flags will be OR'd with the parsed * value. * * Return: The length of the parsed level and control flags. */
u16 printk_parse_prefix(const char *text, int *level,
			enum printk_info_flags *flags)
{
	u16 prefix_len = 0;
	int kern_level;

	/* Each recognized prefix occupies two characters of @text. */
	while (*text) {
		kern_level = printk_get_level(text);
		if (!kern_level)
			break;

		switch (kern_level) {
		case '0' ... '7':
			if (level && *level == LOGLEVEL_DEFAULT)
				*level = kern_level - '0';
			break;
		case 'c':	/* KERN_CONT */
			if (flags)
				*flags |= LOG_CONT;
		}

		prefix_len += 2;
		text += 2;
	}

	return prefix_len;
}

/*
 * Format the message into @text (at most @size bytes), strip a trailing
 * newline (recording it as LOG_NEWLINE in @flags) and, for @facility 0,
 * strip the leading level/control prefix.
 *
 * Return: length of the resulting text.
 */
__printf(5, 0)
static u16 printk_sprint(char *text, u16 size, int facility,
			 enum printk_info_flags *flags, const char *fmt,
			 va_list args)
{
	u16 text_len;

	text_len = vscnprintf(text, size, fmt, args);

	/* Mark and strip a trailing newline. */
	if (text_len && text[text_len - 1] == '\n') {
		text_len--;
		*flags |= LOG_NEWLINE;
	}

	/* Strip log level and control flags. */
	if (facility == 0) {
		u16 prefix_len;

		prefix_len = printk_parse_prefix(text, NULL, NULL);
		if (prefix_len) {
			text_len -= prefix_len;
			memmove(text, text + prefix_len, text_len);
		}
	}

	trace_console(text, text_len);

	return text_len;
}

/*
 * Format a message and store it as a record in the ringbuffer, either by
 * extending the last record (LOG_CONT) or by reserving a new one.
 *
 * Return: number of text bytes stored; 0 when the recursion limit is hit
 * or no space could be reserved.
 */
__printf(4, 0)
int vprintk_store(int facility, int level,
		  const struct dev_printk_info *dev_info,
		  const char *fmt, va_list args)
{
	struct prb_reserved_entry e;
	enum printk_info_flags flags = 0;
	struct printk_record r;
	unsigned long irqflags;
	u16 trunc_msg_len = 0;
	char prefix_buf[8];
	u8 *recursion_ptr;
	u16 reserve_size;
	va_list args2;
	u32 caller_id;
	u16 text_len;
	int ret = 0;
	u64 ts_nsec;

	if (!printk_enter_irqsave(recursion_ptr, irqflags))
		return 0;

	/*
	 * Since the duration of printk() can vary depending on the message
	 * and state of the ringbuffer, grab the timestamp now so that it is
	 * close to the call of printk(). This provides a more deterministic
	 * timestamp with respect to the caller.
	 */
	ts_nsec = local_clock();

	caller_id = printk_caller_id();

	/*
	 * The sprintf needs to come first since the syslog prefix might be
	 * passed in as a parameter. An extra byte must be reserved so that
	 * later the vscnprintf() into the reserved buffer has room for the
	 * terminating '\0', which is not counted by vsnprintf().
	 */
	va_copy(args2, args);
	reserve_size = vsnprintf(&prefix_buf[0], sizeof(prefix_buf), fmt, args2) + 1;
	va_end(args2);

	if (reserve_size > PRINTKRB_RECORD_MAX)
		reserve_size = PRINTKRB_RECORD_MAX;

	/* Extract log level or control flags. */
	if (facility == 0)
		printk_parse_prefix(&prefix_buf[0], &level, &flags);

	if (level == LOGLEVEL_DEFAULT)
		level = default_message_loglevel;

	if (dev_info)
		flags |= LOG_NEWLINE;

	if (is_printk_force_console())
		flags |= LOG_FORCE_CON;

	/* Try to append a continuation to the last record first. */
	if (flags & LOG_CONT) {
		prb_rec_init_wr(&r, reserve_size);
		if (prb_reserve_in_last(&e, prb, &r, caller_id, PRINTKRB_RECORD_MAX)) {
			text_len = printk_sprint(&r.text_buf[r.info->text_len], reserve_size,
						 facility, &flags, fmt, args);
			r.info->text_len += text_len;

			if (flags & LOG_FORCE_CON)
				r.info->flags |= LOG_FORCE_CON;

			if (flags & LOG_NEWLINE) {
				r.info->flags |= LOG_NEWLINE;
				prb_final_commit(&e);
			} else {
				prb_commit(&e);
			}

			ret = text_len;
			goto out;
		}
	}

	/*
	 * Explicitly initialize the record before every prb_reserve() call.
	 * prb_reserve_in_last() and prb_reserve() purposely invalidate the
	 * structure when they fail.
	 */
	prb_rec_init_wr(&r, reserve_size);
	if (!prb_reserve(&e, prb, &r)) {
		/* truncate the message if it is too long for empty buffer */
		truncate_msg(&reserve_size, &trunc_msg_len);

		prb_rec_init_wr(&r, reserve_size + trunc_msg_len);
		if (!prb_reserve(&e, prb, &r))
			goto out;
	}

	/* fill message */
	text_len = printk_sprint(&r.text_buf[0], reserve_size, facility, &flags, fmt, args);
	if (trunc_msg_len)
		memcpy(&r.text_buf[text_len], trunc_msg, trunc_msg_len);
	r.info->text_len = text_len + trunc_msg_len;
	r.info->facility = facility;
	r.info->level = level & 7;
	r.info->flags = flags & 0x1f;
	r.info->ts_nsec = ts_nsec;
	r.info->caller_id = caller_id;
	if (dev_info)
		memcpy(&r.info->dev_info, dev_info, sizeof(r.info->dev_info));

	/* A message without a trailing newline can be continued. */
	if (!(flags & LOG_NEWLINE))
		prb_commit(&e);
	else
		prb_final_commit(&e);

	ret = text_len + trunc_msg_len;
out:
	printk_exit_irqrestore(recursion_ptr, irqflags);
	return ret;
}

/* * This acts as a one-way switch to allow legacy consoles to print from * the printk() caller context on a panic CPU. It also attempts to flush * the legacy consoles in this context. 
*/ void printk_legacy_allow_panic_sync(void) { struct console_flush_type ft; legacy_allow_panic_sync = true; printk_get_console_flush_type(&ft); if (ft.legacy_direct) { if (console_trylock()) console_unlock(); } } asmlinkage int vprintk_emit(int facility, int level, const struct dev_printk_info *dev_info, const char *fmt, va_list args) { struct console_flush_type ft; int printed_len; /* Suppress unimportant messages after panic happens */ if (unlikely(suppress_printk)) return 0; /* * The messages on the panic CPU are the most important. If * non-panic CPUs are generating any messages, they will be * silently dropped. */ if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; printk_get_console_flush_type(&ft); /* If called from the scheduler, we can not call up(). */ if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; ft.legacy_offload |= ft.legacy_direct; ft.legacy_direct = false; } printk_delay(level); printed_len = vprintk_store(facility, level, dev_info, fmt, args); if (ft.nbcon_atomic) nbcon_atomic_flush_pending(); if (ft.nbcon_offload) nbcon_kthreads_wake(); if (ft.legacy_direct) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during * printing of all remaining records to all consoles so that * this context can return as soon as possible. Hopefully * another printk() caller will take over the printing. */ preempt_disable(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers. With the * spinning variant, this context tries to take over the * printing from another printing context. 
*/ if (console_trylock_spinning()) console_unlock(); preempt_enable(); } if (ft.legacy_offload) defer_console_output(); else wake_up_klogd(); return printed_len; } EXPORT_SYMBOL(vprintk_emit); int vprintk_default(const char *fmt, va_list args) { return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, fmt, args); } EXPORT_SYMBOL_GPL(vprintk_default); asmlinkage __visible int _printk(const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = vprintk(fmt, args); va_end(args); return r; } EXPORT_SYMBOL(_printk); static bool pr_flush(int timeout_ms, bool reset_on_progress); static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); #else /* CONFIG_PRINTK */ #define printk_time false #define prb_read_valid(rb, seq, r) false #define prb_first_valid_seq(rb) 0 #define prb_next_seq(rb) 0 static u64 syslog_seq; static bool pr_flush(int timeout_ms, bool reset_on_progress) { return true; } static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK struct console *early_console; asmlinkage __visible void early_printk(const char *fmt, ...) { va_list ap; char buf[512]; int n; if (!early_console) return; va_start(ap, fmt); n = vscnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); early_console->write(early_console, buf, n); } #endif static void set_user_specified(struct console_cmdline *c, bool user_specified) { if (!user_specified) return; /* * @c console was defined by the user on the command line. * Do not clear when added twice also by SPCR or the device tree. */ c->user_specified = true; /* At least one console defined by the user on the command line. 
*/ console_set_on_cmdline = 1; } static int __add_preferred_console(const char *name, const short idx, const char *devname, char *options, char *brl_options, bool user_specified) { struct console_cmdline *c; int i; if (!name && !devname) return -EINVAL; /* * We use a signed short index for struct console for device drivers to * indicate a not yet assigned index or port. However, a negative index * value is not valid when the console name and index are defined on * the command line. */ if (name && idx < 0) return -EINVAL; /* * See if this tty is not yet registered, and * if we have a slot free. */ for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { if ((name && strcmp(c->name, name) == 0 && c->index == idx) || (devname && strcmp(c->devname, devname) == 0)) { if (!brl_options) preferred_console = i; set_user_specified(c, user_specified); return 0; } } if (i == MAX_CMDLINECONSOLES) return -E2BIG; if (!brl_options) preferred_console = i; if (name) strscpy(c->name, name); if (devname) strscpy(c->devname, devname); c->options = options; set_user_specified(c, user_specified); braille_set_options(c, brl_options); c->index = idx; return 0; } static int __init console_msg_format_setup(char *str) { if (!strcmp(str, "syslog")) console_msg_format = MSG_FORMAT_SYSLOG; if (!strcmp(str, "default")) console_msg_format = MSG_FORMAT_DEFAULT; return 1; } __setup("console_msg_format=", console_msg_format_setup); /* * Set up a console. Called via do_early_param() in init/main.c * for each "console=" parameter in the boot command line. */ static int __init console_setup(char *str) { static_assert(sizeof(console_cmdline[0].devname) >= sizeof(console_cmdline[0].name) + 4); char buf[sizeof(console_cmdline[0].devname)]; char *brl_options = NULL; char *ttyname = NULL; char *devname = NULL; char *options; char *s; int idx; /* * console="" or console=null have been suggested as a way to * disable console output. 
Use ttynull that has been created
	 * for exactly this purpose.
	 */
	if (str[0] == 0 || strcmp(str, "null") == 0) {
		__add_preferred_console("ttynull", 0, NULL, NULL, NULL, true);
		return 1;
	}

	if (_braille_console_setup(&str, &brl_options))
		return 1;

	/* For a DEVNAME:0.0 style console the character device is unknown early */
	if (strchr(str, ':'))
		devname = buf;
	else
		ttyname = buf;

	/*
	 * Decode str into name, index, options.
	 *
	 * A tty name that starts with a digit gets "ttyS" prepended,
	 * e.g. "1" becomes "ttyS1".
	 */
	if (ttyname && isdigit(str[0]))
		scnprintf(buf, sizeof(buf), "ttyS%s", str);
	else
		strscpy(buf, str);

	/*
	 * Cut @str at the first comma. @buf was filled before the cut, so
	 * it may still contain the comma and the options after it.
	 */
	options = strchr(str, ',');
	if (options)
		*(options++) = 0;

#ifdef __sparc__
	if (!strcmp(str, "ttya"))
		strscpy(buf, "ttyS0");
	if (!strcmp(str, "ttyb"))
		strscpy(buf, "ttyS1");
#endif

	/*
	 * Find where the name ends in @buf: at the first digit for a tty
	 * name, or at the comma in front of the options.
	 */
	for (s = buf; *s; s++)
		if ((ttyname && isdigit(*s)) || *s == ',')
			break;

	/* @idx will get defined when devname matches. */
	if (devname)
		idx = -1;
	else
		idx = simple_strtoul(s, NULL, 10);

	/* Terminate the name; only the part before the index/options is kept. */
	*s = 0;

	__add_preferred_console(ttyname, idx, devname, options, brl_options, true);
	return 1;
}
__setup("console=", console_setup);

/**
 * add_preferred_console - add a device to the list of preferred consoles.
 * @name: device name
 * @idx: device index
 * @options: options for this console
 *
 * The last preferred console added will be used for kernel messages
 * and stdin/out/err for init.  Normally this is used by console_setup
 * above to handle user-supplied console arguments; however it can also
 * be used by arch-specific code either to override the user or more
 * commonly to provide a default console (ie from PROM variables) when
 * the user has not supplied one.
 *
 * The console is added without a device name, without braille options
 * and not marked as user specified.
 *
 * Return: the value returned by __add_preferred_console().
 */
int add_preferred_console(const char *name, const short idx, char *options)
{
	return __add_preferred_console(name, idx, NULL, options, NULL, false);
}

/**
 * match_devname_and_update_preferred_console - Update a preferred console
 *	when matching devname is found.
 * @devname: DEVNAME:0.0 style device name
 * @name: Name of the corresponding console driver, e.g. "ttyS"
 * @idx: Console index, e.g. port number.
*
 * The function checks whether a device with the given @devname is
 * preferred via the console=DEVNAME:0.0 command line option.
 * It fills the missing console driver name and console index
 * so that a later register_console() call could find (match)
 * and enable this device.
 *
 * It might be used when a driver subsystem initializes particular
 * devices with already known DEVNAME:0.0 style names. And it
 * could predict which console driver name and index this device
 * would later get associated with.
 *
 * Return: 0 on success, negative error code on failure.
 */
int match_devname_and_update_preferred_console(const char *devname,
					       const char *name,
					       const short idx)
{
	struct console_cmdline *c = console_cmdline;
	int i;

	/* Both names must be non-empty and the index must be valid. */
	if (!devname || !strlen(devname) || !name || !strlen(name) || idx < 0)
		return -EINVAL;

	/* Walk the used entries; the first one with the same devname wins. */
	for (i = 0; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]);
	     i++, c++) {
		if (!strcmp(devname, c->devname)) {
			pr_info("associate the preferred console \"%s\" with \"%s%d\"\n",
				devname, name, idx);
			strscpy(c->name, name);
			c->index = idx;
			return 0;
		}
	}

	/* No preferred console uses this devname. */
	return -ENOENT;
}
EXPORT_SYMBOL_GPL(match_devname_and_update_preferred_console);

/* When false, suspend_console() and resume_console() return immediately. */
bool console_suspend_enabled = true;
EXPORT_SYMBOL(console_suspend_enabled);

/* "no_console_suspend" boot parameter: clear console_suspend_enabled. */
static int __init console_suspend_disable(char *str)
{
	console_suspend_enabled = false;
	return 1;
}
__setup("no_console_suspend", console_suspend_disable);
module_param_named(console_suspend, console_suspend_enabled,
		bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(console_suspend, "suspend console during suspend"
	" and hibernate operations");

static bool printk_console_no_auto_verbose;

/*
 * Raise console_loglevel to CONSOLE_LOGLEVEL_MOTORMOUTH. Nothing is
 * changed when console_loglevel is 0 or when the raise was disabled via
 * the console_no_auto_verbose parameter.
 */
void console_verbose(void)
{
	if (console_loglevel && !printk_console_no_auto_verbose)
		console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
}
EXPORT_SYMBOL_GPL(console_verbose);

module_param_named(console_no_auto_verbose, printk_console_no_auto_verbose, bool, 0644);
MODULE_PARM_DESC(console_no_auto_verbose, "Disable console loglevel raise to highest on oops/panic/etc");

/**
 * suspend_console
- suspend the console subsystem
 *
 * This disables printk() while we go into suspend states
 */
void suspend_console(void)
{
	struct console *con;

	/* Console suspend was disabled (e.g. via no_console_suspend). */
	if (!console_suspend_enabled)
		return;
	pr_info("Suspending console(s) (use no_console_suspend to debug)\n");
	/* Flush (timeout_ms = 1000) before the consoles get marked suspended. */
	pr_flush(1000, true);

	/* Set CON_SUSPENDED on every registered console. */
	console_list_lock();
	for_each_console(con)
		console_srcu_write_flags(con, con->flags | CON_SUSPENDED);
	console_list_unlock();

	/*
	 * Ensure that all SRCU list walks have completed. All printing
	 * contexts must be able to see that they are suspended so that it
	 * is guaranteed that all printing has stopped when this function
	 * completes.
	 */
	synchronize_srcu(&console_srcu);
}

/**
 * resume_console - resume the console subsystem
 *
 * Counterpart of suspend_console(): clears CON_SUSPENDED on every
 * registered console, waits for all SRCU list walks, wakes the printing
 * mechanism reported by printk_get_console_flush_type() and finally
 * flushes. Does nothing when console suspend is disabled.
 */
void resume_console(void)
{
	struct console_flush_type ft;
	struct console *con;

	if (!console_suspend_enabled)
		return;

	console_list_lock();
	for_each_console(con)
		console_srcu_write_flags(con, con->flags & ~CON_SUSPENDED);
	console_list_unlock();

	/*
	 * Ensure that all SRCU list walks have completed. All printing
	 * contexts must be able to see they are no longer suspended so
	 * that they are guaranteed to wake up and resume printing.
	 */
	synchronize_srcu(&console_srcu);

	/* Kick whichever offloaded printers are currently in use. */
	printk_get_console_flush_type(&ft);
	if (ft.nbcon_offload)
		nbcon_kthreads_wake();
	if (ft.legacy_offload)
		defer_console_output();

	pr_flush(1000, true);
}

/**
 * console_cpu_notify - print deferred console messages after CPU hotplug
 * @cpu: unused
 *
 * If printk() is called from a CPU that is not online yet, the messages
 * will be printed on the console only if there are CON_ANYTIME consoles.
 * This function is called when a new CPU comes online (or fails to come
 * up) or goes offline.
*/
static int console_cpu_notify(unsigned int cpu)
{
	struct console_flush_type ft;

	/* Nothing is flushed while cpuhp_tasks_frozen is set. */
	if (!cpuhp_tasks_frozen) {
		printk_get_console_flush_type(&ft);
		if (ft.nbcon_atomic)
			nbcon_atomic_flush_pending();
		if (ft.legacy_direct) {
			/* Taking and releasing the lock is what triggers the flush. */
			if (console_trylock())
				console_unlock();
		}
	}
	return 0;
}

/**
 * console_lock - block the console subsystem from printing
 *
 * Acquires a lock which guarantees that no consoles will
 * be in or enter their write() callback.
 *
 * Can sleep, returns nothing.
 */
void console_lock(void)
{
	might_sleep();

	/* On panic, the console_lock must be left to the panic cpu. */
	while (other_cpu_in_panic())
		msleep(1000);

	down_console_sem();
	console_locked = 1;
	/* This path may sleep, so rescheduling while holding the lock is allowed. */
	console_may_schedule = 1;
}
EXPORT_SYMBOL(console_lock);

/**
 * console_trylock - try to block the console subsystem from printing
 *
 * Try to acquire a lock which guarantees that no consoles will
 * be in or enter their write() callback.
 *
 * returns 1 on success, and 0 on failure to acquire the lock.
 */
int console_trylock(void)
{
	/* On panic, the console_lock must be left to the panic cpu. */
	if (other_cpu_in_panic())
		return 0;
	if (down_trylock_console_sem())
		return 0;
	console_locked = 1;
	/* A lock obtained by trylock never permits rescheduling. */
	console_may_schedule = 0;
	return 1;
}
EXPORT_SYMBOL(console_trylock);

/* Return the current value of console_locked (non-zero while locked). */
int is_console_locked(void)
{
	return console_locked;
}
EXPORT_SYMBOL(is_console_locked);

/* Release the console lock without printing anything. */
static void __console_unlock(void)
{
	console_locked = 0;
	up_console_sem();
}

#ifdef CONFIG_PRINTK

/*
 * Prepend the message in @pmsg->pbufs->outbuf. This is achieved by shifting
 * the existing message over and inserting the scratchbuf message.
 *
 * @pmsg is the original printk message.
 * @fmt is the printf format of the message which will prepend the existing one.
 *
 * If there is not enough space in @pmsg->pbufs->outbuf, the existing
 * message text will be sufficiently truncated.
 *
 * If @pmsg->pbufs->outbuf is modified, @pmsg->outbuf_len is updated.
 */
__printf(2, 3)
static void console_prepend_message(struct printk_message *pmsg, const char *fmt, ...)
{
	struct printk_buffers *pbufs = pmsg->pbufs;
	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
	const size_t outbuf_sz = sizeof(pbufs->outbuf);
	char *scratchbuf = &pbufs->scratchbuf[0];
	char *outbuf = &pbufs->outbuf[0];
	va_list args;
	size_t len;

	/* Format the text to prepend into the scratch buffer first. */
	va_start(args, fmt);
	len = vscnprintf(scratchbuf, scratchbuf_sz, fmt, args);
	va_end(args);

	/*
	 * Make sure outbuf is sufficiently large before prepending.
	 * Keep at least the prefix when the message must be truncated.
	 * It is a rather theoretical problem when someone tries to
	 * use a minimalist buffer.
	 */
	if (WARN_ON_ONCE(len + PRINTK_PREFIX_MAX >= outbuf_sz))
		return;

	if (pmsg->outbuf_len + len >= outbuf_sz) {
		/* Truncate the message, but keep it terminated. */
		pmsg->outbuf_len = outbuf_sz - (len + 1);
		outbuf[pmsg->outbuf_len] = 0;
	}

	/*
	 * Shift the existing text, including its terminator (hence + 1),
	 * by @len and copy the new text into the gap.
	 */
	memmove(outbuf + len, outbuf, pmsg->outbuf_len + 1);
	memcpy(outbuf, scratchbuf, len);
	pmsg->outbuf_len += len;
}

/*
 * Prepend the message in @pmsg->pbufs->outbuf with a "dropped message".
 * @pmsg->outbuf_len is updated appropriately.
 *
 * @pmsg is the printk message to prepend.
 *
 * @dropped is the dropped count to report in the dropped message.
 */
void console_prepend_dropped(struct printk_message *pmsg, unsigned long dropped)
{
	console_prepend_message(pmsg, "** %lu printk messages dropped **\n", dropped);
}

/*
 * Prepend the message in @pmsg->pbufs->outbuf with a "replay message".
 * @pmsg->outbuf_len is updated appropriately.
 *
 * @pmsg is the printk message to prepend.
 */
void console_prepend_replay(struct printk_message *pmsg)
{
	console_prepend_message(pmsg, "** replaying previous printk message **\n");
}

/*
 * Read and format the specified record (or a later record if the specified
 * record is not available).
 *
 * @pmsg will contain the formatted result. @pmsg->pbufs must point to a
 * struct printk_buffers.
 *
 * @seq is the record to read and format. If it is not available, the next
 * valid record is read.
 *
 * @is_extended specifies if the message should be formatted for extended
 * console output.
 *
 * @may_suppress specifies if records may be skipped based on loglevel.
 *
 * Returns false if no record is available. Otherwise true and all fields
 * of @pmsg are valid. (See the documentation of struct printk_message
 * for information about the @pmsg fields.)
 */
bool printk_get_next_message(struct printk_message *pmsg, u64 seq,
			     bool is_extended, bool may_suppress)
{
	struct printk_buffers *pbufs = pmsg->pbufs;
	const size_t scratchbuf_sz = sizeof(pbufs->scratchbuf);
	const size_t outbuf_sz = sizeof(pbufs->outbuf);
	char *scratchbuf = &pbufs->scratchbuf[0];
	char *outbuf = &pbufs->outbuf[0];
	struct printk_info info;
	struct printk_record r;
	size_t len = 0;
	bool force_con;

	/*
	 * Formatting extended messages requires a separate buffer, so use the
	 * scratch buffer to read in the ringbuffer text.
	 *
	 * Formatting normal messages is done in-place, so read the ringbuffer
	 * text directly into the output buffer.
	 */
	if (is_extended)
		prb_rec_init_rd(&r, &info, scratchbuf, scratchbuf_sz);
	else
		prb_rec_init_rd(&r, &info, outbuf, outbuf_sz);

	if (!prb_read_valid(prb, seq, &r))
		return false;

	pmsg->seq = r.info->seq;
	/* Difference between the record actually read and the one asked for. */
	pmsg->dropped = r.info->seq - seq;
	force_con = r.info->flags & LOG_FORCE_CON;

	/*
	 * Skip records that are not forced to be printed on consoles and that
	 * have a level above the console loglevel.
	 *
	 * A skipped record is still reported as available (true is returned),
	 * but with an output length of 0.
	 */
	if (!force_con && may_suppress && suppress_message_printing(r.info->level))
		goto out;

	if (is_extended) {
		len = info_print_ext_header(outbuf, outbuf_sz, r.info);
		len += msg_print_ext_body(outbuf + len, outbuf_sz - len,
					  &r.text_buf[0], r.info->text_len, &r.info->dev_info);
	} else {
		len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
	}
out:
	pmsg->outbuf_len = len;
	return true;
}

/*
 * Legacy console printing from printk() caller context does not respect
 * raw_spinlock/spinlock nesting. For !PREEMPT_RT the lockdep warning is a
 * false positive.
For PREEMPT_RT the false positive condition does not
 * occur.
 *
 * This map is used to temporarily establish LD_WAIT_SLEEP context for the
 * console write() callback when legacy printing to avoid false positive
 * lockdep complaints, thus allowing lockdep to continue to function for
 * real issues.
 */
#ifdef CONFIG_PREEMPT_RT
/* No override is set up for PREEMPT_RT: both helpers are empty. */
static inline void printk_legacy_allow_spinlock_enter(void) { }
static inline void printk_legacy_allow_spinlock_exit(void) { }
#else
static DEFINE_WAIT_OVERRIDE_MAP(printk_legacy_map, LD_WAIT_SLEEP);

/* Acquire the override map; to be called right before the write() callback. */
static inline void printk_legacy_allow_spinlock_enter(void)
{
	lock_map_acquire_try(&printk_legacy_map);
}

/* Release the override map; to be called right after the write() callback. */
static inline void printk_legacy_allow_spinlock_exit(void)
{
	lock_map_release(&printk_legacy_map);
}
#endif /* CONFIG_PREEMPT_RT */

/*
 * Used as the printk buffers for non-panic, serialized console printing.
 * This is for legacy (!CON_NBCON) as well as all boot (CON_BOOT) consoles.
 * Its usage requires the console_lock held.
 */
struct printk_buffers printk_shared_pbufs;

/*
 * Print one record for the given console. The record printed is whatever
 * record is the next available record for the given console.
 *
 * @handover will be set to true if a printk waiter has taken over the
 * console_lock, in which case the caller is no longer holding both the
 * console_lock and the SRCU read lock. Otherwise it is set to false.
 *
 * @cookie is the cookie from the SRCU read lock.
 *
 * Returns false if the given console has no next record to print, otherwise
 * true.
 *
 * Requires the console_lock and the SRCU read lock.
*/
static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
{
	bool is_extended = console_srcu_read_flags(con) & CON_EXTENDED;
	char *outbuf = &printk_shared_pbufs.outbuf[0];
	struct printk_message pmsg = {
		.pbufs = &printk_shared_pbufs,
	};
	unsigned long flags;

	*handover = false;

	if (!printk_get_next_message(&pmsg, con->seq, is_extended, true))
		return false;

	/* Accumulate the number of records this console has missed. */
	con->dropped += pmsg.dropped;

	/* Skip messages of formatted length 0. */
	if (pmsg.outbuf_len == 0) {
		con->seq = pmsg.seq + 1;
		goto skip;
	}

	/*
	 * Only non-extended consoles get the "dropped" notice prepended
	 * here; for extended consoles con->dropped is left untouched.
	 */
	if (con->dropped && !is_extended) {
		console_prepend_dropped(&pmsg, con->dropped);
		con->dropped = 0;
	}

	/* Write everything out to the hardware. */

	if (force_legacy_kthread() && !panic_in_progress()) {
		/*
		 * With forced threading this function is in a task context
		 * (either legacy kthread or get_init_console_seq()). There
		 * is no need for concern about printk reentrance, handovers,
		 * or lockdep complaints.
		 */

		con->write(con, outbuf, pmsg.outbuf_len);
		con->seq = pmsg.seq + 1;
	} else {
		/*
		 * While actively printing out messages, if another printk()
		 * were to occur on another CPU, it may wait for this one to
		 * finish. This task can not be preempted if there is a
		 * waiter waiting to take over.
		 *
		 * Interrupts are disabled because the hand over to a waiter
		 * must not be interrupted until the hand over is completed
		 * (@console_waiter is cleared).
		 */
		printk_safe_enter_irqsave(flags);
		console_lock_spinning_enable();

		/* Do not trace print latency.
*/
		stop_critical_timings();

		/* The write() callback runs inside the lockdep override. */
		printk_legacy_allow_spinlock_enter();
		con->write(con, outbuf, pmsg.outbuf_len);
		printk_legacy_allow_spinlock_exit();

		start_critical_timings();

		con->seq = pmsg.seq + 1;

		/* Stop accepting waiters and report whether one took over. */
		*handover = console_lock_spinning_disable_and_check(cookie);
		printk_safe_exit_irqrestore(flags);
	}
skip:
	return true;
}

#else

/*
 * !CONFIG_PRINTK variants: there is never a record to emit and there are
 * no printer threads to check.
 */
static bool console_emit_next_record(struct console *con, bool *handover, int cookie)
{
	*handover = false;
	return false;
}

static inline void printk_kthreads_check_locked(void) { }

#endif /* CONFIG_PRINTK */

/*
 * Print out all remaining records to all consoles.
 *
 * @do_cond_resched is set by the caller. It can be true only in schedulable
 * context.
 *
 * @next_seq is set to the sequence number after the last available record.
 * The value is valid only when this function returns true. It means that all
 * usable consoles are completely flushed.
 *
 * @handover will be set to true if a printk waiter has taken over the
 * console_lock, in which case the caller is no longer holding the
 * console_lock. Otherwise it is set to false.
 *
 * Returns true when there was at least one usable console and all messages
 * were flushed to all usable consoles. A returned false informs the caller
 * that everything was not flushed (either there were no usable consoles or
 * another context has taken over printing or it is a panic situation and this
 * is not the panic CPU). Regardless the reason, the caller should assume it
 * is not useful to immediately try again.
 *
 * Requires the console_lock.
*/
static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handover)
{
	struct console_flush_type ft;
	bool any_usable = false;
	struct console *con;
	bool any_progress;
	int cookie;

	*next_seq = 0;
	*handover = false;

	/*
	 * Every pass over the console list emits at most one record per
	 * console. Keep making passes until one of them makes no progress.
	 */
	do {
		any_progress = false;

		printk_get_console_flush_type(&ft);

		cookie = console_srcu_read_lock();
		for_each_console_srcu(con) {
			short flags = console_srcu_read_flags(con);
			u64 printk_seq;
			bool progress;

			/*
			 * console_flush_all() is only responsible for nbcon
			 * consoles when the nbcon consoles cannot print via
			 * their atomic or threaded flushing.
			 */
			if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
				continue;

			if (!console_is_usable(con, flags, !do_cond_resched))
				continue;
			any_usable = true;

			if (flags & CON_NBCON) {
				progress = nbcon_legacy_emit_next_record(con, handover, cookie,
									 !do_cond_resched);
				printk_seq = nbcon_seq_read(con);
			} else {
				progress = console_emit_next_record(con, handover, cookie);
				printk_seq = con->seq;
			}

			/*
			 * If a handover has occurred, the SRCU read lock
			 * is already released.
			 */
			if (*handover)
				return false;

			/* Track the next of the highest seq flushed. */
			if (printk_seq > *next_seq)
				*next_seq = printk_seq;

			if (!progress)
				continue;
			any_progress = true;

			/* Allow panic_cpu to take over the consoles safely. */
			if (other_cpu_in_panic())
				goto abandon;

			if (do_cond_resched)
				cond_resched();
		}
		console_srcu_read_unlock(cookie);
	} while (any_progress);

	return any_usable;

abandon:
	/* Leaving from inside the list walk: the SRCU read lock is still held. */
	console_srcu_read_unlock(cookie);
	return false;
}

/*
 * Flush the pending records to the consoles and release the console lock.
 * The lock is not released here when a waiter has taken it over.
 */
static void __console_flush_and_unlock(void)
{
	bool do_cond_resched;
	bool handover;
	bool flushed;
	u64 next_seq;

	/*
	 * Console drivers are called with interrupts disabled, so
	 * @console_may_schedule should be cleared before; however, we may
	 * end up dumping a lot of lines, for example, if called from
	 * console registration path, and should invoke cond_resched()
	 * between lines if allowable.
Not doing so can cause a very long
	 * scheduling stall on a slow console leading to RCU stall and
	 * softlockup warnings which exacerbate the issue with more
	 * messages practically incapacitating the system. Therefore, create
	 * a local to use for the printing loop.
	 */
	do_cond_resched = console_may_schedule;

	do {
		console_may_schedule = 0;

		flushed = console_flush_all(do_cond_resched, &next_seq, &handover);
		/* After a handover the lock is no longer held by this context. */
		if (!handover)
			__console_unlock();

		/*
		 * Abort if there was a failure to flush all messages to all
		 * usable consoles. Either it is not possible to flush (in
		 * which case it would be an infinite loop of retrying) or
		 * another context has taken over printing.
		 */
		if (!flushed)
			break;

		/*
		 * Some context may have added new records after
		 * console_flush_all() but before unlocking the console.
		 * Re-check if there is a new record to flush. If the trylock
		 * fails, another context is already handling the printing.
		 */
	} while (prb_read_valid(prb, next_seq, NULL) && console_trylock());
}

/**
 * console_unlock - unblock the legacy console subsystem from printing
 *
 * Releases the console_lock which the caller holds to block printing of
 * the legacy console subsystem.
 *
 * While the console_lock was held, console output may have been buffered
 * by printk(). If this is the case, console_unlock() emits the output on
 * legacy consoles prior to releasing the lock.
 *
 * console_unlock() may be called from any context.
 */
void console_unlock(void)
{
	struct console_flush_type ft;

	/*
	 * Print from here only when legacy consoles are flushed directly;
	 * otherwise just release the lock.
	 */
	printk_get_console_flush_type(&ft);
	if (ft.legacy_direct)
		__console_flush_and_unlock();
	else
		__console_unlock();
}
EXPORT_SYMBOL(console_unlock);

/**
 * console_conditional_schedule - yield the CPU if required
 *
 * If the console code is currently allowed to sleep, and
 * if this CPU should yield the CPU to another task, do
 * so here.
 *
 * Must be called within console_lock();.
*/
void __sched console_conditional_schedule(void)
{
	if (console_may_schedule)
		cond_resched();
}
EXPORT_SYMBOL(console_conditional_schedule);

/**
 * console_unblank - call unblank() on all enabled consoles that have it
 *
 * Returns early, without taking the console lock, when no enabled console
 * implements unblank(). While @oops_in_progress is set the lock is only
 * tried (never from NMI) and the function gives up if it is contended.
 */
void console_unblank(void)
{
	bool found_unblank = false;
	struct console *c;
	int cookie;

	/*
	 * First check if there are any consoles implementing the unblank()
	 * callback. If not, there is no reason to continue and take the
	 * console lock, which in particular can be dangerous if
	 * @oops_in_progress is set.
	 */
	cookie = console_srcu_read_lock();
	for_each_console_srcu(c) {
		if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank) {
			found_unblank = true;
			break;
		}
	}
	console_srcu_read_unlock(cookie);
	if (!found_unblank)
		return;

	/*
	 * Stop console printing because the unblank() callback may
	 * assume the console is not within its write() callback.
	 *
	 * If @oops_in_progress is set, this may be an atomic context.
	 * In that case, attempt a trylock as best-effort.
	 */
	if (oops_in_progress) {
		/* Semaphores are not NMI-safe. */
		if (in_nmi())
			return;

		/*
		 * Attempting to trylock the console lock can deadlock
		 * if another CPU was stopped while modifying the
		 * semaphore. "Hope and pray" that this is not the
		 * current situation.
		 */
		if (down_trylock_console_sem() != 0)
			return;
	} else
		console_lock();

	/* Either way the lock is held now; rescheduling is not allowed. */
	console_locked = 1;
	console_may_schedule = 0;

	cookie = console_srcu_read_lock();
	for_each_console_srcu(c) {
		if ((console_srcu_read_flags(c) & CON_ENABLED) && c->unblank)
			c->unblank();
	}
	console_srcu_read_unlock(cookie);

	console_unlock();

	/* The final flush is skipped while an oops is in progress. */
	if (!oops_in_progress)
		pr_flush(1000, true);
}

/*
 * Rewind all consoles to the oldest available record.
 *
 * IMPORTANT: The function is safe only when called under
 *            console_lock(). It is not enforced because
 *            it is used as a best effort in panic().
*/
static void __console_rewind_all(void)
{
	struct console *c;
	short flags;
	int cookie;
	u64 seq;

	/* The oldest record that is still available in the ringbuffer. */
	seq = prb_first_valid_seq(prb);

	cookie = console_srcu_read_lock();
	for_each_console_srcu(c) {
		flags = console_srcu_read_flags(c);

		if (flags & CON_NBCON) {
			nbcon_seq_force(c, seq);
		} else {
			/*
			 * This assignment is safe only when called under
			 * console_lock(). On panic, legacy consoles are
			 * only best effort.
			 */
			c->seq = seq;
		}
	}
	console_srcu_read_unlock(cookie);
}

/**
 * console_flush_on_panic - flush console content on panic
 * @mode: flush all messages in buffer or just the pending ones
 *
 * Immediately output all pending messages no matter what.
 */
void console_flush_on_panic(enum con_flush_mode mode)
{
	struct console_flush_type ft;
	bool handover;
	u64 next_seq;

	/*
	 * Ignore the console lock and flush out the messages. Attempting a
	 * trylock would not be useful because:
	 *
	 *   - if it is contended, it must be ignored anyway
	 *   - console_lock() and console_trylock() block and fail
	 *     respectively in panic for non-panic CPUs
	 *   - semaphores are not NMI-safe
	 */

	/*
	 * If another context is holding the console lock,
	 * @console_may_schedule might be set. Clear it so that
	 * this context does not call cond_resched() while flushing.
	 */
	console_may_schedule = 0;

	/* Replaying everything starts every console at the oldest record. */
	if (mode == CONSOLE_REPLAY_ALL)
		__console_rewind_all();

	printk_get_console_flush_type(&ft);
	if (ft.nbcon_atomic)
		nbcon_atomic_flush_pending();

	/* Flush legacy consoles once allowed, even when dangerous. */
	if (legacy_allow_panic_sync)
		console_flush_all(false, &next_seq, &handover);
}

/*
 * Return the console tty driver structure and its associated index
 *
 * The consoles are asked in list order via their device() callback; the
 * first driver returned wins. NULL is returned when no console provides
 * a driver.
 */
struct tty_driver *console_device(int *index)
{
	struct console *c;
	struct tty_driver *driver = NULL;
	int cookie;

	/*
	 * Take console_lock to serialize device() callback with
	 * other console operations. For example, fg_console is
	 * modified under console_lock when switching vt.
*/
	console_lock();

	cookie = console_srcu_read_lock();
	for_each_console_srcu(c) {
		/* Consoles without a device() callback cannot provide a driver. */
		if (!c->device)
			continue;
		driver = c->device(c, index);
		if (driver)
			break;
	}
	console_srcu_read_unlock(cookie);

	console_unlock();
	return driver;
}

/*
 * Prevent further output on the passed console device so that (for example)
 * serial drivers can disable console output before suspending a port, and can
 * re-enable output afterwards.
 */
void console_stop(struct console *console)
{
	/* Flush this console (timeout_ms = 1000) before it gets disabled. */
	__pr_flush(console, 1000, true);
	console_list_lock();
	console_srcu_write_flags(console, console->flags & ~CON_ENABLED);
	console_list_unlock();

	/*
	 * Ensure that all SRCU list walks have completed. All contexts must
	 * be able to see that this console is disabled so that (for example)
	 * the caller can suspend the port without risk of another context
	 * using the port.
	 */
	synchronize_srcu(&console_srcu);
}
EXPORT_SYMBOL(console_stop);

/*
 * Counterpart of console_stop(): set CON_ENABLED on the passed console
 * again, wait for all SRCU list walks, wake the printing mechanism that
 * is responsible for this console and flush it.
 */
void console_start(struct console *console)
{
	struct console_flush_type ft;
	bool is_nbcon;

	console_list_lock();
	console_srcu_write_flags(console, console->flags | CON_ENABLED);
	/* Sample the console type while the list lock is still held. */
	is_nbcon = console->flags & CON_NBCON;
	console_list_unlock();

	/*
	 * Ensure that all SRCU list walks have completed. The related
	 * printing context must be able to see it is enabled so that
	 * it is guaranteed to wake up and resume printing.
	 */
	synchronize_srcu(&console_srcu);

	printk_get_console_flush_type(&ft);
	if (is_nbcon && ft.nbcon_offload)
		nbcon_kthread_wake(console);
	else if (ft.legacy_offload)
		defer_console_output();

	__pr_flush(console, 1000, true);
}
EXPORT_SYMBOL(console_start);

#ifdef CONFIG_PRINTK
static int unregister_console_locked(struct console *console);

/* True when system boot is far enough to create printer threads.
*/
static bool printk_kthreads_ready __ro_after_init;

/* The legacy printer thread, or NULL while none exists. */
static struct task_struct *printk_legacy_kthread;

/*
 * Wake-up condition of the legacy printer thread. True when the thread
 * was asked to stop, or when a usable console that this thread is
 * responsible for still has a readable record at its sequence number.
 */
static bool legacy_kthread_should_wakeup(void)
{
	struct console_flush_type ft;
	struct console *con;
	bool ret = false;
	int cookie;

	if (kthread_should_stop())
		return true;

	printk_get_console_flush_type(&ft);

	cookie = console_srcu_read_lock();
	for_each_console_srcu(con) {
		short flags = console_srcu_read_flags(con);
		u64 printk_seq;

		/*
		 * The legacy printer thread is only responsible for nbcon
		 * consoles when the nbcon consoles cannot print via their
		 * atomic or threaded flushing.
		 */
		if ((flags & CON_NBCON) && (ft.nbcon_atomic || ft.nbcon_offload))
			continue;

		if (!console_is_usable(con, flags, false))
			continue;

		if (flags & CON_NBCON) {
			printk_seq = nbcon_seq_read(con);
		} else {
			/*
			 * It is safe to read @seq because only this
			 * thread context updates @seq.
			 */
			printk_seq = con->seq;
		}

		/* One console with pending output is enough to wake up. */
		if (prb_read_valid(prb, printk_seq, NULL)) {
			ret = true;
			break;
		}
	}
	console_srcu_read_unlock(cookie);

	return ret;
}

/*
 * Thread function of the legacy printer: sleep until
 * legacy_kthread_should_wakeup() is true, then take the console lock and
 * flush via __console_flush_and_unlock(). Returns 0 once the thread is
 * asked to stop.
 */
static int legacy_kthread_func(void *unused)
{
	for (;;) {
		wait_event_interruptible(legacy_wait, legacy_kthread_should_wakeup());

		if (kthread_should_stop())
			break;

		console_lock();
		__console_flush_and_unlock();
	}

	return 0;
}

/*
 * Start the "pr/legacy" thread and remember it in printk_legacy_kthread.
 * Must be called under console_list_lock(). Returns false, after a
 * warning and an error message, when the thread could not be started.
 */
static bool legacy_kthread_create(void)
{
	struct task_struct *kt;

	lockdep_assert_console_list_lock_held();

	kt = kthread_run(legacy_kthread_func, NULL, "pr/legacy");
	if (WARN_ON(IS_ERR(kt))) {
		pr_err("failed to start legacy printing thread\n");
		return false;
	}

	printk_legacy_kthread = kt;

	/*
	 * It is important that console printing threads are scheduled
	 * shortly after a printk call and with generous runtime budgets.
	 */
	sched_set_normal(printk_legacy_kthread, -20);

	return true;
}

/**
 * printk_kthreads_shutdown - shutdown all threaded printers
 *
 * On system shutdown all threaded printers are stopped. This allows printk
 * to transition back to atomic printing, thus providing a robust mechanism
 * for the final shutdown/reboot messages to be output.
*/
static void printk_kthreads_shutdown(void)
{
	struct console *con;

	console_list_lock();
	if (printk_kthreads_running) {
		/* Clear the flag first, then stop the thread of every nbcon console. */
		printk_kthreads_running = false;

		for_each_console(con) {
			if (con->flags & CON_NBCON)
				nbcon_kthread_stop(con);
		}

		/*
		 * The threads may have been stopped while printing a
		 * backlog. Flush any records left over.
		 */
		nbcon_atomic_flush_pending();
	}
	console_list_unlock();
}

/* Syscore hooks of printk: only the shutdown callback is provided. */
static struct syscore_ops printk_syscore_ops = {
	.shutdown = printk_kthreads_shutdown,
};

/*
 * If appropriate, start nbcon kthreads and set @printk_kthreads_running.
 * If any kthreads fail to start, those consoles are unregistered.
 *
 * Must be called under console_list_lock().
 */
static void printk_kthreads_check_locked(void)
{
	struct hlist_node *tmp;
	struct console *con;

	lockdep_assert_console_list_lock_held();

	/* Nothing can be started before printk_kthreads_ready is set. */
	if (!printk_kthreads_ready)
		return;

	if (have_legacy_console || have_boot_console) {
		/*
		 * The legacy printer thread is created only when it does not
		 * exist yet and force_legacy_kthread() asks for it.
		 */
		if (!printk_legacy_kthread &&
		    force_legacy_kthread() &&
		    !legacy_kthread_create()) {
			/*
			 * All legacy consoles must be unregistered. If there
			 * are any nbcon consoles, they will set up their own
			 * kthread.
			 */
			hlist_for_each_entry_safe(con, tmp, &console_list, node) {
				if (con->flags & CON_NBCON)
					continue;

				unregister_console_locked(con);
			}
		}
	} else if (printk_legacy_kthread) {
		/* Neither a legacy nor a boot console is left: stop the thread. */
		kthread_stop(printk_legacy_kthread);
		printk_legacy_kthread = NULL;
	}

	/*
	 * Printer threads cannot be started as long as any boot console is
	 * registered because there is no way to synchronize the hardware
	 * registers between boot console code and regular console code.
	 * It can only be known that there will be no new boot consoles when
	 * an nbcon console is registered.
	 */
	if (have_boot_console || !have_nbcon_console) {
		/* Clear flag in case all nbcon consoles unregistered.
*/ printk_kthreads_running = false; return; } if (printk_kthreads_running) return; hlist_for_each_entry_safe(con, tmp, &console_list, node) { if (!(con->flags & CON_NBCON)) continue; if (!nbcon_kthread_create(con)) unregister_console_locked(con); } printk_kthreads_running = true; } static int __init printk_set_kthreads_ready(void) { register_syscore_ops(&printk_syscore_ops); console_list_lock(); printk_kthreads_ready = true; printk_kthreads_check_locked(); console_list_unlock(); return 0; } early_initcall(printk_set_kthreads_ready); #endif /* CONFIG_PRINTK */ static int __read_mostly keep_bootcon; static int __init keep_bootcon_setup(char *str) { keep_bootcon = 1; pr_info("debug: skip boot console de-registration.\n"); return 0; } early_param("keep_bootcon", keep_bootcon_setup); static int console_call_setup(struct console *newcon, char *options) { int err; if (!newcon->setup) return 0; /* Synchronize with possible boot console. */ console_lock(); err = newcon->setup(newcon, options); console_unlock(); return err; } /* * This is called by register_console() to try to match * the newly registered console with any of the ones selected * by either the command line or add_preferred_console() and * setup/enable it. * * Care need to be taken with consoles that are statically * enabled such as netconsole */ static int try_enable_preferred_console(struct console *newcon, bool user_specified) { struct console_cmdline *c; int i, err; for (i = 0, c = console_cmdline; i < MAX_CMDLINECONSOLES && (c->name[0] || c->devname[0]); i++, c++) { /* Console not yet initialized? 
*/ if (!c->name[0]) continue; if (c->user_specified != user_specified) continue; if (!newcon->match || newcon->match(newcon, c->name, c->index, c->options) != 0) { /* default matching */ BUILD_BUG_ON(sizeof(c->name) != sizeof(newcon->name)); if (strcmp(c->name, newcon->name) != 0) continue; if (newcon->index >= 0 && newcon->index != c->index) continue; if (newcon->index < 0) newcon->index = c->index; if (_braille_register_console(newcon, c)) return 0; err = console_call_setup(newcon, c->options); if (err) return err; } newcon->flags |= CON_ENABLED; if (i == preferred_console) newcon->flags |= CON_CONSDEV; return 0; } /* * Some consoles, such as pstore and netconsole, can be enabled even * without matching. Accept the pre-enabled consoles only when match() * and setup() had a chance to be called. */ if (newcon->flags & CON_ENABLED && c->user_specified == user_specified) return 0; return -ENOENT; } /* Try to enable the console unconditionally */ static void try_enable_default_console(struct console *newcon) { if (newcon->index < 0) newcon->index = 0; if (console_call_setup(newcon, NULL) != 0) return; newcon->flags |= CON_ENABLED; if (newcon->device) newcon->flags |= CON_CONSDEV; } /* Return the starting sequence number for a newly registered console. */ static u64 get_init_console_seq(struct console *newcon, bool bootcon_registered) { struct console *con; bool handover; u64 init_seq; if (newcon->flags & (CON_PRINTBUFFER | CON_BOOT)) { /* Get a consistent copy of @syslog_seq. */ mutex_lock(&syslog_lock); init_seq = syslog_seq; mutex_unlock(&syslog_lock); } else { /* Begin with next message added to ringbuffer. */ init_seq = prb_next_seq(prb); /* * If any enabled boot consoles are due to be unregistered * shortly, some may not be caught up and may be the same * device as @newcon. Since it is not known which boot console * is the same device, flush all consoles and, if necessary, * start with the message of the enabled boot console that is * the furthest behind. 
*/ if (bootcon_registered && !keep_bootcon) { /* * Hold the console_lock to stop console printing and * guarantee safe access to console->seq. */ console_lock(); /* * Flush all consoles and set the console to start at * the next unprinted sequence number. */ if (!console_flush_all(true, &init_seq, &handover)) { /* * Flushing failed. Just choose the lowest * sequence of the enabled boot consoles. */ /* * If there was a handover, this context no * longer holds the console_lock. */ if (handover) console_lock(); init_seq = prb_next_seq(prb); for_each_console(con) { u64 seq; if (!(con->flags & CON_BOOT) || !(con->flags & CON_ENABLED)) { continue; } if (con->flags & CON_NBCON) seq = nbcon_seq_read(con); else seq = con->seq; if (seq < init_seq) init_seq = seq; } } console_unlock(); } } return init_seq; } #define console_first() \ hlist_entry(console_list.first, struct console, node) static int unregister_console_locked(struct console *console); /* * The console driver calls this routine during kernel initialization * to register the console printing procedure with printk() and to * print any messages that were printed by the kernel before the * console driver was initialized. * * This can happen pretty early during the boot process (because of * early_printk) - sometimes before setup_arch() completes - be careful * of what kernel features are used - they may not be initialised yet. * * There are two types of consoles - bootconsoles (early_printk) and * "real" consoles (everything which is not a bootconsole) which are * handled differently. * - Any number of bootconsoles can be registered at any time. * - As soon as a "real" console is registered, all bootconsoles * will be unregistered automatically. 
/*
 * The console driver calls this routine during kernel initialization
 * to register the console printing procedure with printk() and to
 * print any messages that were printed by the kernel before the
 * console driver was initialized.
 *
 * This can happen pretty early during the boot process (because of
 * early_printk) - sometimes before setup_arch() completes - be careful
 * of what kernel features are used - they may not be initialised yet.
 *
 * There are two types of consoles - bootconsoles (early_printk) and
 * "real" consoles (everything which is not a bootconsole) which are
 * handled differently.
 *  - Any number of bootconsoles can be registered at any time.
 *  - As soon as a "real" console is registered, all bootconsoles
 *    will be unregistered automatically.
 *  - Once a "real" console is registered, any attempt to register a
 *    bootconsole will be rejected.
 *
 * The whole registration runs under console_list_lock(). Every early
 * exit goes through the "unlock" label so the lock is always dropped.
 */
void register_console(struct console *newcon)
{
	/* The driver lock is only used for nbcon consoles with write_atomic(). */
	bool use_device_lock = (newcon->flags & CON_NBCON) && newcon->write_atomic;
	bool bootcon_registered = false;
	bool realcon_registered = false;
	struct console *con;
	unsigned long flags;
	u64 init_seq;
	int err;

	console_list_lock();

	/* Reject double registration and record which console types exist. */
	for_each_console(con) {
		if (WARN(con == newcon, "console '%s%d' already registered\n",
					 con->name, con->index)) {
			goto unlock;
		}

		if (con->flags & CON_BOOT)
			bootcon_registered = true;
		else
			realcon_registered = true;
	}

	/* Do not register boot consoles when there already is a real one. */
	if ((newcon->flags & CON_BOOT) && realcon_registered) {
		pr_info("Too late to register bootconsole %s%d\n",
			newcon->name, newcon->index);
		goto unlock;
	}

	if (newcon->flags & CON_NBCON) {
		/*
		 * Ensure the nbcon console buffers can be allocated
		 * before modifying any global data.
		 */
		if (!nbcon_alloc(newcon))
			goto unlock;
	}

	/*
	 * See if we want to enable this console driver by default.
	 *
	 * Nope when a console is preferred by the command line, device
	 * tree, or SPCR.
	 *
	 * The first real console with tty binding (driver) wins. More
	 * consoles might get enabled before the right one is found.
	 *
	 * Note that a console with tty binding will have CON_CONSDEV
	 * flag set and will be first in the list.
	 */
	if (preferred_console < 0) {
		if (hlist_empty(&console_list) || !console_first()->device ||
		    console_first()->flags & CON_BOOT) {
			try_enable_default_console(newcon);
		}
	}

	/* See if this console matches one we selected on the command line */
	err = try_enable_preferred_console(newcon, true);

	/* If not, try to match against the platform default(s) */
	if (err == -ENOENT)
		err = try_enable_preferred_console(newcon, false);

	/* printk() messages are not printed to the Braille console. */
	if (err || newcon->flags & CON_BRL) {
		/* Undo the nbcon_alloc() done above. */
		if (newcon->flags & CON_NBCON)
			nbcon_free(newcon);
		goto unlock;
	}

	/*
	 * If we have a bootconsole, and are switching to a real console,
	 * don't print everything out again, since when the boot console, and
	 * the real console are the same physical device, it's annoying to
	 * see the beginning boot messages twice
	 */
	if (bootcon_registered &&
	    ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV)) {
		newcon->flags &= ~CON_PRINTBUFFER;
	}

	newcon->dropped = 0;
	init_seq = get_init_console_seq(newcon, bootcon_registered);

	/* Store the starting sequence and update the global type flags. */
	if (newcon->flags & CON_NBCON) {
		have_nbcon_console = true;
		nbcon_seq_force(newcon, init_seq);
	} else {
		have_legacy_console = true;
		newcon->seq = init_seq;
	}

	if (newcon->flags & CON_BOOT)
		have_boot_console = true;

	/*
	 * If another context is actively using the hardware of this new
	 * console, it will not be aware of the nbcon synchronization. This
	 * is a risk that two contexts could access the hardware
	 * simultaneously if this new console is used for atomic printing
	 * and the other context is still using the hardware.
	 *
	 * Use the driver synchronization to ensure that the hardware is not
	 * in use while this new console transitions to being registered.
	 */
	if (use_device_lock)
		newcon->device_lock(newcon, &flags);

	/*
	 * Put this console in the list - keep the
	 * preferred driver at the head of the list.
	 */
	if (hlist_empty(&console_list)) {
		/* Ensure CON_CONSDEV is always set for the head. */
		newcon->flags |= CON_CONSDEV;
		hlist_add_head_rcu(&newcon->node, &console_list);

	} else if (newcon->flags & CON_CONSDEV) {
		/* Only the new head can have CON_CONSDEV set. */
		console_srcu_write_flags(console_first(), console_first()->flags & ~CON_CONSDEV);
		hlist_add_head_rcu(&newcon->node, &console_list);

	} else {
		/* Non-preferred consoles go directly behind the current head. */
		hlist_add_behind_rcu(&newcon->node, console_list.first);
	}

	/*
	 * No need to synchronize SRCU here! The caller does not rely
	 * on all contexts being able to see the new console before
	 * register_console() completes.
	 */

	/* This new console is now registered. */
	if (use_device_lock)
		newcon->device_unlock(newcon, flags);

	console_sysfs_notify();

	/*
	 * By unregistering the bootconsoles after we enable the real console
	 * we get the "console xxx enabled" message on all the consoles -
	 * boot consoles, real consoles, etc - this is to ensure that end
	 * users know there might be something in the kernel's log buffer that
	 * went to the bootconsole (that they do not see on the real console)
	 */
	con_printk(KERN_INFO, newcon, "enabled\n");
	if (bootcon_registered &&
	    ((newcon->flags & (CON_CONSDEV | CON_BOOT)) == CON_CONSDEV) &&
	    !keep_bootcon) {
		struct hlist_node *tmp;

		/* The _safe iterator is needed because entries are removed. */
		hlist_for_each_entry_safe(con, tmp, &console_list, node) {
			if (con->flags & CON_BOOT)
				unregister_console_locked(con);
		}
	}

	/* Changed console list, may require printer threads to start/stop. */
	printk_kthreads_check_locked();
unlock:
	console_list_unlock();
}
EXPORT_SYMBOL(register_console);
/*
 * Unregister @console.
 *
 * Wrapper that takes the console list lock around
 * unregister_console_locked() and hands back its return value.
 */
int unregister_console(struct console *console)
{
	int ret;

	console_list_lock();
	ret = unregister_console_locked(console);
	console_list_unlock();

	return ret;
}
EXPORT_SYMBOL(unregister_console);
/*
 * Initialize the console device. This is called *early*, so
 * we can't necessarily depend on lots of kernel help here.
 * Just do some early initializations, and do the complex setup
 * later.
 */
void __init console_init(void)
{
	initcall_entry_t *entry;

	/* Setup the default TTY line discipline. */
	n_tty_init();

	/*
	 * Run every console initcall between __con_initcall_start and
	 * __con_initcall_end so that later boot sequences can inform
	 * about problems etc.
	 */
	trace_initcall_level("console");
	for (entry = __con_initcall_start; entry < __con_initcall_end; entry++) {
		initcall_t fn = initcall_from_entry(entry);
		int rc;

		trace_initcall_start(fn);
		rc = fn();
		trace_initcall_finish(fn, rc);
	}
}
* * If for some reason, such as deferred probe or the driver being a loadable * module, the real console hasn't registered yet at this point, there will * be a brief interval in which no messages are logged to the console, which * makes it difficult to diagnose problems that occur during this time. * * To mitigate this problem somewhat, only unregister consoles whose memory * intersects with the init section. Note that all other boot consoles will * get unregistered when the real preferred console is registered. */ static int __init printk_late_init(void) { struct hlist_node *tmp; struct console *con; int ret; console_list_lock(); hlist_for_each_entry_safe(con, tmp, &console_list, node) { if (!(con->flags & CON_BOOT)) continue; /* Check addresses that might be used for enabled consoles. */ if (init_section_intersects(con, sizeof(*con)) || init_section_contains(con->write, 0) || init_section_contains(con->read, 0) || init_section_contains(con->device, 0) || init_section_contains(con->unblank, 0) || init_section_contains(con->data, 0)) { /* * Please, consider moving the reported consoles out * of the init section. */ pr_warn("bootconsole [%s%d] uses init memory and must be disabled even before the real one is ready\n", con->name, con->index); unregister_console_locked(con); } } console_list_unlock(); ret = cpuhp_setup_state_nocalls(CPUHP_PRINTK_DEAD, "printk:dead", NULL, console_cpu_notify); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "printk:online", console_cpu_notify, NULL); WARN_ON(ret < 0); printk_sysctl_init(); return 0; } late_initcall(printk_late_init); #if defined CONFIG_PRINTK /* If @con is specified, only wait for that console. Otherwise wait for all. 
/*
 * Wait until the usable consoles have printed every record below the
 * sequence number returned by prb_next_reserve_seq() on entry, or until
 * the timeout runs out.
 *
 * @con:               If specified, only wait for that console.
 *                     Otherwise wait for all.
 * @timeout_ms:        Timeout in milliseconds (converted with
 *                     msecs_to_jiffies()).
 * @reset_on_progress: Restart the timeout whenever the outstanding
 *                     backlog changed since the previous check.
 *
 * Return: true if no usable console is behind, false on timeout or when
 * called before the system state reaches SYSTEM_SCHEDULING.
 */
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
{
	unsigned long timeout_jiffies = msecs_to_jiffies(timeout_ms);
	unsigned long remaining_jiffies = timeout_jiffies;
	struct console_flush_type ft;
	struct console *c;
	u64 last_diff = 0;
	u64 printk_seq;
	short flags;
	int cookie;
	u64 diff;
	u64 seq;

	/* Sorry, pr_flush() will not work this early. */
	if (system_state < SYSTEM_SCHEDULING)
		return false;

	might_sleep();

	/* Target: everything reserved so far must be printed. */
	seq = prb_next_reserve_seq(prb);

	/* Flush the consoles so that records up to @seq are printed. */
	printk_get_console_flush_type(&ft);
	if (ft.nbcon_atomic)
		nbcon_atomic_flush_pending();
	if (ft.legacy_direct) {
		console_lock();
		console_unlock();
	}

	for (;;) {
		unsigned long begin_jiffies;
		unsigned long slept_jiffies;

		/* Sum of records still missing on the watched consoles. */
		diff = 0;

		/*
		 * Hold the console_lock to guarantee safe access to
		 * console->seq. Releasing console_lock flushes more
		 * records in case @seq is still not printed on all
		 * usable consoles.
		 *
		 * Holding the console_lock is not necessary if there
		 * are no legacy or boot consoles. However, such a
		 * console could register at any time. Always hold the
		 * console_lock as a precaution rather than
		 * synchronizing against register_console().
		 */
		console_lock();

		cookie = console_srcu_read_lock();
		for_each_console_srcu(c) {
			if (con && con != c)
				continue;

			flags = console_srcu_read_flags(c);

			/*
			 * If consoles are not usable, it cannot be expected
			 * that they make forward progress, so only increment
			 * @diff for usable consoles.
			 */
			if (!console_is_usable(c, flags, true) &&
			    !console_is_usable(c, flags, false)) {
				continue;
			}

			if (flags & CON_NBCON) {
				printk_seq = nbcon_seq_read(c);
			} else {
				printk_seq = c->seq;
			}

			if (printk_seq < seq)
				diff += seq - printk_seq;
		}
		console_srcu_read_unlock(cookie);

		/* Any change in the backlog restarts the timeout if requested. */
		if (diff != last_diff && reset_on_progress)
			remaining_jiffies = timeout_jiffies;

		console_unlock();

		/* Note: @diff is 0 if there are no usable consoles. */
		if (diff == 0 || remaining_jiffies == 0)
			break;

		/* msleep(1) might sleep much longer. Check time by jiffies. */
		begin_jiffies = jiffies;
		msleep(1);
		slept_jiffies = jiffies - begin_jiffies;

		/* Clamp so that the unsigned counter cannot underflow. */
		remaining_jiffies -= min(slept_jiffies, remaining_jiffies);

		last_diff = diff;
	}

	return (diff == 0);
}
/*
 * printf-style front end of vprintk_deferred(): gathers the variadic
 * arguments into a va_list and forwards them.
 *
 * Return: whatever vprintk_deferred() returned.
 */
int _printk_deferred(const char *fmt, ...)
{
	va_list ap;
	int printed;

	va_start(ap, fmt);
	printed = vprintk_deferred(fmt, ap);
	va_end(ap);

	return printed;
}
*/ DEFINE_RATELIMIT_STATE(printk_ratelimit_state, 5 * HZ, 10); int __printk_ratelimit(const char *func) { return ___ratelimit(&printk_ratelimit_state, func); } EXPORT_SYMBOL(__printk_ratelimit); /** * printk_timed_ratelimit - caller-controlled printk ratelimiting * @caller_jiffies: pointer to caller's state * @interval_msecs: minimum interval between prints * * printk_timed_ratelimit() returns true if more than @interval_msecs * milliseconds have elapsed since the last time printk_timed_ratelimit() * returned true. */ bool printk_timed_ratelimit(unsigned long *caller_jiffies, unsigned int interval_msecs) { unsigned long elapsed = jiffies - *caller_jiffies; if (*caller_jiffies && elapsed <= msecs_to_jiffies(interval_msecs)) return false; *caller_jiffies = jiffies; return true; } EXPORT_SYMBOL(printk_timed_ratelimit); static DEFINE_SPINLOCK(dump_list_lock); static LIST_HEAD(dump_list); /** * kmsg_dump_register - register a kernel log dumper. * @dumper: pointer to the kmsg_dumper structure * * Adds a kernel log dumper to the system. The dump callback in the * structure will be called when the kernel oopses or panics and must be * set. Returns zero on success and %-EINVAL or %-EBUSY otherwise. */ int kmsg_dump_register(struct kmsg_dumper *dumper) { unsigned long flags; int err = -EBUSY; /* The dump callback needs to be set */ if (!dumper->dump) return -EINVAL; spin_lock_irqsave(&dump_list_lock, flags); /* Don't allow registering multiple times */ if (!dumper->registered) { dumper->registered = 1; list_add_tail_rcu(&dumper->list, &dump_list); err = 0; } spin_unlock_irqrestore(&dump_list_lock, flags); return err; } EXPORT_SYMBOL_GPL(kmsg_dump_register); /** * kmsg_dump_unregister - unregister a kmsg dumper. * @dumper: pointer to the kmsg_dumper structure * * Removes a dump device from the system. Returns zero on success and * %-EINVAL otherwise. 
*/ int kmsg_dump_unregister(struct kmsg_dumper *dumper) { unsigned long flags; int err = -EINVAL; spin_lock_irqsave(&dump_list_lock, flags); if (dumper->registered) { dumper->registered = 0; list_del_rcu(&dumper->list); err = 0; } spin_unlock_irqrestore(&dump_list_lock, flags); synchronize_rcu(); return err; } EXPORT_SYMBOL_GPL(kmsg_dump_unregister); static bool always_kmsg_dump; module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) { switch (reason) { case KMSG_DUMP_PANIC: return "Panic"; case KMSG_DUMP_OOPS: return "Oops"; case KMSG_DUMP_EMERG: return "Emergency"; case KMSG_DUMP_SHUTDOWN: return "Shutdown"; default: return "Unknown"; } } EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); /** * kmsg_dump_desc - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping * @desc: a short string to describe what caused the panic or oops. Can be NULL * if no additional description is available. * * Call each of the registered dumper's dump() callback, which can * retrieve the kmsg records with kmsg_dump_get_line() or * kmsg_dump_get_buffer(). */ void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { struct kmsg_dumper *dumper; struct kmsg_dump_detail detail = { .reason = reason, .description = desc}; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { enum kmsg_dump_reason max_reason = dumper->max_reason; /* * If client has not provided a specific max_reason, default * to KMSG_DUMP_OOPS, unless always_kmsg_dump was set. */ if (max_reason == KMSG_DUMP_UNDEF) { max_reason = always_kmsg_dump ? 
KMSG_DUMP_MAX : KMSG_DUMP_OOPS; } if (reason > max_reason) continue; /* invoke dumper which will iterate over records */ dumper->dump(dumper, &detail); } rcu_read_unlock(); } /** * kmsg_dump_get_line - retrieve one kmsg log line * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to * @size: maximum size of the buffer * @len: length of line placed into buffer * * Start at the beginning of the kmsg buffer, with the oldest kmsg * record, and copy one record into the provided buffer. * * Consecutive calls will return the next available record moving * towards the end of the buffer with the youngest messages. * * A return value of FALSE indicates that there are no more records to * read. */ bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len) { u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; unsigned int line_count; struct printk_record r; size_t l = 0; bool ret = false; if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; prb_rec_init_rd(&r, &info, line, size); /* Read text or count text lines? */ if (line) { if (!prb_read_valid(prb, iter->cur_seq, &r)) goto out; l = record_print_text(&r, syslog, printk_time); } else { if (!prb_read_valid_info(prb, iter->cur_seq, &info, &line_count)) { goto out; } l = get_record_print_text_size(&info, line_count, syslog, printk_time); } iter->cur_seq = r.info->seq + 1; ret = true; out: if (len) *len = l; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** * kmsg_dump_get_buffer - copy kmsg log lines * @iter: kmsg dump iterator * @syslog: include the "<4>" prefixes * @buf: buffer to copy the line to * @size: maximum size of the buffer * @len_out: length of line placed into buffer * * Start at the end of the kmsg buffer and fill the provided buffer * with as many of the *youngest* kmsg records that fit into it. * If the buffer is large enough, all available kmsg records will be * copied with a single call. 
* * Consecutive calls will fill the buffer with the next block of * available older records, not including the earlier retrieved ones. * * A return value of FALSE indicates that there are no more records to * read. */ bool kmsg_dump_get_buffer(struct kmsg_dump_iter *iter, bool syslog, char *buf, size_t size, size_t *len_out) { u64 min_seq = latched_seq_read_nolock(&clear_seq); struct printk_info info; struct printk_record r; u64 seq; u64 next_seq; size_t len = 0; bool ret = false; bool time = printk_time; if (!buf || !size) goto out; if (iter->cur_seq < min_seq) iter->cur_seq = min_seq; if (prb_read_valid_info(prb, iter->cur_seq, &info, NULL)) { if (info.seq != iter->cur_seq) { /* messages are gone, move to first available one */ iter->cur_seq = info.seq; } } /* last entry */ if (iter->cur_seq >= iter->next_seq) goto out; /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. Pass in size-1 * because this function (by way of record_print_text()) will * not write more than size-1 bytes of text into @buf. */ seq = find_first_fitting_seq(iter->cur_seq, iter->next_seq, size - 1, syslog, time); /* * Next kmsg_dump_get_buffer() invocation will dump block of * older records stored right before this one. */ next_seq = seq; prb_rec_init_rd(&r, &info, buf, size); prb_for_each_record(seq, prb, seq, &r) { if (r.info->seq >= iter->next_seq) break; len += record_print_text(&r, syslog, time); /* Adjust record to store to remaining buffer space. */ prb_rec_init_rd(&r, &info, buf + len, size - len); } iter->next_seq = next_seq; ret = true; out: if (len_out) *len_out = len; return ret; } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); /** * kmsg_dump_rewind - reset the iterator * @iter: kmsg dump iterator * * Reset the dumper's iterator so that kmsg_dump_get_line() and * kmsg_dump_get_buffer() can be called again and used multiple * times within the same dumper.dump() callback. 
*/ void kmsg_dump_rewind(struct kmsg_dump_iter *iter) { iter->cur_seq = latched_seq_read_nolock(&clear_seq); iter->next_seq = prb_next_seq(prb); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); /** * console_try_replay_all - try to replay kernel log on consoles * * Try to obtain lock on console subsystem and replay all * available records in printk buffer on the consoles. * Does nothing if lock is not obtained. * * Context: Any, except for NMI. */ void console_try_replay_all(void) { struct console_flush_type ft; printk_get_console_flush_type(&ft); if (console_trylock()) { __console_rewind_all(); if (ft.nbcon_atomic) nbcon_atomic_flush_pending(); if (ft.nbcon_offload) nbcon_kthreads_wake(); if (ft.legacy_offload) defer_console_output(); /* Consoles are flushed as part of console_unlock(). */ console_unlock(); } } #endif #ifdef CONFIG_SMP static atomic_t printk_cpu_sync_owner = ATOMIC_INIT(-1); static atomic_t printk_cpu_sync_nested = ATOMIC_INIT(0); bool is_printk_cpu_sync_owner(void) { return (atomic_read(&printk_cpu_sync_owner) == raw_smp_processor_id()); } /** * __printk_cpu_sync_wait() - Busy wait until the printk cpu-reentrant * spinning lock is not owned by any CPU. * * Context: Any context. */ void __printk_cpu_sync_wait(void) { do { cpu_relax(); } while (atomic_read(&printk_cpu_sync_owner) != -1); } EXPORT_SYMBOL(__printk_cpu_sync_wait); /** * __printk_cpu_sync_try_get() - Try to acquire the printk cpu-reentrant * spinning lock. * * If no processor has the lock, the calling processor takes the lock and * becomes the owner. If the calling processor is already the owner of the * lock, this function succeeds immediately. * * Context: Any context. Expects interrupts to be disabled. * Return: 1 on success, otherwise 0. */ int __printk_cpu_sync_try_get(void) { int cpu; int old; cpu = smp_processor_id(); /* * Guarantee loads and stores from this CPU when it is the lock owner * are _not_ visible to the previous lock owner. This pairs with * __printk_cpu_sync_put:B. 
* * Memory barrier involvement: * * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, * then __printk_cpu_sync_put:A can never read from * __printk_cpu_sync_try_get:B. * * Relies on: * * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B * of the previous CPU * matching * ACQUIRE from __printk_cpu_sync_try_get:A to * __printk_cpu_sync_try_get:B of this CPU */ old = atomic_cmpxchg_acquire(&printk_cpu_sync_owner, -1, cpu); /* LMM(__printk_cpu_sync_try_get:A) */ if (old == -1) { /* * This CPU is now the owner and begins loading/storing * data: LMM(__printk_cpu_sync_try_get:B) */ return 1; } else if (old == cpu) { /* This CPU is already the owner. */ atomic_inc(&printk_cpu_sync_nested); return 1; } return 0; } EXPORT_SYMBOL(__printk_cpu_sync_try_get); /** * __printk_cpu_sync_put() - Release the printk cpu-reentrant spinning lock. * * The calling processor must be the owner of the lock. * * Context: Any context. Expects interrupts to be disabled. */ void __printk_cpu_sync_put(void) { if (atomic_read(&printk_cpu_sync_nested)) { atomic_dec(&printk_cpu_sync_nested); return; } /* * This CPU is finished loading/storing data: * LMM(__printk_cpu_sync_put:A) */ /* * Guarantee loads and stores from this CPU when it was the * lock owner are visible to the next lock owner. This pairs * with __printk_cpu_sync_try_get:A. * * Memory barrier involvement: * * If __printk_cpu_sync_try_get:A reads from __printk_cpu_sync_put:B, * then __printk_cpu_sync_try_get:B reads from __printk_cpu_sync_put:A. * * Relies on: * * RELEASE from __printk_cpu_sync_put:A to __printk_cpu_sync_put:B * of this CPU * matching * ACQUIRE from __printk_cpu_sync_try_get:A to * __printk_cpu_sync_try_get:B of the next CPU */ atomic_set_release(&printk_cpu_sync_owner, -1); /* LMM(__printk_cpu_sync_put:B) */ } EXPORT_SYMBOL(__printk_cpu_sync_put); #endif /* CONFIG_SMP */
7 7 7 3 3 3 3 2 2 3 3 2 1 4 4 4 4 112 113 112 1 1 1 1 1 1 1 1 1 1 1 1 1 181 183 183 85 1 84 200 201 85 125 201 201 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 // SPDX-License-Identifier: GPL-2.0-or-later /* * Anycast support for IPv6 * Linux INET6 implementation * * Authors: * David L Stevens (dlstevens@us.ibm.com) * * based heavily on net/ipv6/mcast.c */ #include <linux/capability.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/random.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/route.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/if_inet6.h> #include <net/ndisc.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/checksum.h> #define IN6_ADDR_HSIZE_SHIFT 8 #define IN6_ADDR_HSIZE BIT(IN6_ADDR_HSIZE_SHIFT) /* anycast address hash table */ static struct hlist_head inet6_acaddr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(acaddr_hash_lock); static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr); static u32 inet6_acaddr_hash(const struct net *net, const struct in6_addr *addr) { u32 val = __ipv6_addr_jhash(addr, net_hash_mix(net)); return hash_32(val, IN6_ADDR_HSIZE_SHIFT); } /* * socket join an anycast group */ int 
ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct inet6_dev *idev; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; ASSERT_RTNL(); if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) return -EINVAL; if (ifindex) dev = __dev_get_by_index(net, ifindex); if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE)) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); if (!pac) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, NULL, 0); if (rt) { dev = rt->dst.dev; ip6_rt_put(rt); } else if (ishost) { err = -EADDRNOTAVAIL; goto error; } else { /* router, no matching interface: just pick one */ dev = __dev_get_by_flags(net, IFF_UP, IFF_UP | IFF_LOOPBACK); } } if (!dev) { err = -ENODEV; goto error; } idev = __in6_dev_get(dev); if (!idev) { if (ifindex) err = -ENODEV; else err = -EADDRNOTAVAIL; goto error; } /* reset ishost, now that we have a specific device */ ishost = !idev->cnf.forwarding; pac->acl_ifindex = dev->ifindex; /* XXX * For hosts, allow link-local or matching prefix anycasts. * This obviates the need for propagating anycast routes while * still allowing some non-router anycast participation. 
*/ if (!ipv6_chk_prefix(addr, dev)) { if (ishost) err = -EADDRNOTAVAIL; if (err) goto error; } err = __ipv6_dev_ac_inc(idev, addr); if (!err) { pac->acl_next = np->ipv6_ac_list; np->ipv6_ac_list = pac; pac = NULL; } error: if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; } /* * socket leave an anycast group */ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev; struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); ASSERT_RTNL(); prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && ipv6_addr_equal(&pac->acl_addr, addr)) break; prev_pac = pac; } if (!pac) return -ENOENT; if (prev_pac) prev_pac->acl_next = pac->acl_next; else np->ipv6_ac_list = pac->acl_next; dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; } void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; struct ipv6_ac_socklist *pac; struct net *net = sock_net(sk); int prev_index; ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; prev_index = 0; while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } } void ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); if (!np->ipv6_ac_list) return; rtnl_lock(); __ipv6_sock_ac_close(sk); rtnl_unlock(); } static void ipv6_add_acaddr_hash(struct net *net, struct ifacaddr6 *aca) { unsigned int hash = inet6_acaddr_hash(net, &aca->aca_addr); spin_lock(&acaddr_hash_lock); hlist_add_head_rcu(&aca->aca_addr_lst, &inet6_acaddr_lst[hash]); spin_unlock(&acaddr_hash_lock); } static 
void ipv6_del_acaddr_hash(struct ifacaddr6 *aca) { spin_lock(&acaddr_hash_lock); hlist_del_init_rcu(&aca->aca_addr_lst); spin_unlock(&acaddr_hash_lock); } static void aca_get(struct ifacaddr6 *aca) { refcount_inc(&aca->aca_refcnt); } static void aca_free_rcu(struct rcu_head *h) { struct ifacaddr6 *aca = container_of(h, struct ifacaddr6, rcu); fib6_info_release(aca->aca_rt); kfree(aca); } static void aca_put(struct ifacaddr6 *ac) { if (refcount_dec_and_test(&ac->aca_refcnt)) call_rcu_hurry(&ac->rcu, aca_free_rcu); } static struct ifacaddr6 *aca_alloc(struct fib6_info *f6i, const struct in6_addr *addr) { struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); if (!aca) return NULL; aca->aca_addr = *addr; fib6_info_hold(f6i); aca->aca_rt = f6i; INIT_HLIST_NODE(&aca->aca_addr_lst); aca->aca_users = 1; /* aca_tstamp should be updated upon changes */ aca->aca_cstamp = aca->aca_tstamp = jiffies; refcount_set(&aca->aca_refcnt, 1); return aca; } static void inet6_ifacaddr_notify(struct net_device *dev, const struct ifacaddr6 *ifaca, int event) { struct inet6_fill_args fillargs = { .event = event, .netnsid = -1, }; struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOMEM; skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + nla_total_size(sizeof(struct in6_addr)) + nla_total_size(sizeof(struct ifa_cacheinfo)), GFP_KERNEL); if (!skb) goto error; err = inet6_fill_ifacaddr(skb, ifaca, &fillargs); if (err < 0) { pr_err("Failed to fill in anycast addresses (err %d)\n", err); nlmsg_free(skb); goto error; } rtnl_notify(skb, net, 0, RTNLGRP_IPV6_ACADDR, NULL, GFP_KERNEL); return; error: rtnl_set_sk_err(net, RTNLGRP_IPV6_ACADDR, err); } /* * device anycast group inc (add if not found) */ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca; struct fib6_info *f6i; struct net *net; int err; ASSERT_RTNL(); write_lock_bh(&idev->lock); if (idev->dead) { err = -ENODEV; goto out; } for (aca = 
rtnl_dereference(idev->ac_list); aca; aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) { aca->aca_users++; err = 0; goto out; } } net = dev_net(idev->dev); f6i = addrconf_f6i_alloc(net, idev, addr, true, GFP_ATOMIC, NULL); if (IS_ERR(f6i)) { err = PTR_ERR(f6i); goto out; } aca = aca_alloc(f6i, addr); if (!aca) { fib6_info_release(f6i); err = -ENOMEM; goto out; } /* Hold this for addrconf_join_solict() below before we unlock, * it is already exposed via idev->ac_list. */ aca_get(aca); aca->aca_next = idev->ac_list; rcu_assign_pointer(idev->ac_list, aca); write_unlock_bh(&idev->lock); ipv6_add_acaddr_hash(net, aca); ip6_ins_rt(net, f6i); addrconf_join_solict(idev->dev, &aca->aca_addr); inet6_ifacaddr_notify(idev->dev, aca, RTM_NEWANYCAST); aca_put(aca); return 0; out: write_unlock_bh(&idev->lock); return err; } /* * device anycast group decrement */ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) { struct ifacaddr6 *aca, *prev_aca; ASSERT_RTNL(); write_lock_bh(&idev->lock); prev_aca = NULL; for (aca = rtnl_dereference(idev->ac_list); aca; aca = rtnl_dereference(aca->aca_next)) { if (ipv6_addr_equal(&aca->aca_addr, addr)) break; prev_aca = aca; } if (!aca) { write_unlock_bh(&idev->lock); return -ENOENT; } if (--aca->aca_users > 0) { write_unlock_bh(&idev->lock); return 0; } if (prev_aca) rcu_assign_pointer(prev_aca->aca_next, aca->aca_next); else rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); inet6_ifacaddr_notify(idev->dev, aca, RTM_DELANYCAST); aca_put(aca); return 0; } /* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev) return -ENODEV; return __ipv6_dev_ac_dec(idev, addr); } void ipv6_ac_destroy_dev(struct inet6_dev *idev) { struct 
ifacaddr6 *aca; write_lock_bh(&idev->lock); while ((aca = rtnl_dereference(idev->ac_list)) != NULL) { rcu_assign_pointer(idev->ac_list, aca->aca_next); write_unlock_bh(&idev->lock); ipv6_del_acaddr_hash(aca); addrconf_leave_solict(idev, &aca->aca_addr); ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false); aca_put(aca); write_lock_bh(&idev->lock); } write_unlock_bh(&idev->lock); } /* * check if the interface has this anycast address * called with rcu_read_lock() */ static bool ipv6_chk_acast_dev(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev; struct ifacaddr6 *aca; idev = __in6_dev_get(dev); if (idev) { for (aca = rcu_dereference(idev->ac_list); aca; aca = rcu_dereference(aca->aca_next)) if (ipv6_addr_equal(&aca->aca_addr, addr)) break; return aca != NULL; } return false; } /* * check if given interface (or any, if dev==0) has this anycast address */ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, const struct in6_addr *addr) { struct net_device *nh_dev; struct ifacaddr6 *aca; bool found = false; rcu_read_lock(); if (dev) found = ipv6_chk_acast_dev(dev, addr); else { unsigned int hash = inet6_acaddr_hash(net, addr); hlist_for_each_entry_rcu(aca, &inet6_acaddr_lst[hash], aca_addr_lst) { nh_dev = fib6_info_nh_dev(aca->aca_rt); if (!nh_dev || !net_eq(dev_net(nh_dev), net)) continue; if (ipv6_addr_equal(&aca->aca_addr, addr)) { found = true; break; } } } rcu_read_unlock(); return found; } /* check if this anycast address is link-local on given interface or * is global */ bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, const struct in6_addr *addr) { return ipv6_chk_acast_addr(net, (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? 
dev : NULL), addr); } #ifdef CONFIG_PROC_FS struct ac6_iter_state { struct seq_net_private p; struct net_device *dev; }; #define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) { struct ac6_iter_state *state = ac6_seq_private(seq); struct net *net = seq_file_net(seq); struct ifacaddr6 *im = NULL; for_each_netdev_rcu(net, state->dev) { struct inet6_dev *idev; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->ac_list); if (im) break; } return im; } static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) { struct ac6_iter_state *state = ac6_seq_private(seq); struct inet6_dev *idev; im = rcu_dereference(im->aca_next); while (!im) { state->dev = next_net_device_rcu(state->dev); if (!state->dev) break; idev = __in6_dev_get(state->dev); if (!idev) continue; im = rcu_dereference(idev->ac_list); } return im; } static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) { struct ifacaddr6 *im = ac6_get_first(seq); if (im) while (pos && (im = ac6_get_next(seq, im)) != NULL) --pos; return pos ? 
NULL : im; } static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); return ac6_get_idx(seq, *pos); } static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ifacaddr6 *im = ac6_get_next(seq, v); ++*pos; return im; } static void ac6_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int ac6_seq_show(struct seq_file *seq, void *v) { struct ifacaddr6 *im = (struct ifacaddr6 *)v; struct ac6_iter_state *state = ac6_seq_private(seq); seq_printf(seq, "%-4d %-15s %pi6 %5d\n", state->dev->ifindex, state->dev->name, &im->aca_addr, im->aca_users); return 0; } static const struct seq_operations ac6_seq_ops = { .start = ac6_seq_start, .next = ac6_seq_next, .stop = ac6_seq_stop, .show = ac6_seq_show, }; int __net_init ac6_proc_init(struct net *net) { if (!proc_create_net("anycast6", 0444, net->proc_net, &ac6_seq_ops, sizeof(struct ac6_iter_state))) return -ENOMEM; return 0; } void ac6_proc_exit(struct net *net) { remove_proc_entry("anycast6", net->proc_net); } #endif /* Init / cleanup code */ int __init ipv6_anycast_init(void) { int i; for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_acaddr_lst[i]); return 0; } void ipv6_anycast_cleanup(void) { int i; spin_lock(&acaddr_hash_lock); for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_acaddr_lst[i])); spin_unlock(&acaddr_hash_lock); }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0 /* * consolidates trace point definitions * * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> */ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> #include <linux/export.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/rcupdate.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/netlink.h> #include <linux/net_dropmon.h> #include <linux/slab.h> #include <linux/unaligned.h> #include <asm/bitops.h> #define CREATE_TRACE_POINTS #include <trace/events/skb.h> #include <trace/events/net.h> #include <trace/events/napi.h> #include <trace/events/sock.h> #include <trace/events/udp.h> #include <trace/events/tcp.h> #include <trace/events/fib.h> #include <trace/events/qdisc.h> #if IS_ENABLED(CONFIG_BRIDGE) #include <trace/events/bridge.h> EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full); #endif #if IS_ENABLED(CONFIG_PAGE_POOL) #include <trace/events/page_pool.h> #endif #include <trace/events/neigh.h> EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); 
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum); EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
10 2 2 2 2 3 2 2 1 1 5 4 1 4 4 2 1 1 8 7 1 2 4 4 2 2 4 2 2 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 5 5 2 4 4 2 2 6 1 3 2 1 3 3 3 10 10 10 2 10 2 8 4 5 4 5 5 7 4 2 2 2 2 2 1 1 18 18 22 6 6 6 6 6 6 6 6 6 6 6 4 4 4 18 23 18 18 1094 1090 10 10 5 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 
1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 
2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 
2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 
2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 
3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 
3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 
4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/macsec.c - MACsec device * 
* Copyright (c) 2015 Sabrina Dubroca <sd@queasysnail.net>
 */

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/socket.h>
#include <linux/module.h>
#include <crypto/aead.h>
#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/refcount.h>
#include <net/genetlink.h>
#include <net/sock.h>
#include <net/gro_cells.h>
#include <net/macsec.h>
#include <net/dst_metadata.h>
#include <linux/phy.h>
#include <linux/byteorder/generic.h>
#include <linux/if_arp.h>

#include <uapi/linux/if_macsec.h>

/* SecTAG length = macsec_eth_header without the optional SCI */
#define MACSEC_TAG_LEN 6

/* Ethernet header followed by the SecTAG fields, as laid out in the frame.
 * The two bitfield orders keep short_length/unused in the same bits of the
 * octet regardless of the compiler's bitfield endianness.
 */
struct macsec_eth_header {
	struct ethhdr eth;
	/* SecTAG */
	u8 tci_an;
#if defined(__LITTLE_ENDIAN_BITFIELD)
	u8 short_length:6,
	   unused:2;
#elif defined(__BIG_ENDIAN_BITFIELD)
	u8 unused:2,
	   short_length:6;
#else
#error "Please fix <asm/byteorder.h>"
#endif
	__be32 packet_number;
	u8 secure_channel_id[8]; /* optional */
} __packed;

/* minimum secure data length deemed "not short", see IEEE 802.1AE-2006 9.7 */
#define MIN_NON_SHORT_LEN 48

#define GCM_AES_IV_LEN 12

/* Walk the RX SC list of @secy using rcu_dereference_bh() on each link. */
#define for_each_rxsc(secy, sc) \
	for (sc = rcu_dereference_bh(secy->rx_sc); \
	     sc; \
	     sc = rcu_dereference_bh(sc->next))
/* Same walk, but dereferencing each link with rtnl_dereference(). */
#define for_each_rxsc_rtnl(secy, sc) \
	for (sc = rtnl_dereference(secy->rx_sc); \
	     sc; \
	     sc = rtnl_dereference(sc->next))

/* True when bit 31 of @pn1 and @pn2 is equal. */
#define pn_same_half(pn1, pn2) (!(((pn1) >> 31) ^ ((pn2) >> 31)))

/* IV layout filled by macsec_fill_iv_xpn(): SSCI followed by a 64-bit PN. */
struct gcm_iv_xpn {
	union {
		u8 short_secure_channel_id[4];
		ssci_t ssci;
	};
	__be64 pn;
} __packed;

/* IV layout filled by macsec_fill_iv(): SCI followed by a 32-bit PN. */
struct gcm_iv {
	union {
		u8 secure_channel_id[8];
		sci_t sci;
	};
	__be32 pn;
};

#define MACSEC_VALIDATE_DEFAULT MACSEC_VALIDATE_STRICT

/* Per-CPU SecY counters together with their u64_stats sync object. */
struct pcpu_secy_stats {
	struct macsec_dev_stats stats;
	struct u64_stats_sync syncp;
};

/**
 * struct macsec_dev - private data
 * @secy: SecY config
 * @real_dev: pointer to underlying netdevice
 * @dev_tracker: refcount tracker for @real_dev reference
 * @stats: MACsec device stats
 * @secys: linked list of SecY's on the underlying device
 * @gro_cells: pointer to the Generic Receive Offload cell
 * @offload: status of offloading on the MACsec device
 * @insert_tx_tag: when offloading, device requires to insert an
 *	additional tag
 */
struct macsec_dev {
	struct macsec_secy secy;
	struct net_device *real_dev;
	netdevice_tracker dev_tracker;
	struct pcpu_secy_stats __percpu *stats;
	struct list_head secys;
	struct gro_cells gro_cells;
	enum macsec_offload offload;
	bool insert_tx_tag;
};

/**
 * struct macsec_rxh_data - rx_handler private argument
 * @secys: linked list of SecY's on this underlying device
 */
struct macsec_rxh_data {
	struct list_head secys;
};

/* Return netdev_priv(@dev) typed as the MACsec private data. */
static struct macsec_dev *macsec_priv(const struct net_device *dev)
{
	return (struct macsec_dev *)netdev_priv(dev);
}

/* Read @dev's rx_handler_data with rcu_dereference_bh(). */
static struct macsec_rxh_data *macsec_data_rcu(const struct net_device *dev)
{
	return rcu_dereference_bh(dev->rx_handler_data);
}

/* Read @dev's rx_handler_data with rtnl_dereference(). */
static struct macsec_rxh_data *macsec_data_rtnl(const struct net_device *dev)
{
	return rtnl_dereference(dev->rx_handler_data);
}

/* Per-packet state overlaid on skb->cb (see macsec_skb_cb()). */
struct macsec_cb {
	struct aead_request *req;
	union {
		struct macsec_tx_sa *tx_sa;
		struct macsec_rx_sa *rx_sa;
	};
	u8 assoc_num;
	bool valid;
	bool has_sci;
};

/* Take a reference on the RX SA behind @ptr.
 *
 * Returns NULL when the pointer is NULL, the SA is not active, or its
 * refcount has already dropped to zero; otherwise returns the SA with one
 * extra reference held.
 */
static struct macsec_rx_sa *macsec_rxsa_get(struct macsec_rx_sa __rcu *ptr)
{
	struct macsec_rx_sa *sa = rcu_dereference_bh(ptr);

	if (!sa || !sa->active)
		return NULL;

	if (!refcount_inc_not_zero(&sa->refcnt))
		return NULL;

	return sa;
}

/* RCU callback: release the per-CPU stats and the RX SC itself. */
static void free_rx_sc_rcu(struct rcu_head *head)
{
	struct macsec_rx_sc *rx_sc = container_of(head, struct macsec_rx_sc, rcu_head);

	free_percpu(rx_sc->stats);
	kfree(rx_sc);
}

/* Take a reference on @sc; returns NULL if its refcount is already zero. */
static struct macsec_rx_sc *macsec_rxsc_get(struct macsec_rx_sc *sc)
{
	return refcount_inc_not_zero(&sc->refcnt) ? sc : NULL;
}

/* Drop a reference on @sc; the last put schedules free_rx_sc_rcu(). */
static void macsec_rxsc_put(struct macsec_rx_sc *sc)
{
	if (refcount_dec_and_test(&sc->refcnt))
		call_rcu(&sc->rcu_head, free_rx_sc_rcu);
}

/* RCU callback: release the AEAD transform, per-CPU stats and the RX SA. */
static void free_rxsa(struct rcu_head *head)
{
	struct macsec_rx_sa *sa = container_of(head, struct macsec_rx_sa, rcu);

	crypto_free_aead(sa->key.tfm);
	free_percpu(sa->stats);
	kfree(sa);
}

/* Drop a reference on @sa; the last put schedules free_rxsa(). */
static void macsec_rxsa_put(struct macsec_rx_sa *sa)
{
	if (refcount_dec_and_test(&sa->refcnt))
		call_rcu(&sa->rcu, free_rxsa);
}

/* TX counterpart of macsec_rxsa_get(): NULL for a missing, inactive or
 * already-released SA, otherwise the SA with one extra reference held.
 */
static struct macsec_tx_sa *macsec_txsa_get(struct macsec_tx_sa __rcu *ptr)
{
	struct macsec_tx_sa *sa = rcu_dereference_bh(ptr);

	if (!sa || !sa->active)
		return NULL;

	if (!refcount_inc_not_zero(&sa->refcnt))
		return NULL;

	return sa;
}

/* RCU callback: release the AEAD transform, per-CPU stats and the TX SA. */
static void free_txsa(struct rcu_head *head)
{
	struct macsec_tx_sa *sa = container_of(head, struct macsec_tx_sa, rcu);

	crypto_free_aead(sa->key.tfm);
	free_percpu(sa->stats);
	kfree(sa);
}

/* Drop a reference on @sa; the last put schedules free_txsa(). */
static void macsec_txsa_put(struct macsec_tx_sa *sa)
{
	if (refcount_dec_and_test(&sa->refcnt))
		call_rcu(&sa->rcu, free_txsa);
}

/* View skb->cb as a struct macsec_cb; the build fails if it does not fit. */
static struct macsec_cb *macsec_skb_cb(struct sk_buff *skb)
{
	BUILD_BUG_ON(sizeof(struct macsec_cb) > sizeof(skb->cb));
	return (struct macsec_cb *)skb->cb;
}

#define MACSEC_PORT_SCB (0x0000)
#define MACSEC_UNDEF_SCI ((__force sci_t)0xffffffffffffffffULL)
#define MACSEC_UNDEF_SSCI ((__force ssci_t)0xffffffff)

#define MACSEC_GCM_AES_128_SAK_LEN 16
#define MACSEC_GCM_AES_256_SAK_LEN 32

#define DEFAULT_SAK_LEN MACSEC_GCM_AES_128_SAK_LEN
#define DEFAULT_XPN false
#define DEFAULT_SEND_SCI true
#define DEFAULT_ENCRYPT false
#define DEFAULT_ENCODING_SA 0
#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))

/* Build an SCI: ETH_ALEN bytes of @addr followed by the bytes of @port. */
static sci_t make_sci(const u8 *addr, __be16 port)
{
	sci_t sci;

	memcpy(&sci, addr, ETH_ALEN);
	memcpy(((char *)&sci) + ETH_ALEN, &port, sizeof(port));

	return sci;
}

/* SCI for a received frame: the explicit SecTAG SCI when @sci_present,
 * otherwise one derived from the source MAC and MACSEC_PORT_ES.
 */
static sci_t macsec_frame_sci(struct macsec_eth_header *hdr, bool sci_present)
{
	sci_t sci;

	if (sci_present)
		memcpy(&sci, hdr->secure_channel_id,
		       sizeof(hdr->secure_channel_id));
	else
		sci = make_sci(hdr->eth.h_source, MACSEC_PORT_ES);

	return sci;
}

/* SecTAG size: MACSEC_TAG_LEN plus MACSEC_SCI_LEN when the SCI is present. */
static unsigned int macsec_sectag_len(bool sci_present)
{
	return MACSEC_TAG_LEN + (sci_present ? MACSEC_SCI_LEN : 0);
}

/* SecTAG size plus ETH_HLEN. */
static unsigned int macsec_hdr_len(bool sci_present)
{
	return macsec_sectag_len(sci_present) + ETH_HLEN;
}

/* SecTAG size plus one __be16. */
static unsigned int macsec_extra_len(bool sci_present)
{
	return macsec_sectag_len(sci_present) + sizeof(__be16);
}

/* Fill SecTAG according to IEEE 802.1AE-2006 10.5.3 */
static void macsec_fill_sectag(struct macsec_eth_header *h,
			       const struct macsec_secy *secy, u32 pn,
			       bool sci_present)
{
	const struct macsec_tx_sc *tx_sc = &secy->tx_sc;

	/* Zero the SecTAG (from tci_an onwards) before setting fields. */
	memset(&h->tci_an, 0, macsec_sectag_len(sci_present));
	h->eth.h_proto = htons(ETH_P_MACSEC);

	if (sci_present) {
		h->tci_an |= MACSEC_TCI_SC;
		memcpy(&h->secure_channel_id, &secy->sci,
		       sizeof(h->secure_channel_id));
	} else {
		if (tx_sc->end_station)
			h->tci_an |= MACSEC_TCI_ES;
		if (tx_sc->scb)
			h->tci_an |= MACSEC_TCI_SCB;
	}

	h->packet_number = htonl(pn);

	/* with GCM, C/E clear for !encrypt, both set for encrypt */
	if (tx_sc->encrypt)
		h->tci_an |= MACSEC_TCI_CONFID;
	else if (secy->icv_len != MACSEC_DEFAULT_ICV_LEN)
		h->tci_an |= MACSEC_TCI_C;

	h->tci_an |= tx_sc->encoding_sa;
}

/* Record @data_len in the SecTAG only when it is below MIN_NON_SHORT_LEN;
 * otherwise short_length is left as it was.
 */
static void macsec_set_shortlen(struct macsec_eth_header *h, size_t data_len)
{
	if (data_len < MIN_NON_SHORT_LEN)
		h->short_length = data_len;
}

/* Checks if a MACsec interface is being offloaded to an hardware engine */
static bool macsec_is_offloaded(struct macsec_dev *macsec)
{
	if (macsec->offload == MACSEC_OFFLOAD_MAC ||
	    macsec->offload == MACSEC_OFFLOAD_PHY)
		return true;

	return false;
}

/* Checks if underlying layers implement MACsec offloading functions.
*/ static bool macsec_check_offload(enum macsec_offload offload, struct macsec_dev *macsec) { if (!macsec || !macsec->real_dev) return false; if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev && macsec->real_dev->phydev->macsec_ops; else if (offload == MACSEC_OFFLOAD_MAC) return macsec->real_dev->features & NETIF_F_HW_MACSEC && macsec->real_dev->macsec_ops; return false; } static const struct macsec_ops *__macsec_get_ops(enum macsec_offload offload, struct macsec_dev *macsec, struct macsec_context *ctx) { if (ctx) { memset(ctx, 0, sizeof(*ctx)); ctx->offload = offload; if (offload == MACSEC_OFFLOAD_PHY) ctx->phydev = macsec->real_dev->phydev; else if (offload == MACSEC_OFFLOAD_MAC) ctx->netdev = macsec->real_dev; } if (offload == MACSEC_OFFLOAD_PHY) return macsec->real_dev->phydev->macsec_ops; else return macsec->real_dev->macsec_ops; } /* Returns a pointer to the MACsec ops struct if any and updates the MACsec * context device reference if provided. */ static const struct macsec_ops *macsec_get_ops(struct macsec_dev *macsec, struct macsec_context *ctx) { if (!macsec_check_offload(macsec->offload, macsec)) return NULL; return __macsec_get_ops(macsec->offload, macsec, ctx); } /* validate MACsec packet according to IEEE 802.1AE-2018 9.12 */ static bool macsec_validate_skb(struct sk_buff *skb, u16 icv_len, bool xpn) { struct macsec_eth_header *h = (struct macsec_eth_header *)skb->data; int len = skb->len - 2 * ETH_ALEN; int extra_len = macsec_extra_len(!!(h->tci_an & MACSEC_TCI_SC)) + icv_len; /* a) It comprises at least 17 octets */ if (skb->len <= 16) return false; /* b) MACsec EtherType: already checked */ /* c) V bit is clear */ if (h->tci_an & MACSEC_TCI_VERSION) return false; /* d) ES or SCB => !SC */ if ((h->tci_an & MACSEC_TCI_ES || h->tci_an & MACSEC_TCI_SCB) && (h->tci_an & MACSEC_TCI_SC)) return false; /* e) Bits 7 and 8 of octet 4 of the SecTAG are clear */ if (h->unused) return false; /* rx.pn != 0 if not XPN (figure 10-5 with 
802.11AEbw-2013 amendment) */ if (!h->packet_number && !xpn) return false; /* length check, f) g) h) i) */ if (h->short_length) return len == extra_len + h->short_length; return len >= extra_len + MIN_NON_SHORT_LEN; } #define MACSEC_NEEDED_HEADROOM (macsec_extra_len(true)) #define MACSEC_NEEDED_TAILROOM MACSEC_STD_ICV_LEN static void macsec_fill_iv_xpn(unsigned char *iv, ssci_t ssci, u64 pn, salt_t salt) { struct gcm_iv_xpn *gcm_iv = (struct gcm_iv_xpn *)iv; gcm_iv->ssci = ssci ^ salt.ssci; gcm_iv->pn = cpu_to_be64(pn) ^ salt.pn; } static void macsec_fill_iv(unsigned char *iv, sci_t sci, u32 pn) { struct gcm_iv *gcm_iv = (struct gcm_iv *)iv; gcm_iv->sci = sci; gcm_iv->pn = htonl(pn); } static struct macsec_eth_header *macsec_ethhdr(struct sk_buff *skb) { return (struct macsec_eth_header *)skb_mac_header(skb); } static void __macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { pr_debug("PN wrapped, transitioning to !oper\n"); tx_sa->active = false; if (secy->protect_frames) secy->operational = false; } void macsec_pn_wrapped(struct macsec_secy *secy, struct macsec_tx_sa *tx_sa) { spin_lock_bh(&tx_sa->lock); __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); } EXPORT_SYMBOL_GPL(macsec_pn_wrapped); static pn_t tx_sa_update_pn(struct macsec_tx_sa *tx_sa, struct macsec_secy *secy) { pn_t pn; spin_lock_bh(&tx_sa->lock); pn = tx_sa->next_pn_halves; if (secy->xpn) tx_sa->next_pn++; else tx_sa->next_pn_halves.lower++; if (tx_sa->next_pn == 0) __macsec_pn_wrapped(secy, tx_sa); spin_unlock_bh(&tx_sa->lock); return pn; } static void macsec_encrypt_finish(struct sk_buff *skb, struct net_device *dev) { struct macsec_dev *macsec = netdev_priv(dev); skb->dev = macsec->real_dev; skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; } static unsigned int macsec_msdu_len(struct sk_buff *skb) { struct macsec_dev *macsec = macsec_priv(skb->dev); struct macsec_secy *secy = &macsec->secy; bool sci_present = macsec_skb_cb(skb)->has_sci; 
return skb->len - macsec_hdr_len(sci_present) - secy->icv_len; } static void macsec_count_tx(struct sk_buff *skb, struct macsec_tx_sc *tx_sc, struct macsec_tx_sa *tx_sa) { unsigned int msdu_len = macsec_msdu_len(skb); struct pcpu_tx_sc_stats *txsc_stats = this_cpu_ptr(tx_sc->stats); u64_stats_update_begin(&txsc_stats->syncp); if (tx_sc->encrypt) { txsc_stats->stats.OutOctetsEncrypted += msdu_len; txsc_stats->stats.OutPktsEncrypted++; this_cpu_inc(tx_sa->stats->OutPktsEncrypted); } else { txsc_stats->stats.OutOctetsProtected += msdu_len; txsc_stats->stats.OutPktsProtected++; this_cpu_inc(tx_sa->stats->OutPktsProtected); } u64_stats_update_end(&txsc_stats->syncp); } static void count_tx(struct net_device *dev, int ret, int len) { if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) dev_sw_netstats_tx_add(dev, 1, len); } static void macsec_encrypt_done(void *data, int err) { struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_tx_sa *sa = macsec_skb_cb(skb)->tx_sa; int len, ret; aead_request_free(macsec_skb_cb(skb)->req); rcu_read_lock_bh(); macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa); /* packet is encrypted/protected so tx_bytes must be calculated */ len = macsec_msdu_len(skb) + 2 * ETH_ALEN; macsec_encrypt_finish(skb, dev); ret = dev_queue_xmit(skb); count_tx(dev, ret, len); rcu_read_unlock_bh(); macsec_txsa_put(sa); dev_put(dev); } static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, unsigned char **iv, struct scatterlist **sg, int num_frags) { size_t size, iv_offset, sg_offset; struct aead_request *req; void *tmp; size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm); iv_offset = size; size += GCM_AES_IV_LEN; size = ALIGN(size, __alignof__(struct scatterlist)); sg_offset = size; size += sizeof(struct scatterlist) * num_frags; tmp = kmalloc(size, GFP_ATOMIC); if (!tmp) return NULL; *iv = (unsigned char *)(tmp + iv_offset); *sg = (struct 
scatterlist *)(tmp + sg_offset); req = tmp; aead_request_set_tfm(req, tfm); return req; } static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct net_device *dev) { int ret; struct scatterlist *sg; struct sk_buff *trailer; unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; size_t unprotected_len; struct aead_request *req; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; struct macsec_dev *macsec = macsec_priv(dev); bool sci_present; pn_t pn; secy = &macsec->secy; tx_sc = &secy->tx_sc; /* 10.5.1 TX SA assignment */ tx_sa = macsec_txsa_get(tx_sc->sa[tx_sc->encoding_sa]); if (!tx_sa) { secy->operational = false; kfree_skb(skb); return ERR_PTR(-EINVAL); } if (unlikely(skb_headroom(skb) < MACSEC_NEEDED_HEADROOM || skb_tailroom(skb) < MACSEC_NEEDED_TAILROOM)) { struct sk_buff *nskb = skb_copy_expand(skb, MACSEC_NEEDED_HEADROOM, MACSEC_NEEDED_TAILROOM, GFP_ATOMIC); if (likely(nskb)) { consume_skb(skb); skb = nskb; } else { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } } else { skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { macsec_txsa_put(tx_sa); return ERR_PTR(-ENOMEM); } } unprotected_len = skb->len; eth = eth_hdr(skb); sci_present = macsec_send_sci(secy); hh = skb_push(skb, macsec_extra_len(sci_present)); memmove(hh, eth, 2 * ETH_ALEN); pn = tx_sa_update_pn(tx_sa, secy); if (pn.full64 == 0) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOLINK); } macsec_fill_sectag(hh, secy, pn.lower, sci_present); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); skb_put(skb, secy->icv_len); if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) { struct pcpu_secy_stats *secy_stats = this_cpu_ptr(macsec->stats); u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.OutPktsTooLong++; u64_stats_update_end(&secy_stats->syncp); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-EINVAL); } ret = skb_cow_data(skb, 0, &trailer); if (unlikely(ret < 0)) { 
macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); } req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg, ret); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } if (secy->xpn) macsec_fill_iv_xpn(iv, tx_sa->ssci, pn.full64, tx_sa->key.salt); else macsec_fill_iv(iv, secy->sci, pn.lower); sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { aead_request_free(req); macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(ret); } if (tx_sc->encrypt) { int len = skb->len - macsec_hdr_len(sci_present) - secy->icv_len; aead_request_set_crypt(req, sg, sg, len, iv); aead_request_set_ad(req, macsec_hdr_len(sci_present)); } else { aead_request_set_crypt(req, sg, sg, 0, iv); aead_request_set_ad(req, skb->len - secy->icv_len); } macsec_skb_cb(skb)->req = req; macsec_skb_cb(skb)->tx_sa = tx_sa; macsec_skb_cb(skb)->has_sci = sci_present; aead_request_set_callback(req, 0, macsec_encrypt_done, skb); dev_hold(skb->dev); ret = crypto_aead_encrypt(req); if (ret == -EINPROGRESS) { return ERR_PTR(ret); } else if (ret != 0) { dev_put(skb->dev); kfree_skb(skb); aead_request_free(req); macsec_txsa_put(tx_sa); return ERR_PTR(-EINVAL); } dev_put(skb->dev); aead_request_free(req); macsec_txsa_put(tx_sa); return skb; } static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u32 pn) { struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; struct pcpu_rx_sc_stats *rxsc_stats = this_cpu_ptr(rx_sa->sc->stats); struct macsec_eth_header *hdr = macsec_ethhdr(skb); u32 lowest_pn = 0; spin_lock(&rx_sa->lock); if (rx_sa->next_pn_halves.lower >= secy->replay_window) lowest_pn = rx_sa->next_pn_halves.lower - secy->replay_window; /* Now perform replay protection check again * (see IEEE 802.1AE-2006 figure 10-5) */ if (secy->replay_protect && pn < lowest_pn && (!secy->xpn || pn_same_half(pn, lowest_pn))) { spin_unlock(&rx_sa->lock); u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; 
u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_dropped); return false; } if (secy->validate_frames != MACSEC_VALIDATE_DISABLED) { unsigned int msdu_len = macsec_msdu_len(skb); u64_stats_update_begin(&rxsc_stats->syncp); if (hdr->tci_an & MACSEC_TCI_E) rxsc_stats->stats.InOctetsDecrypted += msdu_len; else rxsc_stats->stats.InOctetsValidated += msdu_len; u64_stats_update_end(&rxsc_stats->syncp); } if (!macsec_skb_cb(skb)->valid) { spin_unlock(&rx_sa->lock); /* 10.6.5 */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotValid++; u64_stats_update_end(&rxsc_stats->syncp); this_cpu_inc(rx_sa->stats->InPktsNotValid); DEV_STATS_INC(secy->netdev, rx_errors); return false; } u64_stats_update_begin(&rxsc_stats->syncp); if (secy->validate_frames == MACSEC_VALIDATE_CHECK) { rxsc_stats->stats.InPktsInvalid++; this_cpu_inc(rx_sa->stats->InPktsInvalid); } else if (pn < lowest_pn) { rxsc_stats->stats.InPktsDelayed++; } else { rxsc_stats->stats.InPktsUnchecked++; } u64_stats_update_end(&rxsc_stats->syncp); } else { u64_stats_update_begin(&rxsc_stats->syncp); if (pn < lowest_pn) { rxsc_stats->stats.InPktsDelayed++; } else { rxsc_stats->stats.InPktsOK++; this_cpu_inc(rx_sa->stats->InPktsOK); } u64_stats_update_end(&rxsc_stats->syncp); // Instead of "pn >=" - to support pn overflow in xpn if (pn + 1 > rx_sa->next_pn_halves.lower) { rx_sa->next_pn_halves.lower = pn + 1; } else if (secy->xpn && !pn_same_half(pn, rx_sa->next_pn_halves.lower)) { rx_sa->next_pn_halves.upper++; rx_sa->next_pn_halves.lower = pn + 1; } spin_unlock(&rx_sa->lock); } return true; } static void macsec_reset_skb(struct sk_buff *skb, struct net_device *dev) { skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); skb_reset_network_header(skb); if (!skb_transport_header_was_set(skb)) skb_reset_transport_header(skb); skb_reset_mac_len(skb); } static void 
macsec_finalize_skb(struct sk_buff *skb, u8 icv_len, u8 hdr_len) { skb->ip_summed = CHECKSUM_NONE; memmove(skb->data + hdr_len, skb->data, 2 * ETH_ALEN); skb_pull(skb, hdr_len); pskb_trim_unique(skb, skb->len - icv_len); } static void count_rx(struct net_device *dev, int len) { dev_sw_netstats_rx_add(dev, len); } static void macsec_decrypt_done(void *data, int err) { struct sk_buff *skb = data; struct net_device *dev = skb->dev; struct macsec_dev *macsec = macsec_priv(dev); struct macsec_rx_sa *rx_sa = macsec_skb_cb(skb)->rx_sa; struct macsec_rx_sc *rx_sc = rx_sa->sc; int len; u32 pn; aead_request_free(macsec_skb_cb(skb)->req); if (!err) macsec_skb_cb(skb)->valid = true; rcu_read_lock_bh(); pn = ntohl(macsec_ethhdr(skb)->packet_number); if (!macsec_post_decrypt(skb, &macsec->secy, pn)) { rcu_read_unlock_bh(); kfree_skb(skb); goto out; } macsec_finalize_skb(skb, macsec->secy.icv_len, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); len = skb->len; macsec_reset_skb(skb, macsec->secy.netdev); if (gro_cells_receive(&macsec->gro_cells, skb) == NET_RX_SUCCESS) count_rx(dev, len); rcu_read_unlock_bh(); out: macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); dev_put(dev); } static struct sk_buff *macsec_decrypt(struct sk_buff *skb, struct net_device *dev, struct macsec_rx_sa *rx_sa, sci_t sci, struct macsec_secy *secy) { int ret; struct scatterlist *sg; struct sk_buff *trailer; unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; u32 hdr_pn; u16 icv_len = secy->icv_len; macsec_skb_cb(skb)->valid = false; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) return ERR_PTR(-ENOMEM); ret = skb_cow_data(skb, 0, &trailer); if (unlikely(ret < 0)) { kfree_skb(skb); return ERR_PTR(ret); } req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg, ret); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); } hdr = (struct macsec_eth_header *)skb->data; hdr_pn = ntohl(hdr->packet_number); if (secy->xpn) { pn_t recovered_pn = rx_sa->next_pn_halves; recovered_pn.lower = hdr_pn; 
if (hdr_pn < rx_sa->next_pn_halves.lower && !pn_same_half(hdr_pn, rx_sa->next_pn_halves.lower)) recovered_pn.upper++; macsec_fill_iv_xpn(iv, rx_sa->ssci, recovered_pn.full64, rx_sa->key.salt); } else { macsec_fill_iv(iv, sci, hdr_pn); } sg_init_table(sg, ret); ret = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(ret < 0)) { aead_request_free(req); kfree_skb(skb); return ERR_PTR(ret); } if (hdr->tci_an & MACSEC_TCI_E) { /* confidentiality: ethernet + macsec header * authenticated, encrypted payload */ int len = skb->len - macsec_hdr_len(macsec_skb_cb(skb)->has_sci); aead_request_set_crypt(req, sg, sg, len, iv); aead_request_set_ad(req, macsec_hdr_len(macsec_skb_cb(skb)->has_sci)); skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { aead_request_free(req); return ERR_PTR(-ENOMEM); } } else { /* integrity only: all headers + data authenticated */ aead_request_set_crypt(req, sg, sg, icv_len, iv); aead_request_set_ad(req, skb->len - icv_len); } macsec_skb_cb(skb)->req = req; skb->dev = dev; aead_request_set_callback(req, 0, macsec_decrypt_done, skb); dev_hold(dev); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) { return ERR_PTR(ret); } else if (ret != 0) { /* decryption/authentication failed * 10.6 if validateFrames is disabled, deliver anyway */ if (ret != -EBADMSG) { kfree_skb(skb); skb = ERR_PTR(ret); } } else { macsec_skb_cb(skb)->valid = true; } dev_put(dev); aead_request_free(req); return skb; } static struct macsec_rx_sc *find_rx_sc(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc; for_each_rxsc(secy, rx_sc) { if (rx_sc->sci == sci) return rx_sc; } return NULL; } static struct macsec_rx_sc *find_rx_sc_rtnl(struct macsec_secy *secy, sci_t sci) { struct macsec_rx_sc *rx_sc; for_each_rxsc_rtnl(secy, rx_sc) { if (rx_sc->sci == sci) return rx_sc; } return NULL; } static enum rx_handler_result handle_not_macsec(struct sk_buff *skb) { /* Deliver to the uncontrolled port by default */ enum rx_handler_result ret = RX_HANDLER_PASS; struct ethhdr 
*hdr = eth_hdr(skb); struct metadata_dst *md_dst; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; bool is_macsec_md_dst; rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); md_dst = skb_metadata_dst(skb); is_macsec_md_dst = md_dst && md_dst->type == METADATA_MACSEC; list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct sk_buff *nskb; struct pcpu_secy_stats *secy_stats = this_cpu_ptr(macsec->stats); struct net_device *ndev = macsec->secy.netdev; /* If h/w offloading is enabled, HW decodes frames and strips * the SecTAG, so we have to deduce which port to deliver to. */ if (macsec_is_offloaded(macsec) && netif_running(ndev)) { const struct macsec_ops *ops; ops = macsec_get_ops(macsec, NULL); if (ops->rx_uses_md_dst && !is_macsec_md_dst) continue; if (is_macsec_md_dst) { struct macsec_rx_sc *rx_sc; /* All drivers that implement MACsec offload * support using skb metadata destinations must * indicate that they do so. */ DEBUG_NET_WARN_ON_ONCE(!ops->rx_uses_md_dst); rx_sc = find_rx_sc(&macsec->secy, md_dst->u.macsec_info.sci); if (!rx_sc) continue; /* device indicated macsec offload occurred */ skb->dev = ndev; skb->pkt_type = PACKET_HOST; eth_skb_pkt_type(skb, ndev); ret = RX_HANDLER_ANOTHER; goto out; } /* This datapath is insecure because it is unable to * enforce isolation of broadcast/multicast traffic and * unicast traffic with promiscuous mode on the macsec * netdev. Since the core stack has no mechanism to * check that the hardware did indeed receive MACsec * traffic, it is possible that the response handling * done by the MACsec port was to a plaintext packet. * This violates the MACsec protocol standard. 
*/ if (ether_addr_equal_64bits(hdr->h_dest, ndev->dev_addr)) { /* exact match, divert skb to this port */ skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; goto out; } else if (is_multicast_ether_addr_64bits( hdr->h_dest)) { /* multicast frame, deliver on this port too */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; nskb->dev = ndev; eth_skb_pkt_type(nskb, ndev); __netif_rx(nskb); } else if (ndev->flags & IFF_PROMISC) { skb->dev = ndev; skb->pkt_type = PACKET_HOST; ret = RX_HANDLER_ANOTHER; goto out; } continue; } /* 10.6 If the management control validateFrames is not * Strict, frames without a SecTAG are received, counted, and * delivered to the Controlled Port */ if (macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsNoTag++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_dropped); continue; } /* deliver on this port */ nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) break; nskb->dev = ndev; if (__netif_rx(nskb) == NET_RX_SUCCESS) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsUntagged++; u64_stats_update_end(&secy_stats->syncp); } } out: rcu_read_unlock(); return ret; } static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct macsec_eth_header *hdr; struct macsec_secy *secy = NULL; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; struct macsec_rxh_data *rxd; struct macsec_dev *macsec; unsigned int len; sci_t sci; u32 hdr_pn; bool cbit; struct pcpu_rx_sc_stats *rxsc_stats; struct pcpu_secy_stats *secy_stats; bool pulled_sci; int ret; if (skb_headroom(skb) < ETH_HLEN) goto drop_direct; hdr = macsec_ethhdr(skb); if (hdr->eth.h_proto != htons(ETH_P_MACSEC)) return handle_not_macsec(skb); skb = skb_unshare(skb, GFP_ATOMIC); *pskb = skb; if (!skb) return RX_HANDLER_CONSUMED; pulled_sci = pskb_may_pull(skb, 
macsec_extra_len(true)); if (!pulled_sci) { if (!pskb_may_pull(skb, macsec_extra_len(false))) goto drop_direct; } hdr = macsec_ethhdr(skb); /* Frames with a SecTAG that has the TCI E bit set but the C * bit clear are discarded, as this reserved encoding is used * to identify frames with a SecTAG that are not to be * delivered to the Controlled Port. */ if ((hdr->tci_an & (MACSEC_TCI_C | MACSEC_TCI_E)) == MACSEC_TCI_E) return RX_HANDLER_PASS; /* now, pull the extra length */ if (hdr->tci_an & MACSEC_TCI_SC) { if (!pulled_sci) goto drop_direct; } /* ethernet header is part of crypto processing */ skb_push(skb, ETH_HLEN); macsec_skb_cb(skb)->has_sci = !!(hdr->tci_an & MACSEC_TCI_SC); macsec_skb_cb(skb)->assoc_num = hdr->tci_an & MACSEC_AN_MASK; sci = macsec_frame_sci(hdr, macsec_skb_cb(skb)->has_sci); rcu_read_lock(); rxd = macsec_data_rcu(skb->dev); list_for_each_entry_rcu(macsec, &rxd->secys, secys) { struct macsec_rx_sc *sc = find_rx_sc(&macsec->secy, sci); sc = sc ? macsec_rxsc_get(sc) : NULL; if (sc) { secy = &macsec->secy; rx_sc = sc; break; } } if (!secy) goto nosci; dev = secy->netdev; macsec = macsec_priv(dev); secy_stats = this_cpu_ptr(macsec->stats); rxsc_stats = this_cpu_ptr(rx_sc->stats); if (!macsec_validate_skb(skb, secy->icv_len, secy->xpn)) { u64_stats_update_begin(&secy_stats->syncp); secy_stats->stats.InPktsBadTag++; u64_stats_update_end(&secy_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); goto drop_nosa; } rx_sa = macsec_rxsa_get(rx_sc->sa[macsec_skb_cb(skb)->assoc_num]); if (!rx_sa) { /* 10.6.1 if the SA is not in use */ /* If validateFrames is Strict or the C bit in the * SecTAG is set, discard */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames == MACSEC_VALIDATE_STRICT) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsNotUsingSA++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(secy->netdev, rx_errors); goto drop_nosa; } /* not Strict, the frame (with the SecTAG and ICV * removed) is delivered to the 
Controlled Port. */ u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsUnusedSA++; u64_stats_update_end(&rxsc_stats->syncp); goto deliver; } /* First, PN check to avoid decrypting obviously wrong packets */ hdr_pn = ntohl(hdr->packet_number); if (secy->replay_protect) { bool late; spin_lock(&rx_sa->lock); late = rx_sa->next_pn_halves.lower >= secy->replay_window && hdr_pn < (rx_sa->next_pn_halves.lower - secy->replay_window); if (secy->xpn) late = late && pn_same_half(rx_sa->next_pn_halves.lower, hdr_pn); spin_unlock(&rx_sa->lock); if (late) { u64_stats_update_begin(&rxsc_stats->syncp); rxsc_stats->stats.InPktsLate++; u64_stats_update_end(&rxsc_stats->syncp); DEV_STATS_INC(macsec->secy.netdev, rx_dropped); goto drop; } } macsec_skb_cb(skb)->rx_sa = rx_sa; /* Disabled && !changed text => skip validation */ if (hdr->tci_an & MACSEC_TCI_C || secy->validate_frames != MACSEC_VALIDATE_DISABLED) skb = macsec_decrypt(skb, dev, rx_sa, sci, secy); if (IS_ERR(skb)) { /* the decrypt callback needs the reference */ if (PTR_ERR(skb) != -EINPROGRESS) { macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); } rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; } if (!macsec_post_decrypt(skb, secy, hdr_pn)) goto drop; deliver: macsec_finalize_skb(skb, secy->icv_len, macsec_extra_len(macsec_skb_cb(skb)->has_sci)); len = skb->len; macsec_reset_skb(skb, secy->netdev); if (rx_sa) macsec_rxsa_put(rx_sa); macsec_rxsc_put(rx_sc); skb_orphan(skb); ret = gro_cells_receive(&macsec->gro_cells, skb); if (ret == NET_RX_SUCCESS) count_rx(dev, len); else DEV_STATS_INC(macsec->secy.netdev, rx_dropped); rcu_read_unlock(); *pskb = NULL; return RX_HANDLER_CONSUMED; drop: macsec_rxsa_put(rx_sa); drop_nosa: macsec_rxsc_put(rx_sc); rcu_read_unlock(); drop_direct: kfree_skb(skb); *pskb = NULL; return RX_HANDLER_CONSUMED; nosci: /* 10.6.1 if the SC is not found */ cbit = !!(hdr->tci_an & MACSEC_TCI_C); if (!cbit) macsec_finalize_skb(skb, MACSEC_DEFAULT_ICV_LEN, 
macsec_extra_len(macsec_skb_cb(skb)->has_sci));

	list_for_each_entry_rcu(macsec, &rxd->secys, secys) {
		struct sk_buff *nskb;

		secy_stats = this_cpu_ptr(macsec->stats);

		/* If validateFrames is Strict or the C bit in the
		 * SecTAG is set, discard
		 */
		if (cbit ||
		    macsec->secy.validate_frames == MACSEC_VALIDATE_STRICT) {
			u64_stats_update_begin(&secy_stats->syncp);
			secy_stats->stats.InPktsNoSCI++;
			u64_stats_update_end(&secy_stats->syncp);
			DEV_STATS_INC(macsec->secy.netdev, rx_errors);
			continue;
		}

		/* not strict, the frame (with the SecTAG and ICV
		 * removed) is delivered to the Controlled Port.
		 */
		nskb = skb_clone(skb, GFP_ATOMIC);
		/* clone failed: stop delivering to the remaining SecYs */
		if (!nskb)
			break;

		macsec_reset_skb(nskb, macsec->secy.netdev);

		ret = __netif_rx(nskb);
		if (ret == NET_RX_SUCCESS) {
			u64_stats_update_begin(&secy_stats->syncp);
			secy_stats->stats.InPktsUnknownSCI++;
			u64_stats_update_end(&secy_stats->syncp);
		} else {
			DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
		}
	}

	rcu_read_unlock();
	/* only clones were delivered above; hand the original skb back */
	*pskb = skb;
	return RX_HANDLER_PASS;
}

/* Allocate a "gcm(aes)" AEAD transform, load @key (@key_len bytes) into it
 * and set its authentication tag size to @icv_len.
 *
 * Returns the transform on success.  On failure returns an ERR_PTR(): either
 * the one from crypto_alloc_aead(), or the error from setkey/setauthsize, in
 * which case the transform has already been freed here.
 */
static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len,
					    int icv_len)
{
	struct crypto_aead *tfm;
	int ret;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return tfm;

	ret = crypto_aead_setkey(tfm, key, key_len);
	if (ret < 0)
		goto fail;

	ret = crypto_aead_setauthsize(tfm, icv_len);
	if (ret < 0)
		goto fail;

	return tfm;

fail:
	crypto_free_aead(tfm);
	return ERR_PTR(ret);
}

/* Set up a caller-allocated RX SA: per-CPU stats, an AEAD transform keyed
 * with @sak, and the defaults (SSCI undefined, inactive, next_pn = 1,
 * refcount = 1, lock initialised).
 *
 * Returns 0 on success or a negative errno.  On failure the per-CPU stats
 * are released again; @rx_sa itself is not freed here.
 */
static int init_rx_sa(struct macsec_rx_sa *rx_sa, char *sak, int key_len,
		      int icv_len)
{
	rx_sa->stats = alloc_percpu(struct macsec_rx_sa_stats);
	if (!rx_sa->stats)
		return -ENOMEM;

	rx_sa->key.tfm = macsec_alloc_tfm(sak, key_len, icv_len);
	if (IS_ERR(rx_sa->key.tfm)) {
		free_percpu(rx_sa->stats);
		return PTR_ERR(rx_sa->key.tfm);
	}

	rx_sa->ssci = MACSEC_UNDEF_SSCI;
	rx_sa->active = false;
	rx_sa->next_pn = 1;
	refcount_set(&rx_sa->refcnt, 1);
	spin_lock_init(&rx_sa->lock);

	return 0;
}

/* Mark @rx_sa inactive and drop one reference on it. */
static void clear_rx_sa(struct macsec_rx_sa *rx_sa)
{
	rx_sa->active = false;

	macsec_rxsa_put(rx_sa);
}

/* Release every SA still attached to @rx_sc, then drop one reference on the
 * SC itself.  Each slot is cleared before its SA is released.  The slots are
 * read with rtnl_dereference(), so this is expected to run under RTNL.
 */
static void free_rx_sc(struct macsec_rx_sc *rx_sc)
{
	int i;

	for (i = 0; i < MACSEC_NUM_AN; i++) {
		struct macsec_rx_sa *sa = rtnl_dereference(rx_sc->sa[i]);

		RCU_INIT_POINTER(rx_sc->sa[i], NULL);
		if (sa)
			clear_rx_sa(sa);
	}

	macsec_rxsc_put(rx_sc);
}

/* Unlink the RX SC whose SCI equals @sci from @secy's singly linked list.
 * secy->n_rx_sc is decremented when the unlinked SC was active.
 *
 * Returns the unlinked SC (nothing is freed and no reference is dropped
 * here), or NULL when no SC matches.
 */
static struct macsec_rx_sc *del_rx_sc(struct macsec_secy *secy, sci_t sci)
{
	struct macsec_rx_sc *rx_sc, __rcu **rx_scp;

	/* rx_scp always points at the link that leads to rx_sc */
	for (rx_scp = &secy->rx_sc, rx_sc = rtnl_dereference(*rx_scp);
	     rx_sc;
	     rx_scp = &rx_sc->next, rx_sc = rtnl_dereference(*rx_scp)) {
		if (rx_sc->sci == sci) {
			if (rx_sc->active)
				secy->n_rx_sc--;
			rcu_assign_pointer(*rx_scp, rx_sc->next);
			return rx_sc;
		}
	}

	return NULL;
}

/* Create an RX SC for @sci on @dev and insert it at the head of the SecY's
 * RX SC list; secy->n_rx_sc is incremented when @active is true.
 *
 * Returns the new SC, ERR_PTR(-EEXIST) when any SecY stacked on the same
 * real device already has an SC with this SCI, or ERR_PTR(-ENOMEM).
 */
static struct macsec_rx_sc *create_rx_sc(struct net_device *dev, sci_t sci,
					 bool active)
{
	struct macsec_rx_sc *rx_sc;
	struct macsec_dev *macsec;
	struct net_device *real_dev = macsec_priv(dev)->real_dev;
	struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);
	struct macsec_secy *secy;

	/* the SCI must be unused across all SecYs of the real device */
	list_for_each_entry(macsec, &rxd->secys, secys) {
		if (find_rx_sc_rtnl(&macsec->secy, sci))
			return ERR_PTR(-EEXIST);
	}

	rx_sc = kzalloc(sizeof(*rx_sc), GFP_KERNEL);
	if (!rx_sc)
		return ERR_PTR(-ENOMEM);

	rx_sc->stats = netdev_alloc_pcpu_stats(struct pcpu_rx_sc_stats);
	if (!rx_sc->stats) {
		kfree(rx_sc);
		return ERR_PTR(-ENOMEM);
	}

	rx_sc->sci = sci;
	rx_sc->active = active;
	refcount_set(&rx_sc->refcnt, 1);

	secy = &macsec_priv(dev)->secy;
	/* set ->next before publishing the SC as the new list head */
	rcu_assign_pointer(rx_sc->next, secy->rx_sc);
	rcu_assign_pointer(secy->rx_sc, rx_sc);

	if (rx_sc->active)
		secy->n_rx_sc++;

	return rx_sc;
}

/* Set up a caller-allocated TX SA: per-CPU stats, an AEAD transform keyed
 * with @sak, and the defaults (SSCI undefined, inactive, refcount = 1, lock
 * initialised).  next_pn is not set here.
 *
 * Returns 0 on success or a negative errno.  On failure the per-CPU stats
 * are released again; @tx_sa itself is not freed here.
 */
static int init_tx_sa(struct macsec_tx_sa *tx_sa, char *sak, int key_len,
		      int icv_len)
{
	tx_sa->stats = alloc_percpu(struct macsec_tx_sa_stats);
	if (!tx_sa->stats)
		return -ENOMEM;

	tx_sa->key.tfm = macsec_alloc_tfm(sak, key_len, icv_len);
	if (IS_ERR(tx_sa->key.tfm)) {
		free_percpu(tx_sa->stats);
		return PTR_ERR(tx_sa->key.tfm);
	}

	tx_sa->ssci = MACSEC_UNDEF_SSCI;
	tx_sa->active = false;
	refcount_set(&tx_sa->refcnt, 1);
	spin_lock_init(&tx_sa->lock);

	return 0;
}

static void clear_tx_sa(struct macsec_tx_sa *tx_sa)
{
	tx_sa->active =
false; macsec_txsa_put(tx_sa); } static struct genl_family macsec_fam; static struct net_device *get_dev_from_nl(struct net *net, struct nlattr **attrs) { int ifindex = nla_get_u32(attrs[MACSEC_ATTR_IFINDEX]); struct net_device *dev; dev = __dev_get_by_index(net, ifindex); if (!dev) return ERR_PTR(-ENODEV); if (!netif_is_macsec(dev)) return ERR_PTR(-ENODEV); return dev; } static enum macsec_offload nla_get_offload(const struct nlattr *nla) { return (__force enum macsec_offload)nla_get_u8(nla); } static sci_t nla_get_sci(const struct nlattr *nla) { return (__force sci_t)nla_get_u64(nla); } static int nla_put_sci(struct sk_buff *skb, int attrtype, sci_t value, int padattr) { return nla_put_u64_64bit(skb, attrtype, (__force u64)value, padattr); } static ssci_t nla_get_ssci(const struct nlattr *nla) { return (__force ssci_t)nla_get_u32(nla); } static int nla_put_ssci(struct sk_buff *skb, int attrtype, ssci_t value) { return nla_put_u32(skb, attrtype, (__force u64)value); } static struct macsec_tx_sa *get_txsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_sa, struct net_device **devp, struct macsec_secy **secyp, struct macsec_tx_sc **scp, u8 *assoc_num) { struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; if (!tb_sa[MACSEC_SA_ATTR_AN]) return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); dev = get_dev_from_nl(net, attrs); if (IS_ERR(dev)) return ERR_CAST(dev); if (*assoc_num >= MACSEC_NUM_AN) return ERR_PTR(-EINVAL); secy = &macsec_priv(dev)->secy; tx_sc = &secy->tx_sc; tx_sa = rtnl_dereference(tx_sc->sa[*assoc_num]); if (!tx_sa) return ERR_PTR(-ENODEV); *devp = dev; *scp = tx_sc; *secyp = secy; return tx_sa; } static struct macsec_rx_sc *get_rxsc_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_rxsc, struct net_device **devp, struct macsec_secy **secyp) { struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; sci_t sci; dev = 
get_dev_from_nl(net, attrs); if (IS_ERR(dev)) return ERR_CAST(dev); secy = &macsec_priv(dev)->secy; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return ERR_PTR(-EINVAL); sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); rx_sc = find_rx_sc_rtnl(secy, sci); if (!rx_sc) return ERR_PTR(-ENODEV); *secyp = secy; *devp = dev; return rx_sc; } static struct macsec_rx_sa *get_rxsa_from_nl(struct net *net, struct nlattr **attrs, struct nlattr **tb_rxsc, struct nlattr **tb_sa, struct net_device **devp, struct macsec_secy **secyp, struct macsec_rx_sc **scp, u8 *assoc_num) { struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; if (!tb_sa[MACSEC_SA_ATTR_AN]) return ERR_PTR(-EINVAL); *assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]); if (*assoc_num >= MACSEC_NUM_AN) return ERR_PTR(-EINVAL); rx_sc = get_rxsc_from_nl(net, attrs, tb_rxsc, devp, secyp); if (IS_ERR(rx_sc)) return ERR_CAST(rx_sc); rx_sa = rtnl_dereference(rx_sc->sa[*assoc_num]); if (!rx_sa) return ERR_PTR(-ENODEV); *scp = rx_sc; return rx_sa; } static const struct nla_policy macsec_genl_policy[NUM_MACSEC_ATTR] = { [MACSEC_ATTR_IFINDEX] = { .type = NLA_U32 }, [MACSEC_ATTR_RXSC_CONFIG] = { .type = NLA_NESTED }, [MACSEC_ATTR_SA_CONFIG] = { .type = NLA_NESTED }, [MACSEC_ATTR_OFFLOAD] = { .type = NLA_NESTED }, }; static const struct nla_policy macsec_genl_rxsc_policy[NUM_MACSEC_RXSC_ATTR] = { [MACSEC_RXSC_ATTR_SCI] = { .type = NLA_U64 }, [MACSEC_RXSC_ATTR_ACTIVE] = { .type = NLA_U8 }, }; static const struct nla_policy macsec_genl_sa_policy[NUM_MACSEC_SA_ATTR] = { [MACSEC_SA_ATTR_AN] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_ACTIVE] = { .type = NLA_U8 }, [MACSEC_SA_ATTR_PN] = NLA_POLICY_MIN_LEN(4), [MACSEC_SA_ATTR_KEYID] = { .type = NLA_BINARY, .len = MACSEC_KEYID_LEN, }, [MACSEC_SA_ATTR_KEY] = { .type = NLA_BINARY, .len = MACSEC_MAX_KEY_LEN, }, [MACSEC_SA_ATTR_SSCI] = { .type = NLA_U32 }, [MACSEC_SA_ATTR_SALT] = { .type = NLA_BINARY, .len = MACSEC_SALT_LEN, }, }; static const struct nla_policy 
macsec_genl_offload_policy[NUM_MACSEC_OFFLOAD_ATTR] = {
	[MACSEC_OFFLOAD_ATTR_TYPE] = { .type = NLA_U8 },
};

/* Offloads an operation to a device driver.
 *
 * A NULL @func is treated as success (returns 0 without calling anything).
 * For MACSEC_OFFLOAD_PHY the call is made with ctx->phydev->lock held.
 * Otherwise returns whatever @func returns.
 */
static int macsec_offload(int (* const func)(struct macsec_context *),
			  struct macsec_context *ctx)
{
	int ret;

	if (unlikely(!func))
		return 0;

	if (ctx->offload == MACSEC_OFFLOAD_PHY)
		mutex_lock(&ctx->phydev->lock);

	ret = (*func)(ctx);

	if (ctx->offload == MACSEC_OFFLOAD_PHY)
		mutex_unlock(&ctx->phydev->lock);

	return ret;
}

/* Parse the nested MACSEC_ATTR_SA_CONFIG attribute into @tb_sa.
 * Returns 0, or -EINVAL when the attribute is missing or fails to parse.
 */
static int parse_sa_config(struct nlattr **attrs, struct nlattr **tb_sa)
{
	if (!attrs[MACSEC_ATTR_SA_CONFIG])
		return -EINVAL;

	if (nla_parse_nested_deprecated(tb_sa, MACSEC_SA_ATTR_MAX, attrs[MACSEC_ATTR_SA_CONFIG], macsec_genl_sa_policy, NULL))
		return -EINVAL;

	return 0;
}

/* Parse the nested MACSEC_ATTR_RXSC_CONFIG attribute into @tb_rxsc.
 * Returns 0, or -EINVAL when the attribute is missing or fails to parse.
 */
static int parse_rxsc_config(struct nlattr **attrs, struct nlattr **tb_rxsc)
{
	if (!attrs[MACSEC_ATTR_RXSC_CONFIG])
		return -EINVAL;

	if (nla_parse_nested_deprecated(tb_rxsc, MACSEC_RXSC_ATTR_MAX, attrs[MACSEC_ATTR_RXSC_CONFIG], macsec_genl_rxsc_policy, NULL))
		return -EINVAL;

	return 0;
}

/* Check the SA attributes of an "add RX SA" request: AN, KEY and KEYID are
 * mandatory, AN must be below MACSEC_NUM_AN, an optional PN must be non-zero,
 * an optional ACTIVE must be 0 or 1, and KEYID must be exactly
 * MACSEC_KEYID_LEN bytes.
 */
static bool validate_add_rxsa(struct nlattr **attrs)
{
	if (!attrs[MACSEC_SA_ATTR_AN] ||
	    !attrs[MACSEC_SA_ATTR_KEY] ||
	    !attrs[MACSEC_SA_ATTR_KEYID])
		return false;

	if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
		return false;

	if (attrs[MACSEC_SA_ATTR_PN] &&
	    nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
		return false;

	if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
		if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1)
			return false;
	}

	if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN)
		return false;

	return true;
}

/* Netlink handler: add an RX SA to an existing RX SC.
 *
 * Attribute parsing and validation happen before RTNL is taken; everything
 * that touches the SecY runs under rtnl_lock().  The new SA is published in
 * rx_sc->sa[] only after the optional device offload succeeded.
 *
 * Returns 0 on success, -EINVAL for malformed or inconsistent attributes,
 * -EBUSY when the AN slot is already occupied, -ENOMEM, -EOPNOTSUPP when
 * offload is enabled but no ops are available, or the error from
 * get_rxsc_from_nl(), init_rx_sa() or the driver.
 */
static int macsec_add_rxsa(struct sk_buff *skb, struct genl_info *info)
{
	struct net_device *dev;
	struct nlattr **attrs = info->attrs;
	struct macsec_secy *secy;
	struct macsec_rx_sc *rx_sc;
	struct macsec_rx_sa *rx_sa;
	unsigned char assoc_num;
	int pn_len;
	struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1];
	struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1];
	int err;

	if (!attrs[MACSEC_ATTR_IFINDEX])
		return -EINVAL;

	if (parse_sa_config(attrs, tb_sa))
		return -EINVAL;

	if (parse_rxsc_config(attrs, tb_rxsc))
		return -EINVAL;

	if (!validate_add_rxsa(tb_sa))
		return -EINVAL;

	rtnl_lock();
	rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy);
	if (IS_ERR(rx_sc)) {
		rtnl_unlock();
		return PTR_ERR(rx_sc);
	}

	assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]);

	/* the key must match the key length configured on the SecY */
	if (nla_len(tb_sa[MACSEC_SA_ATTR_KEY]) != secy->key_len) {
		pr_notice("macsec: nl: add_rxsa: bad key length: %d != %d\n",
			  nla_len(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len);
		rtnl_unlock();
		return -EINVAL;
	}

	/* the PN is optional here, but if given its size depends on XPN */
	pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
	if (tb_sa[MACSEC_SA_ATTR_PN] &&
	    nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
		pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
			  nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
		rtnl_unlock();
		return -EINVAL;
	}

	/* XPN additionally requires an SSCI and a full-length salt */
	if (secy->xpn) {
		if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) {
			rtnl_unlock();
			return -EINVAL;
		}

		if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
			pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
				  nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
				  MACSEC_SALT_LEN);
			rtnl_unlock();
			return -EINVAL;
		}
	}

	rx_sa = rtnl_dereference(rx_sc->sa[assoc_num]);
	if (rx_sa) {
		rtnl_unlock();
		return -EBUSY;
	}

	rx_sa = kmalloc(sizeof(*rx_sa), GFP_KERNEL);
	if (!rx_sa) {
		rtnl_unlock();
		return -ENOMEM;
	}

	err = init_rx_sa(rx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
			 secy->key_len, secy->icv_len);
	if (err < 0) {
		/* init_rx_sa() released what it allocated; only the SA is left */
		kfree(rx_sa);
		rtnl_unlock();
		return err;
	}

	if (tb_sa[MACSEC_SA_ATTR_PN]) {
		spin_lock_bh(&rx_sa->lock);
		rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
		spin_unlock_bh(&rx_sa->lock);
	}

	if (tb_sa[MACSEC_SA_ATTR_ACTIVE])
		rx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]);

	rx_sa->sc = rx_sc;

	if (secy->xpn) {
		rx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]);
		nla_memcpy(rx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT],
			   MACSEC_SALT_LEN);
	}

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(netdev_priv(dev))) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (!ops) {
			err = -EOPNOTSUPP;
			goto cleanup;
		}

		ctx.sa.assoc_num = assoc_num;
		ctx.sa.rx_sa = rx_sa;
		ctx.secy = secy;
		memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
		       secy->key_len);

		err = macsec_offload(ops->mdo_add_rxsa, &ctx);
		/* wipe the on-stack key copy whether or not the call worked */
		memzero_explicit(ctx.sa.key, secy->key_len);
		if (err)
			goto cleanup;
	}

	nla_memcpy(rx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN);
	rcu_assign_pointer(rx_sc->sa[assoc_num], rx_sa);

	rtnl_unlock();

	return 0;

cleanup:
	/* the SA was never published; drop the initial reference */
	macsec_rxsa_put(rx_sa);
	rtnl_unlock();
	return err;
}

/* Check the attributes of an RX SC request: SCI is mandatory and an optional
 * ACTIVE must be 0 or 1.  Also used by the RX SC update handler.
 */
static bool validate_add_rxsc(struct nlattr **attrs)
{
	if (!attrs[MACSEC_RXSC_ATTR_SCI])
		return false;

	if (attrs[MACSEC_RXSC_ATTR_ACTIVE]) {
		if (nla_get_u8(attrs[MACSEC_RXSC_ATTR_ACTIVE]) > 1)
			return false;
	}

	return true;
}

/* Netlink handler: create an RX SC on a macsec device.
 *
 * The SC is created (and linked into the SecY) first; if the device offload
 * then fails, it is unlinked and freed again.  A new SC is active unless
 * MACSEC_RXSC_ATTR_ACTIVE says otherwise.
 */
static int macsec_add_rxsc(struct sk_buff *skb, struct genl_info *info)
{
	struct net_device *dev;
	sci_t sci = MACSEC_UNDEF_SCI;
	struct nlattr **attrs = info->attrs;
	struct macsec_rx_sc *rx_sc;
	struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1];
	struct macsec_secy *secy;
	bool active = true;
	int ret;

	if (!attrs[MACSEC_ATTR_IFINDEX])
		return -EINVAL;

	if (parse_rxsc_config(attrs, tb_rxsc))
		return -EINVAL;

	if (!validate_add_rxsc(tb_rxsc))
		return -EINVAL;

	rtnl_lock();
	dev = get_dev_from_nl(genl_info_net(info), attrs);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		return PTR_ERR(dev);
	}

	secy = &macsec_priv(dev)->secy;
	sci = nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]);

	if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE])
		active = nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]);

	rx_sc = create_rx_sc(dev, sci, active);
	if (IS_ERR(rx_sc)) {
		rtnl_unlock();
		return PTR_ERR(rx_sc);
	}

	if (macsec_is_offloaded(netdev_priv(dev))) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (!ops) {
			ret = -EOPNOTSUPP;
			goto cleanup;
		}

		ctx.rx_sc = rx_sc;
		ctx.secy = secy;

		ret = macsec_offload(ops->mdo_add_rxsc, &ctx);
		if (ret)
			goto cleanup;
	}

rtnl_unlock();

	return 0;

cleanup:
	/* offload failed: unlink the SC that was just created and release it */
	del_rx_sc(secy, sci);
	free_rx_sc(rx_sc);
	rtnl_unlock();
	return ret;
}

/* Check the SA attributes of an "add TX SA" request: AN, PN, KEY and KEYID
 * are all mandatory, AN must be below MACSEC_NUM_AN, PN must be non-zero, an
 * optional ACTIVE must be 0 or 1, and KEYID must be exactly MACSEC_KEYID_LEN
 * bytes.
 */
static bool validate_add_txsa(struct nlattr **attrs)
{
	if (!attrs[MACSEC_SA_ATTR_AN] ||
	    !attrs[MACSEC_SA_ATTR_PN] ||
	    !attrs[MACSEC_SA_ATTR_KEY] ||
	    !attrs[MACSEC_SA_ATTR_KEYID])
		return false;

	if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
		return false;

	if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
		return false;

	if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
		if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1)
			return false;
	}

	if (nla_len(attrs[MACSEC_SA_ATTR_KEYID]) != MACSEC_KEYID_LEN)
		return false;

	return true;
}

/* Netlink handler: add a TX SA to the SecY's TX SC.
 *
 * Attribute parsing and validation happen before RTNL is taken; everything
 * that touches the SecY runs under rtnl_lock().  secy->operational is set
 * when the new SA is active and is the encoding SA; the previous value is
 * remembered so the error path can restore it if the device offload fails.
 */
static int macsec_add_txsa(struct sk_buff *skb, struct genl_info *info)
{
	struct net_device *dev;
	struct nlattr **attrs = info->attrs;
	struct macsec_secy *secy;
	struct macsec_tx_sc *tx_sc;
	struct macsec_tx_sa *tx_sa;
	unsigned char assoc_num;
	int pn_len;
	struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1];
	bool was_operational;
	int err;

	if (!attrs[MACSEC_ATTR_IFINDEX])
		return -EINVAL;

	if (parse_sa_config(attrs, tb_sa))
		return -EINVAL;

	if (!validate_add_txsa(tb_sa))
		return -EINVAL;

	rtnl_lock();
	dev = get_dev_from_nl(genl_info_net(info), attrs);
	if (IS_ERR(dev)) {
		rtnl_unlock();
		return PTR_ERR(dev);
	}

	secy = &macsec_priv(dev)->secy;
	tx_sc = &secy->tx_sc;

	assoc_num = nla_get_u8(tb_sa[MACSEC_SA_ATTR_AN]);

	/* the key must match the key length configured on the SecY */
	if (nla_len(tb_sa[MACSEC_SA_ATTR_KEY]) != secy->key_len) {
		pr_notice("macsec: nl: add_txsa: bad key length: %d != %d\n",
			  nla_len(tb_sa[MACSEC_SA_ATTR_KEY]), secy->key_len);
		rtnl_unlock();
		return -EINVAL;
	}

	/* the PN is mandatory for TX; its size depends on XPN */
	pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
	if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
		pr_notice("macsec: nl: add_txsa: bad pn length: %d != %d\n",
			  nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
		rtnl_unlock();
		return -EINVAL;
	}

	/* XPN additionally requires an SSCI and a full-length salt */
	if (secy->xpn) {
		if (!tb_sa[MACSEC_SA_ATTR_SSCI] || !tb_sa[MACSEC_SA_ATTR_SALT]) {
			rtnl_unlock();
			return -EINVAL;
		}

		if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
			pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
				  nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
				  MACSEC_SALT_LEN);
			rtnl_unlock();
			return -EINVAL;
		}
	}

	tx_sa = rtnl_dereference(tx_sc->sa[assoc_num]);
	if (tx_sa) {
		rtnl_unlock();
		return -EBUSY;
	}

	tx_sa = kmalloc(sizeof(*tx_sa), GFP_KERNEL);
	if (!tx_sa) {
		rtnl_unlock();
		return -ENOMEM;
	}

	err = init_tx_sa(tx_sa, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
			 secy->key_len, secy->icv_len);
	if (err < 0) {
		/* init_tx_sa() released what it allocated; only the SA is left */
		kfree(tx_sa);
		rtnl_unlock();
		return err;
	}

	spin_lock_bh(&tx_sa->lock);
	tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
	spin_unlock_bh(&tx_sa->lock);

	if (tb_sa[MACSEC_SA_ATTR_ACTIVE])
		tx_sa->active = !!nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]);

	was_operational = secy->operational;
	if (assoc_num == tx_sc->encoding_sa && tx_sa->active)
		secy->operational = true;

	if (secy->xpn) {
		tx_sa->ssci = nla_get_ssci(tb_sa[MACSEC_SA_ATTR_SSCI]);
		nla_memcpy(tx_sa->key.salt.bytes, tb_sa[MACSEC_SA_ATTR_SALT],
			   MACSEC_SALT_LEN);
	}

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(netdev_priv(dev))) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (!ops) {
			err = -EOPNOTSUPP;
			goto cleanup;
		}

		ctx.sa.assoc_num = assoc_num;
		ctx.sa.tx_sa = tx_sa;
		ctx.secy = secy;
		memcpy(ctx.sa.key, nla_data(tb_sa[MACSEC_SA_ATTR_KEY]),
		       secy->key_len);

		err = macsec_offload(ops->mdo_add_txsa, &ctx);
		/* wipe the on-stack key copy whether or not the call worked */
		memzero_explicit(ctx.sa.key, secy->key_len);
		if (err)
			goto cleanup;
	}

	nla_memcpy(tx_sa->key.id, tb_sa[MACSEC_SA_ATTR_KEYID], MACSEC_KEYID_LEN);
rcu_assign_pointer(tx_sc->sa[assoc_num], tx_sa); rtnl_unlock(); return 0; cleanup: secy->operational = was_operational; macsec_txsa_put(tx_sa); rtnl_unlock(); return err; } static int macsec_del_rxsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; struct macsec_rx_sa *rx_sa; u8 assoc_num; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; rtnl_lock(); rx_sa = get_rxsa_from_nl(genl_info_net(info), attrs, tb_rxsc, tb_sa, &dev, &secy, &rx_sc, &assoc_num); if (IS_ERR(rx_sa)) { rtnl_unlock(); return PTR_ERR(rx_sa); } if (rx_sa->active) { rtnl_unlock(); return -EBUSY; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.rx_sa = rx_sa; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_rxsa, &ctx); if (ret) goto cleanup; } RCU_INIT_POINTER(rx_sc->sa[assoc_num], NULL); clear_rx_sa(rx_sa); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static int macsec_del_rxsc(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_rx_sc *rx_sc; sci_t sci; struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_rxsc_config(attrs, tb_rxsc)) return -EINVAL; if (!tb_rxsc[MACSEC_RXSC_ATTR_SCI]) return -EINVAL; rtnl_lock(); dev = get_dev_from_nl(genl_info_net(info), info->attrs); if (IS_ERR(dev)) { rtnl_unlock(); return PTR_ERR(dev); } secy = &macsec_priv(dev)->secy; sci 
= nla_get_sci(tb_rxsc[MACSEC_RXSC_ATTR_SCI]); rx_sc = del_rx_sc(secy, sci); if (!rx_sc) { rtnl_unlock(); return -ENODEV; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.rx_sc = rx_sc; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_rxsc, &ctx); if (ret) goto cleanup; } free_rx_sc(rx_sc); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static int macsec_del_txsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; int ret; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; rtnl_lock(); tx_sa = get_txsa_from_nl(genl_info_net(info), attrs, tb_sa, &dev, &secy, &tx_sc, &assoc_num); if (IS_ERR(tx_sa)) { rtnl_unlock(); return PTR_ERR(tx_sa); } if (tx_sa->active) { rtnl_unlock(); return -EBUSY; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.secy = secy; ret = macsec_offload(ops->mdo_del_txsa, &ctx); if (ret) goto cleanup; } RCU_INIT_POINTER(tx_sc->sa[assoc_num], NULL); clear_tx_sa(tx_sa); rtnl_unlock(); return 0; cleanup: rtnl_unlock(); return ret; } static bool validate_upd_sa(struct nlattr **attrs) { if (!attrs[MACSEC_SA_ATTR_AN] || attrs[MACSEC_SA_ATTR_KEY] || attrs[MACSEC_SA_ATTR_KEYID] || attrs[MACSEC_SA_ATTR_SSCI] || attrs[MACSEC_SA_ATTR_SALT]) return false; if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN) return false; if 
(attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0) return false; if (attrs[MACSEC_SA_ATTR_ACTIVE]) { if (nla_get_u8(attrs[MACSEC_SA_ATTR_ACTIVE]) > 1) return false; } return true; } static int macsec_upd_txsa(struct sk_buff *skb, struct genl_info *info) { struct nlattr **attrs = info->attrs; struct net_device *dev; struct macsec_secy *secy; struct macsec_tx_sc *tx_sc; struct macsec_tx_sa *tx_sa; u8 assoc_num; struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1]; bool was_operational, was_active; pn_t prev_pn; int ret = 0; prev_pn.full64 = 0; if (!attrs[MACSEC_ATTR_IFINDEX]) return -EINVAL; if (parse_sa_config(attrs, tb_sa)) return -EINVAL; if (!validate_upd_sa(tb_sa)) return -EINVAL; rtnl_lock(); tx_sa = get_txsa_from_nl(genl_info_net(info), attrs, tb_sa, &dev, &secy, &tx_sc, &assoc_num); if (IS_ERR(tx_sa)) { rtnl_unlock(); return PTR_ERR(tx_sa); } if (tb_sa[MACSEC_SA_ATTR_PN]) { int pn_len; pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN; if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) { pr_notice("macsec: nl: upd_txsa: bad pn length: %d != %d\n", nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len); rtnl_unlock(); return -EINVAL; } spin_lock_bh(&tx_sa->lock); prev_pn = tx_sa->next_pn_halves; tx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]); spin_unlock_bh(&tx_sa->lock); } was_active = tx_sa->active; if (tb_sa[MACSEC_SA_ATTR_ACTIVE]) tx_sa->active = nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]); was_operational = secy->operational; if (assoc_num == tx_sc->encoding_sa) secy->operational = tx_sa->active; /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(netdev_priv(dev))) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { ret = -EOPNOTSUPP; goto cleanup; } ctx.sa.assoc_num = assoc_num; ctx.sa.tx_sa = tx_sa; ctx.sa.update_pn = !!prev_pn.full64; ctx.secy = secy; ret = macsec_offload(ops->mdo_upd_txsa, &ctx); if (ret) goto cleanup; } rtnl_unlock(); 
return 0;

cleanup:
	/* roll back everything that was changed before the offload failed */
	if (tb_sa[MACSEC_SA_ATTR_PN]) {
		spin_lock_bh(&tx_sa->lock);
		tx_sa->next_pn_halves = prev_pn;
		spin_unlock_bh(&tx_sa->lock);
	}
	tx_sa->active = was_active;
	secy->operational = was_operational;
	rtnl_unlock();
	return ret;
}

/* Netlink handler: update the PN and/or active flag of an RX SA.
 *
 * The new values are written first; the previous PN and active flag are
 * remembered and restored if the device offload fails.
 */
static int macsec_upd_rxsa(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	struct net_device *dev;
	struct macsec_secy *secy;
	struct macsec_rx_sc *rx_sc;
	struct macsec_rx_sa *rx_sa;
	u8 assoc_num;
	struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1];
	struct nlattr *tb_sa[MACSEC_SA_ATTR_MAX + 1];
	bool was_active;
	pn_t prev_pn;
	int ret = 0;

	/* stays 0 unless a PN update saves the old value below */
	prev_pn.full64 = 0;

	if (!attrs[MACSEC_ATTR_IFINDEX])
		return -EINVAL;

	if (parse_rxsc_config(attrs, tb_rxsc))
		return -EINVAL;

	if (parse_sa_config(attrs, tb_sa))
		return -EINVAL;

	if (!validate_upd_sa(tb_sa))
		return -EINVAL;

	rtnl_lock();
	rx_sa = get_rxsa_from_nl(genl_info_net(info), attrs, tb_rxsc, tb_sa,
				 &dev, &secy, &rx_sc, &assoc_num);
	if (IS_ERR(rx_sa)) {
		rtnl_unlock();
		return PTR_ERR(rx_sa);
	}

	if (tb_sa[MACSEC_SA_ATTR_PN]) {
		int pn_len;

		pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
		if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
			pr_notice("macsec: nl: upd_rxsa: bad pn length: %d != %d\n",
				  nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
			rtnl_unlock();
			return -EINVAL;
		}

		spin_lock_bh(&rx_sa->lock);
		prev_pn = rx_sa->next_pn_halves;
		rx_sa->next_pn = nla_get_u64(tb_sa[MACSEC_SA_ATTR_PN]);
		spin_unlock_bh(&rx_sa->lock);
	}

	was_active = rx_sa->active;
	if (tb_sa[MACSEC_SA_ATTR_ACTIVE])
		rx_sa->active = nla_get_u8(tb_sa[MACSEC_SA_ATTR_ACTIVE]);

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(netdev_priv(dev))) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (!ops) {
			ret = -EOPNOTSUPP;
			goto cleanup;
		}

		ctx.sa.assoc_num = assoc_num;
		ctx.sa.rx_sa = rx_sa;
		/* tell the driver whether the PN was part of this update */
		ctx.sa.update_pn = !!prev_pn.full64;
		ctx.secy = secy;

		ret = macsec_offload(ops->mdo_upd_rxsa, &ctx);
		if (ret)
			goto cleanup;
	}

	rtnl_unlock();
	return 0;

cleanup:
	/* roll back everything that was changed before the offload failed */
	if (tb_sa[MACSEC_SA_ATTR_PN]) {
		spin_lock_bh(&rx_sa->lock);
		rx_sa->next_pn_halves = prev_pn;
		spin_unlock_bh(&rx_sa->lock);
	}
	rx_sa->active = was_active;
	rtnl_unlock();
	return ret;
}

/* Netlink handler: update the active flag of an RX SC.
 *
 * secy->n_rx_sc is adjusted when the flag actually changes.  The previous
 * flag and counter are remembered so they can be restored if the device
 * offload fails.
 */
static int macsec_upd_rxsc(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr **attrs = info->attrs;
	struct net_device *dev;
	struct macsec_secy *secy;
	struct macsec_rx_sc *rx_sc;
	struct nlattr *tb_rxsc[MACSEC_RXSC_ATTR_MAX + 1];
	unsigned int prev_n_rx_sc;
	bool was_active;
	int ret;

	if (!attrs[MACSEC_ATTR_IFINDEX])
		return -EINVAL;

	if (parse_rxsc_config(attrs, tb_rxsc))
		return -EINVAL;

	if (!validate_add_rxsc(tb_rxsc))
		return -EINVAL;

	rtnl_lock();
	rx_sc = get_rxsc_from_nl(genl_info_net(info), attrs, tb_rxsc, &dev, &secy);
	if (IS_ERR(rx_sc)) {
		rtnl_unlock();
		return PTR_ERR(rx_sc);
	}

	was_active = rx_sc->active;
	prev_n_rx_sc = secy->n_rx_sc;
	if (tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]) {
		bool new = !!nla_get_u8(tb_rxsc[MACSEC_RXSC_ATTR_ACTIVE]);

		if (rx_sc->active != new)
			secy->n_rx_sc += new ?
1 : -1;

		rx_sc->active = new;
	}

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(netdev_priv(dev))) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (!ops) {
			ret = -EOPNOTSUPP;
			goto cleanup;
		}

		ctx.rx_sc = rx_sc;
		ctx.secy = secy;

		ret = macsec_offload(ops->mdo_upd_rxsc, &ctx);
		if (ret)
			goto cleanup;
	}

	rtnl_unlock();

	return 0;

cleanup:
	/* offload failed: restore the counter and the flag saved above */
	secy->n_rx_sc = prev_n_rx_sc;
	rx_sc->active = was_active;
	rtnl_unlock();
	return ret;
}

/* Return true when the SecY has at least one RX SC or at least one TX SA
 * installed.
 */
static bool macsec_is_configured(struct macsec_dev *macsec)
{
	struct macsec_secy *secy = &macsec->secy;
	struct macsec_tx_sc *tx_sc = &secy->tx_sc;
	int i;

	if (secy->rx_sc)
		return true;

	for (i = 0; i < MACSEC_NUM_AN; i++)
		if (tx_sc->sa[i])
			return true;

	return false;
}

/* Return true when the device uses PHY offload and @ops provides an
 * mdo_insert_tx_tag callback.  @ops must not be NULL.
 */
static bool macsec_needs_tx_tag(struct macsec_dev *macsec,
				const struct macsec_ops *ops)
{
	return macsec->offload == MACSEC_OFFLOAD_PHY &&
		ops->mdo_insert_tx_tag;
}

/* Recompute @dev's needed head/tail room: the real device's requirement plus
 * either the values advertised by the offload ops, or the software defaults
 * (MACSEC_NEEDED_HEADROOM / MACSEC_NEEDED_TAILROOM) when there are no ops.
 */
static void macsec_set_head_tail_room(struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	struct net_device *real_dev = macsec->real_dev;
	int needed_headroom, needed_tailroom;
	const struct macsec_ops *ops;

	ops = macsec_get_ops(macsec, NULL);
	if (ops) {
		needed_headroom = ops->needed_headroom;
		needed_tailroom = ops->needed_tailroom;
	} else {
		needed_headroom = MACSEC_NEEDED_HEADROOM;
		needed_tailroom = MACSEC_NEEDED_TAILROOM;
	}

	dev->needed_headroom = real_dev->needed_headroom + needed_headroom;
	dev->needed_tailroom = real_dev->needed_tailroom + needed_tailroom;
}

/* Copy the real device's TSO limits to @dev, but only in offload mode. */
static void macsec_inherit_tso_max(struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);

	/* if macsec is offloaded, we need to follow the lower
	 * device's capabilities. otherwise, we can ignore them.
	 */
	if (macsec_is_offloaded(macsec))
		netif_inherit_tso_max(dev, macsec->real_dev);
}

/* Switch @dev to offload mode @offload.
 *
 * Refused with -EOPNOTSUPP when the mode is not supported or no ops are
 * available, and with -EBUSY while the device is running or already has SCs
 * or SAs configured.  When turning offload off, the ops of the previous mode
 * are used to delete the SecY from the device; otherwise the ops of the new
 * mode are used to add it.  macsec->offload is reverted if the driver call
 * fails.  On success, head/tail room, insert_tx_tag, TSO limits and features
 * are refreshed.
 */
static int macsec_update_offload(struct net_device *dev, enum macsec_offload offload)
{
	enum macsec_offload prev_offload;
	const struct macsec_ops *ops;
	struct macsec_context ctx;
	struct macsec_dev *macsec;
	int ret = 0;

	macsec = macsec_priv(dev);

	/* Check if the offloading mode is supported by the underlying layers */
	if (offload != MACSEC_OFFLOAD_OFF &&
	    !macsec_check_offload(offload, macsec))
		return -EOPNOTSUPP;

	/* Check if the net device is busy. */
	if (netif_running(dev))
		return -EBUSY;

	/* Check if the device already has rules configured: we do not support
	 * rules migration.
	 */
	if (macsec_is_configured(macsec))
		return -EBUSY;

	prev_offload = macsec->offload;

	ops = __macsec_get_ops(offload == MACSEC_OFFLOAD_OFF ? prev_offload : offload,
			       macsec, &ctx);
	if (!ops)
		return -EOPNOTSUPP;

	macsec->offload = offload;

	ctx.secy = &macsec->secy;
	ret = offload == MACSEC_OFFLOAD_OFF ? macsec_offload(ops->mdo_del_secy, &ctx)
					    : macsec_offload(ops->mdo_add_secy, &ctx);
	if (ret) {
		macsec->offload = prev_offload;
		return ret;
	}

	macsec_set_head_tail_room(dev);
	macsec->insert_tx_tag = macsec_needs_tx_tag(macsec, ops);

	macsec_inherit_tso_max(dev);

	netdev_update_features(dev);

	return ret;
}

/* Netlink handler: change the offload mode of a macsec device.
 *
 * Does nothing (returns 0) when the requested mode equals the current one.
 * Returns -EINVAL when a required attribute is missing or malformed,
 * otherwise the result of get_dev_from_nl() / macsec_update_offload().
 */
static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
{
	struct nlattr *tb_offload[MACSEC_OFFLOAD_ATTR_MAX + 1];
	struct nlattr **attrs = info->attrs;
	enum macsec_offload offload;
	struct macsec_dev *macsec;
	struct net_device *dev;
	int ret = 0;

	if (!attrs[MACSEC_ATTR_IFINDEX])
		return -EINVAL;

	if (!attrs[MACSEC_ATTR_OFFLOAD])
		return -EINVAL;

	if (nla_parse_nested_deprecated(tb_offload, MACSEC_OFFLOAD_ATTR_MAX, attrs[MACSEC_ATTR_OFFLOAD], macsec_genl_offload_policy, NULL))
		return -EINVAL;

	rtnl_lock();

	dev = get_dev_from_nl(genl_info_net(info), attrs);
	if (IS_ERR(dev)) {
		ret = PTR_ERR(dev);
		goto out;
	}
	macsec = macsec_priv(dev);

	if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) {
		ret = -EINVAL;
		goto out;
	}

	offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]);

	if (macsec->offload != offload)
		ret = macsec_update_offload(dev, offload);
out:
	rtnl_unlock();
	return ret;
}

/* Collect the statistics of TX SA @tx_sa (association number @an) into @sum.
 *
 * In offload mode the driver is asked to fill @sum (its return value is
 * ignored, and nothing is written when no ops are available); the per-CPU
 * software counters are not consulted.  Otherwise the per-CPU counters are
 * added to @sum, which the caller is expected to have initialised.
 */
static void get_tx_sa_stats(struct net_device *dev, int an,
			    struct macsec_tx_sa *tx_sa,
			    struct macsec_tx_sa_stats *sum)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	int cpu;

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(macsec, &ctx);
		if (ops) {
			ctx.sa.assoc_num = an;
			ctx.sa.tx_sa = tx_sa;
			ctx.stats.tx_sa_stats = sum;
			ctx.secy = &macsec_priv(dev)->secy;
			macsec_offload(ops->mdo_get_tx_sa_stats, &ctx);
		}
		return;
	}

	for_each_possible_cpu(cpu) {
		const struct macsec_tx_sa_stats *stats =
			per_cpu_ptr(tx_sa->stats, cpu);

		sum->OutPktsProtected += stats->OutPktsProtected;
		sum->OutPktsEncrypted += stats->OutPktsEncrypted;
	}
}

/* Append the TX SA counters in @sum to @skb as u32 attributes.
 * Returns 0, or -EMSGSIZE when an attribute could not be added.
 */
static int copy_tx_sa_stats(struct sk_buff *skb, struct macsec_tx_sa_stats *sum)
{
	if (nla_put_u32(skb, MACSEC_SA_STATS_ATTR_OUT_PKTS_PROTECTED,
			sum->OutPktsProtected) ||
	    nla_put_u32(skb, MACSEC_SA_STATS_ATTR_OUT_PKTS_ENCRYPTED,
			sum->OutPktsEncrypted))
		return -EMSGSIZE;

	return 0;
}

/* Collect the statistics of RX SA @rx_sa (association number @an on @rx_sc)
 * into @sum.  In offload mode the driver is asked to fill @sum; otherwise
 * the per-CPU software counters are added to it.
 */
static void get_rx_sa_stats(struct net_device *dev,
			    struct macsec_rx_sc *rx_sc, int an,
			    struct macsec_rx_sa *rx_sa,
			    struct macsec_rx_sa_stats *sum)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	int cpu;

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(macsec, &ctx);
		if (ops) {
			ctx.sa.assoc_num = an;
			ctx.sa.rx_sa = rx_sa;
			ctx.stats.rx_sa_stats = sum;
			ctx.secy = &macsec_priv(dev)->secy;
			ctx.rx_sc = rx_sc;
			macsec_offload(ops->mdo_get_rx_sa_stats, &ctx);
		}
		return;
	}

	for_each_possible_cpu(cpu) {
		const struct macsec_rx_sa_stats *stats =
			per_cpu_ptr(rx_sa->stats, cpu);

		sum->InPktsOK += stats->InPktsOK;
		sum->InPktsInvalid += stats->InPktsInvalid;
		sum->InPktsNotValid +=
stats->InPktsNotValid;
		sum->InPktsNotUsingSA += stats->InPktsNotUsingSA;
		sum->InPktsUnusedSA += stats->InPktsUnusedSA;
	}
}

/* Emit the RX SA counters in @sum as u32 netlink attributes on @skb.
 * Returns 0 on success or -EMSGSIZE when an attribute cannot be added.
 */
static int copy_rx_sa_stats(struct sk_buff *skb,
			    struct macsec_rx_sa_stats *sum)
{
	if (nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_OK,
			sum->InPktsOK) ||
	    nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_INVALID,
			sum->InPktsInvalid) ||
	    nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_VALID,
			sum->InPktsNotValid) ||
	    nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_NOT_USING_SA,
			sum->InPktsNotUsingSA) ||
	    nla_put_u32(skb, MACSEC_SA_STATS_ATTR_IN_PKTS_UNUSED_SA,
			sum->InPktsUnusedSA))
		return -EMSGSIZE;

	return 0;
}

/* Fill @sum with the counters of receive channel @rx_sc.
 *
 * Offloaded devices delegate to mdo_get_rx_sc_stats (return value ignored)
 * and skip the software counters. Otherwise each CPU's counters are
 * snapshotted under the u64_stats fetch/retry loop and added into @sum.
 */
static void get_rx_sc_stats(struct net_device *dev,
			    struct macsec_rx_sc *rx_sc,
			    struct macsec_rx_sc_stats *sum)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	int cpu;

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(macsec, &ctx);
		if (ops) {
			ctx.stats.rx_sc_stats = sum;
			ctx.secy = &macsec_priv(dev)->secy;
			ctx.rx_sc = rx_sc;
			macsec_offload(ops->mdo_get_rx_sc_stats, &ctx);
		}
		return;
	}

	for_each_possible_cpu(cpu) {
		const struct pcpu_rx_sc_stats *stats;
		struct macsec_rx_sc_stats tmp;
		unsigned int start;

		stats = per_cpu_ptr(rx_sc->stats, cpu);
		/* Copy the whole struct and retry if the sync point moved */
		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			memcpy(&tmp, &stats->stats, sizeof(tmp));
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		sum->InOctetsValidated += tmp.InOctetsValidated;
		sum->InOctetsDecrypted += tmp.InOctetsDecrypted;
		sum->InPktsUnchecked   += tmp.InPktsUnchecked;
		sum->InPktsDelayed     += tmp.InPktsDelayed;
		sum->InPktsOK          += tmp.InPktsOK;
		sum->InPktsInvalid     += tmp.InPktsInvalid;
		sum->InPktsLate        += tmp.InPktsLate;
		sum->InPktsNotValid    += tmp.InPktsNotValid;
		sum->InPktsNotUsingSA  += tmp.InPktsNotUsingSA;
		sum->InPktsUnusedSA    += tmp.InPktsUnusedSA;
	}
}

/* Emit the RX SC counters in @sum as 64-bit netlink attributes on @skb.
 * Returns 0 on success or -EMSGSIZE when an attribute cannot be added.
 */
static int copy_rx_sc_stats(struct sk_buff *skb, struct macsec_rx_sc_stats *sum)
{
	if (nla_put_u64_64bit(skb,
MACSEC_RXSC_STATS_ATTR_IN_OCTETS_VALIDATED,
			      sum->InOctetsValidated,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_OCTETS_DECRYPTED,
			      sum->InOctetsDecrypted,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNCHECKED,
			      sum->InPktsUnchecked,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_DELAYED,
			      sum->InPktsDelayed,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_OK,
			      sum->InPktsOK,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_INVALID,
			      sum->InPktsInvalid,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_LATE,
			      sum->InPktsLate,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_VALID,
			      sum->InPktsNotValid,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_NOT_USING_SA,
			      sum->InPktsNotUsingSA,
			      MACSEC_RXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_RXSC_STATS_ATTR_IN_PKTS_UNUSED_SA,
			      sum->InPktsUnusedSA,
			      MACSEC_RXSC_STATS_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}

/* Fill @sum with the counters of @dev's transmit channel.
 *
 * Offloaded devices delegate to mdo_get_tx_sc_stats (return value ignored)
 * and skip the software counters. Otherwise each CPU's counters are
 * snapshotted under the u64_stats fetch/retry loop and added into @sum.
 */
static void get_tx_sc_stats(struct net_device *dev,
			    struct macsec_tx_sc_stats *sum)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	int cpu;

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(macsec, &ctx);
		if (ops) {
			ctx.stats.tx_sc_stats = sum;
			ctx.secy = &macsec_priv(dev)->secy;
			macsec_offload(ops->mdo_get_tx_sc_stats, &ctx);
		}
		return;
	}

	for_each_possible_cpu(cpu) {
		const struct pcpu_tx_sc_stats *stats;
		struct macsec_tx_sc_stats tmp;
		unsigned int start;

		stats = per_cpu_ptr(macsec_priv(dev)->secy.tx_sc.stats, cpu);
		/* Copy the whole struct and retry if the sync point moved */
		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			memcpy(&tmp, &stats->stats, sizeof(tmp));
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		sum->OutPktsProtected +=
tmp.OutPktsProtected;
		sum->OutPktsEncrypted   += tmp.OutPktsEncrypted;
		sum->OutOctetsProtected += tmp.OutOctetsProtected;
		sum->OutOctetsEncrypted += tmp.OutOctetsEncrypted;
	}
}

/* Emit the TX SC counters in @sum as 64-bit netlink attributes on @skb.
 * Returns 0 on success or -EMSGSIZE when an attribute cannot be added.
 */
static int copy_tx_sc_stats(struct sk_buff *skb, struct macsec_tx_sc_stats *sum)
{
	if (nla_put_u64_64bit(skb,
			      MACSEC_TXSC_STATS_ATTR_OUT_PKTS_PROTECTED,
			      sum->OutPktsProtected,
			      MACSEC_TXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_TXSC_STATS_ATTR_OUT_PKTS_ENCRYPTED,
			      sum->OutPktsEncrypted,
			      MACSEC_TXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_PROTECTED,
			      sum->OutOctetsProtected,
			      MACSEC_TXSC_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_TXSC_STATS_ATTR_OUT_OCTETS_ENCRYPTED,
			      sum->OutOctetsEncrypted,
			      MACSEC_TXSC_STATS_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}

/* Fill @sum with the device-wide (SecY) counters of @dev.
 *
 * Offloaded devices delegate to mdo_get_dev_stats (return value ignored)
 * and skip the software counters. Otherwise each CPU's counters are
 * snapshotted under the u64_stats fetch/retry loop and added into @sum.
 */
static void get_secy_stats(struct net_device *dev, struct macsec_dev_stats *sum)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	int cpu;

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(macsec, &ctx);
		if (ops) {
			ctx.stats.dev_stats = sum;
			ctx.secy = &macsec_priv(dev)->secy;
			macsec_offload(ops->mdo_get_dev_stats, &ctx);
		}
		return;
	}

	for_each_possible_cpu(cpu) {
		const struct pcpu_secy_stats *stats;
		struct macsec_dev_stats tmp;
		unsigned int start;

		stats = per_cpu_ptr(macsec_priv(dev)->stats, cpu);
		/* Copy the whole struct and retry if the sync point moved */
		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			memcpy(&tmp, &stats->stats, sizeof(tmp));
		} while (u64_stats_fetch_retry(&stats->syncp, start));

		sum->OutPktsUntagged  += tmp.OutPktsUntagged;
		sum->InPktsUntagged   += tmp.InPktsUntagged;
		sum->OutPktsTooLong   += tmp.OutPktsTooLong;
		sum->InPktsNoTag      += tmp.InPktsNoTag;
		sum->InPktsBadTag     += tmp.InPktsBadTag;
		sum->InPktsUnknownSCI += tmp.InPktsUnknownSCI;
		sum->InPktsNoSCI      += tmp.InPktsNoSCI;
		sum->InPktsOverrun    += tmp.InPktsOverrun;
	}
}

/* Emit the SecY counters in @sum as 64-bit netlink attributes on @skb.
 * Returns 0 on success or -EMSGSIZE when an attribute cannot be added.
 */
static int copy_secy_stats(struct sk_buff *skb, struct macsec_dev_stats *sum)
{
	if (nla_put_u64_64bit(skb,
MACSEC_SECY_STATS_ATTR_OUT_PKTS_UNTAGGED,
			      sum->OutPktsUntagged,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_IN_PKTS_UNTAGGED,
			      sum->InPktsUntagged,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_OUT_PKTS_TOO_LONG,
			      sum->OutPktsTooLong,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_TAG,
			      sum->InPktsNoTag,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_IN_PKTS_BAD_TAG,
			      sum->InPktsBadTag,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_IN_PKTS_UNKNOWN_SCI,
			      sum->InPktsUnknownSCI,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_IN_PKTS_NO_SCI,
			      sum->InPktsNoSCI,
			      MACSEC_SECY_STATS_ATTR_PAD) ||
	    nla_put_u64_64bit(skb,
			      MACSEC_SECY_STATS_ATTR_IN_PKTS_OVERRUN,
			      sum->InPktsOverrun,
			      MACSEC_SECY_STATS_ATTR_PAD))
		return -EMSGSIZE;

	return 0;
}

/* Add a MACSEC_ATTR_SECY nest describing @secy to @skb.
 *
 * Returns 0 on success and 1 on failure (note: not a negative errno).
 * On failure after the nest was opened, the nest is cancelled.
 */
static int nla_put_secy(struct macsec_secy *secy, struct sk_buff *skb)
{
	struct macsec_tx_sc *tx_sc = &secy->tx_sc;
	struct nlattr *secy_nest = nla_nest_start_noflag(skb,
							 MACSEC_ATTR_SECY);
	u64 csid;

	if (!secy_nest)
		return 1;

	/* Derive the cipher suite id from the key length and XPN flag */
	switch (secy->key_len) {
	case MACSEC_GCM_AES_128_SAK_LEN:
		csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_128 : MACSEC_DEFAULT_CIPHER_ID;
		break;
	case MACSEC_GCM_AES_256_SAK_LEN:
		csid = secy->xpn ?
MACSEC_CIPHER_ID_GCM_AES_XPN_256 : MACSEC_CIPHER_ID_GCM_AES_256;
		break;
	default:
		/* Unknown key length: abandon the nest */
		goto cancel;
	}

	if (nla_put_sci(skb, MACSEC_SECY_ATTR_SCI, secy->sci,
			MACSEC_SECY_ATTR_PAD) ||
	    nla_put_u64_64bit(skb, MACSEC_SECY_ATTR_CIPHER_SUITE,
			      csid, MACSEC_SECY_ATTR_PAD) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_ICV_LEN, secy->icv_len) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_OPER, secy->operational) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_PROTECT, secy->protect_frames) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_REPLAY, secy->replay_protect) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_VALIDATE, secy->validate_frames) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_ENCRYPT, tx_sc->encrypt) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_INC_SCI, tx_sc->send_sci) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_ES, tx_sc->end_station) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_SCB, tx_sc->scb) ||
	    nla_put_u8(skb, MACSEC_SECY_ATTR_ENCODING_SA, tx_sc->encoding_sa))
		goto cancel;

	/* The replay window is only reported when replay protection is on */
	if (secy->replay_protect) {
		if (nla_put_u32(skb, MACSEC_SECY_ATTR_WINDOW, secy->replay_window))
			goto cancel;
	}

	nla_nest_end(skb, secy_nest);
	return 0;

cancel:
	nla_nest_cancel(skb, secy_nest);
	return 1;
}

/* Write one MACSEC_CMD_GET_TXSC dump message for @secy / @dev into @skb.
 *
 * The message carries, in order: ifindex, offload type, the SecY nest,
 * TX SC stats, SecY stats, the TX SA list and the RX SC list (each RX SC
 * with its own stats and RX SA list).
 *
 * Returns 0 on success or -EMSGSIZE when the message does not fit, in
 * which case the partially built message is cancelled.
 */
static noinline_for_stack int
dump_secy(struct macsec_secy *secy, struct net_device *dev,
	  struct sk_buff *skb, struct netlink_callback *cb)
{
	struct macsec_tx_sc_stats tx_sc_stats = {0, };
	struct macsec_tx_sa_stats tx_sa_stats = {0, };
	struct macsec_rx_sc_stats rx_sc_stats = {0, };
	struct macsec_rx_sa_stats rx_sa_stats = {0, };
	struct macsec_dev *macsec = netdev_priv(dev);
	struct macsec_dev_stats dev_stats = {0, };
	struct macsec_tx_sc *tx_sc = &secy->tx_sc;
	struct nlattr *txsa_list, *rxsc_list;
	struct macsec_rx_sc *rx_sc;
	struct nlattr *attr;
	void *hdr;
	int i, j;

	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			  &macsec_fam, NLM_F_MULTI, MACSEC_CMD_GET_TXSC);
	if (!hdr)
		return -EMSGSIZE;

	genl_dump_check_consistent(cb, hdr);

	if (nla_put_u32(skb, MACSEC_ATTR_IFINDEX, dev->ifindex))
		goto nla_put_failure;

	attr = nla_nest_start_noflag(skb, MACSEC_ATTR_OFFLOAD);
	if
(!attr)
		goto nla_put_failure;
	if (nla_put_u8(skb, MACSEC_OFFLOAD_ATTR_TYPE, macsec->offload))
		goto nla_put_failure;
	nla_nest_end(skb, attr);

	if (nla_put_secy(secy, skb))
		goto nla_put_failure;

	/* TX SC statistics */
	attr = nla_nest_start_noflag(skb, MACSEC_ATTR_TXSC_STATS);
	if (!attr)
		goto nla_put_failure;

	get_tx_sc_stats(dev, &tx_sc_stats);
	if (copy_tx_sc_stats(skb, &tx_sc_stats)) {
		nla_nest_cancel(skb, attr);
		goto nla_put_failure;
	}
	nla_nest_end(skb, attr);

	/* Device-wide (SecY) statistics */
	attr = nla_nest_start_noflag(skb, MACSEC_ATTR_SECY_STATS);
	if (!attr)
		goto nla_put_failure;
	get_secy_stats(dev, &dev_stats);
	if (copy_secy_stats(skb, &dev_stats)) {
		nla_nest_cancel(skb, attr);
		goto nla_put_failure;
	}
	nla_nest_end(skb, attr);

	/* TX SA list: one nest per populated association number; j is the
	 * 1-based nest index, i the association number.
	 */
	txsa_list = nla_nest_start_noflag(skb, MACSEC_ATTR_TXSA_LIST);
	if (!txsa_list)
		goto nla_put_failure;
	for (i = 0, j = 1; i < MACSEC_NUM_AN; i++) {
		struct macsec_tx_sa *tx_sa = rtnl_dereference(tx_sc->sa[i]);
		struct nlattr *txsa_nest;
		u64 pn;
		int pn_len;

		if (!tx_sa)
			continue;

		txsa_nest = nla_nest_start_noflag(skb, j++);
		if (!txsa_nest) {
			nla_nest_cancel(skb, txsa_list);
			goto nla_put_failure;
		}

		attr = nla_nest_start_noflag(skb, MACSEC_SA_ATTR_STATS);
		if (!attr) {
			nla_nest_cancel(skb, txsa_nest);
			nla_nest_cancel(skb, txsa_list);
			goto nla_put_failure;
		}
		/* The stats struct is reused across SAs, so clear it first */
		memset(&tx_sa_stats, 0, sizeof(tx_sa_stats));
		get_tx_sa_stats(dev, i, tx_sa, &tx_sa_stats);
		if (copy_tx_sa_stats(skb, &tx_sa_stats)) {
			nla_nest_cancel(skb, attr);
			nla_nest_cancel(skb, txsa_nest);
			nla_nest_cancel(skb, txsa_list);
			goto nla_put_failure;
		}
		nla_nest_end(skb, attr);

		/* NOTE(review): the attribute is emitted from the first
		 * pn_len bytes of the u64 'pn'. With the 4-byte length that
		 * would be the high-order half on a big-endian host --
		 * confirm whether that is intended.
		 */
		if (secy->xpn) {
			pn = tx_sa->next_pn;
			pn_len = MACSEC_XPN_PN_LEN;
		} else {
			pn = tx_sa->next_pn_halves.lower;
			pn_len = MACSEC_DEFAULT_PN_LEN;
		}

		if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) ||
		    nla_put(skb, MACSEC_SA_ATTR_PN, pn_len, &pn) ||
		    nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, tx_sa->key.id) ||
		    (secy->xpn && nla_put_ssci(skb, MACSEC_SA_ATTR_SSCI, tx_sa->ssci)) ||
		    nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, tx_sa->active)) {
			nla_nest_cancel(skb, txsa_nest);
nla_nest_cancel(skb, txsa_list);
			goto nla_put_failure;
		}

		nla_nest_end(skb, txsa_nest);
	}
	nla_nest_end(skb, txsa_list);

	/* RX SC list: one nest per receive channel, numbered from 1 */
	rxsc_list = nla_nest_start_noflag(skb, MACSEC_ATTR_RXSC_LIST);
	if (!rxsc_list)
		goto nla_put_failure;

	j = 1;
	for_each_rxsc_rtnl(secy, rx_sc) {
		int k;
		struct nlattr *rxsa_list;
		struct nlattr *rxsc_nest = nla_nest_start_noflag(skb, j++);

		if (!rxsc_nest) {
			nla_nest_cancel(skb, rxsc_list);
			goto nla_put_failure;
		}

		if (nla_put_u8(skb, MACSEC_RXSC_ATTR_ACTIVE, rx_sc->active) ||
		    nla_put_sci(skb, MACSEC_RXSC_ATTR_SCI, rx_sc->sci,
				MACSEC_RXSC_ATTR_PAD)) {
			nla_nest_cancel(skb, rxsc_nest);
			nla_nest_cancel(skb, rxsc_list);
			goto nla_put_failure;
		}

		attr = nla_nest_start_noflag(skb, MACSEC_RXSC_ATTR_STATS);
		if (!attr) {
			nla_nest_cancel(skb, rxsc_nest);
			nla_nest_cancel(skb, rxsc_list);
			goto nla_put_failure;
		}
		/* The stats struct is reused across SCs, so clear it first */
		memset(&rx_sc_stats, 0, sizeof(rx_sc_stats));
		get_rx_sc_stats(dev, rx_sc, &rx_sc_stats);
		if (copy_rx_sc_stats(skb, &rx_sc_stats)) {
			nla_nest_cancel(skb, attr);
			nla_nest_cancel(skb, rxsc_nest);
			nla_nest_cancel(skb, rxsc_list);
			goto nla_put_failure;
		}
		nla_nest_end(skb, attr);

		/* RX SA list of this SC: k is the 1-based nest index, i the
		 * association number.
		 */
		rxsa_list = nla_nest_start_noflag(skb,
						  MACSEC_RXSC_ATTR_SA_LIST);
		if (!rxsa_list) {
			nla_nest_cancel(skb, rxsc_nest);
			nla_nest_cancel(skb, rxsc_list);
			goto nla_put_failure;
		}

		for (i = 0, k = 1; i < MACSEC_NUM_AN; i++) {
			struct macsec_rx_sa *rx_sa = rtnl_dereference(rx_sc->sa[i]);
			struct nlattr *rxsa_nest;
			u64 pn;
			int pn_len;

			if (!rx_sa)
				continue;

			rxsa_nest = nla_nest_start_noflag(skb, k++);
			if (!rxsa_nest) {
				nla_nest_cancel(skb, rxsa_list);
				nla_nest_cancel(skb, rxsc_nest);
				nla_nest_cancel(skb, rxsc_list);
				goto nla_put_failure;
			}

			attr = nla_nest_start_noflag(skb,
						     MACSEC_SA_ATTR_STATS);
			if (!attr) {
				nla_nest_cancel(skb, rxsa_list);
				nla_nest_cancel(skb, rxsc_nest);
				nla_nest_cancel(skb, rxsc_list);
				goto nla_put_failure;
			}
			/* Reused across SAs, so clear it first */
			memset(&rx_sa_stats, 0, sizeof(rx_sa_stats));
			get_rx_sa_stats(dev, rx_sc, i, rx_sa, &rx_sa_stats);
			if (copy_rx_sa_stats(skb, &rx_sa_stats)) {
				nla_nest_cancel(skb, attr);
nla_nest_cancel(skb, rxsa_list);
				nla_nest_cancel(skb, rxsc_nest);
				nla_nest_cancel(skb, rxsc_list);
				goto nla_put_failure;
			}
			nla_nest_end(skb, attr);

			if (secy->xpn) {
				pn = rx_sa->next_pn;
				pn_len = MACSEC_XPN_PN_LEN;
			} else {
				pn = rx_sa->next_pn_halves.lower;
				pn_len = MACSEC_DEFAULT_PN_LEN;
			}

			if (nla_put_u8(skb, MACSEC_SA_ATTR_AN, i) ||
			    nla_put(skb, MACSEC_SA_ATTR_PN, pn_len, &pn) ||
			    nla_put(skb, MACSEC_SA_ATTR_KEYID, MACSEC_KEYID_LEN, rx_sa->key.id) ||
			    (secy->xpn && nla_put_ssci(skb, MACSEC_SA_ATTR_SSCI, rx_sa->ssci)) ||
			    nla_put_u8(skb, MACSEC_SA_ATTR_ACTIVE, rx_sa->active)) {
				nla_nest_cancel(skb, rxsa_nest);
				nla_nest_cancel(skb, rxsc_nest);
				nla_nest_cancel(skb, rxsc_list);
				goto nla_put_failure;
			}
			nla_nest_end(skb, rxsa_nest);
		}

		nla_nest_end(skb, rxsa_list);
		nla_nest_end(skb, rxsc_nest);
	}

	nla_nest_end(skb, rxsc_list);

	genlmsg_end(skb, hdr);

	return 0;

nla_put_failure:
	/* Drop the whole partially built message */
	genlmsg_cancel(skb, hdr);
	return -EMSGSIZE;
}

/* Stored into cb->seq by the dump below and incremented elsewhere in this
 * file whenever a MACsec device is added or removed.
 */
static int macsec_generation = 1; /* protected by RTNL */

/* Dump callback for MACSEC_CMD_GET_TXSC.
 *
 * Walks the netdevs of the socket's namespace under rtnl_lock() and emits
 * one dump_secy() message per MACsec device. cb->args[0] holds the index
 * of the next device to visit, so a dump that ran out of room resumes
 * at the device that did not fit. Returns skb->len.
 */
static int macsec_dump_txsc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	int dev_idx, d;

	dev_idx = cb->args[0];

	d = 0;
	rtnl_lock();

	cb->seq = macsec_generation;

	for_each_netdev(net, dev) {
		struct macsec_secy *secy;

		/* Skip devices already covered by a previous call */
		if (d < dev_idx)
			goto next;

		if (!netif_is_macsec(dev))
			goto next;

		secy = &macsec_priv(dev)->secy;
		/* On failure, d is left pointing at this device */
		if (dump_secy(secy, dev, skb, cb) < 0)
			goto done;
next:
		d++;
	}

done:
	rtnl_unlock();
	cb->args[0] = d;
	return skb->len;
}

/* Generic netlink operations of the MACsec family. Every command except
 * the GET_TXSC dump carries GENL_ADMIN_PERM.
 */
static const struct genl_small_ops macsec_genl_ops[] = {
	{
		.cmd = MACSEC_CMD_GET_TXSC,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.dumpit = macsec_dump_txsc,
	},
	{
		.cmd = MACSEC_CMD_ADD_RXSC,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_add_rxsc,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_DEL_RXSC,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_del_rxsc,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_UPD_RXSC,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_upd_rxsc,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_ADD_TXSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_add_txsa,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_DEL_TXSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_del_txsa,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_UPD_TXSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_upd_txsa,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_ADD_RXSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_add_rxsa,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_DEL_RXSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_del_rxsa,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_UPD_RXSA,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_upd_rxsa,
		.flags = GENL_ADMIN_PERM,
	},
	{
		.cmd = MACSEC_CMD_UPD_OFFLOAD,
		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
		.doit = macsec_upd_offload,
		.flags = GENL_ADMIN_PERM,
	},
};

/* Generic netlink family definition tying the name, version, attribute
 * policy and the operation table above together.
 */
static struct genl_family macsec_fam __ro_after_init = {
	.name		= MACSEC_GENL_NAME,
	.hdrsize	= 0,
	.version	= MACSEC_GENL_VERSION,
	.maxattr	= MACSEC_ATTR_MAX,
	.policy = macsec_genl_policy,
	.netnsok	= true,
	.module		= THIS_MODULE,
	.small_ops	= macsec_genl_ops,
	.n_small_ops	= ARRAY_SIZE(macsec_genl_ops),
	.resv_start_op	= MACSEC_CMD_UPD_OFFLOAD + 1,
};

/* Let the offloading driver insert its TX tag into @skb.
 *
 * Fails with -EINVAL when the frame, grown by the driver's needed head and
 * tail room, would exceed the real device's MTU. On any failure @skb is
 * freed and an ERR_PTR() is returned; on success the (writable) skb is
 * returned.
 *
 * NOTE(review): ops is dereferenced without a NULL check; presumably the
 * caller only gets here when insert_tx_tag was set from valid ops --
 * confirm.
 */
static struct sk_buff *macsec_insert_tx_tag(struct sk_buff *skb,
					    struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	const struct macsec_ops *ops;
	struct phy_device *phydev;
	struct macsec_context ctx;
	int skb_final_len;
	int err;

	ops = macsec_get_ops(macsec, &ctx);
	/* Payload length (without Ethernet header) after the tag is added */
	skb_final_len = skb->len - ETH_HLEN + ops->needed_headroom +
		ops->needed_tailroom;
	if (unlikely(skb_final_len > macsec->real_dev->mtu)) {
		err = -EINVAL;
		goto cleanup;
	}

	phydev =
macsec->real_dev->phydev;

	err = skb_ensure_writable_head_tail(skb, dev);
	if (unlikely(err < 0))
		goto cleanup;

	err = ops->mdo_insert_tx_tag(phydev, skb);
	if (unlikely(err))
		goto cleanup;

	return skb;
cleanup:
	kfree_skb(skb);
	return ERR_PTR(err);
}

/* ndo_start_xmit handler.
 *
 * Three paths are visible here:
 *  - offloaded: attach the TX SC's metadata dst, optionally insert the
 *    driver's TX tag, and queue the frame on the real device unencrypted;
 *  - protect_frames off: count the frame as untagged and queue it on the
 *    real device unchanged;
 *  - otherwise: drop when the SecY is not operational, else encrypt via
 *    macsec_encrypt() and queue the result.
 */
static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
				     struct net_device *dev)
{
	struct macsec_dev *macsec = netdev_priv(dev);
	struct macsec_secy *secy = &macsec->secy;
	struct pcpu_secy_stats *secy_stats;
	int ret, len;

	if (macsec_is_offloaded(netdev_priv(dev))) {
		struct metadata_dst *md_dst = secy->tx_sc.md_dst;

		/* Replace whatever dst the skb had with the SC's md_dst */
		skb_dst_drop(skb);
		dst_hold(&md_dst->dst);
		skb_dst_set(skb, &md_dst->dst);

		if (macsec->insert_tx_tag) {
			/* On failure the skb was already freed by the helper */
			skb = macsec_insert_tx_tag(skb, dev);
			if (IS_ERR(skb)) {
				DEV_STATS_INC(dev, tx_dropped);
				return NETDEV_TX_OK;
			}
		}

		skb->dev = macsec->real_dev;
		return dev_queue_xmit(skb);
	}

	/* 10.5 */
	if (!secy->protect_frames) {
		secy_stats = this_cpu_ptr(macsec->stats);
		u64_stats_update_begin(&secy_stats->syncp);
		secy_stats->stats.OutPktsUntagged++;
		u64_stats_update_end(&secy_stats->syncp);
		skb->dev = macsec->real_dev;
		/* Save the length first: the skb is handed off by the xmit */
		len = skb->len;
		ret = dev_queue_xmit(skb);
		count_tx(dev, ret, len);
		return ret;
	}

	if (!secy->operational) {
		kfree_skb(skb);
		DEV_STATS_INC(dev, tx_dropped);
		return NETDEV_TX_OK;
	}

	len = skb->len;
	skb = macsec_encrypt(skb, dev);
	if (IS_ERR(skb)) {
		/* -EINPROGRESS is not counted as a drop */
		if (PTR_ERR(skb) != -EINPROGRESS)
			DEV_STATS_INC(dev, tx_dropped);
		return NETDEV_TX_OK;
	}

	macsec_count_tx(skb, &macsec->secy.tx_sc, macsec_skb_cb(skb)->tx_sa);

	macsec_encrypt_finish(skb, dev);
	ret = dev_queue_xmit(skb);
	count_tx(dev, ret, len);
	return ret;
}

/* Feature bits a MACsec device may inherit from its real device when
 * not offloaded.
 */
#define MACSEC_FEATURES \
	(NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST)

/* Larger inheritable feature set used when the device is offloaded. */
#define MACSEC_OFFLOAD_FEATURES \
	(MACSEC_FEATURES | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES | \
	 NETIF_F_LRO | NETIF_F_RXHASH | NETIF_F_CSUM_MASK | NETIF_F_RXCSUM)

/* ndo_init handler: set up GRO cells, inherit features, head/tail room and
 * (when unset) addresses from the real device, and take a tracked
 * reference on the real device. Returns 0 or the gro_cells_init() error.
 */
static int macsec_dev_init(struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	struct net_device *real_dev = macsec->real_dev;
	int err;

	err =
gro_cells_init(&macsec->gro_cells, dev);
	if (err)
		return err;

	macsec_inherit_tso_max(dev);

	/* Inherit the maskable features; software GSO is always offered */
	dev->hw_features = real_dev->hw_features & MACSEC_OFFLOAD_FEATURES;
	dev->hw_features |= NETIF_F_GSO_SOFTWARE;

	dev->features = real_dev->features & MACSEC_OFFLOAD_FEATURES;
	dev->features |= NETIF_F_GSO_SOFTWARE;
	dev->lltx = true;
	dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;

	macsec_set_head_tail_room(dev);

	/* Only inherit addresses that were not set explicitly */
	if (is_zero_ether_addr(dev->dev_addr))
		eth_hw_addr_inherit(dev, real_dev);
	if (is_zero_ether_addr(dev->broadcast))
		memcpy(dev->broadcast, real_dev->broadcast, dev->addr_len);

	/* Get macsec's reference to real_dev */
	netdev_hold(real_dev, &macsec->dev_tracker, GFP_KERNEL);

	return 0;
}

/* ndo_uninit handler: tear down the GRO cells created in ndo_init. */
static void macsec_dev_uninit(struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);

	gro_cells_destroy(&macsec->gro_cells);
}

/* ndo_fix_features handler: restrict @features to what the real device
 * currently has enabled (within the offload-dependent mask), plus the
 * software GSO / soft feature bits, and return the result.
 */
static netdev_features_t macsec_fix_features(struct net_device *dev,
					     netdev_features_t features)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	struct net_device *real_dev = macsec->real_dev;
	netdev_features_t mask;

	mask = macsec_is_offloaded(macsec) ?
MACSEC_OFFLOAD_FEATURES : MACSEC_FEATURES; features &= (real_dev->features & mask) | NETIF_F_GSO_SOFTWARE | NETIF_F_SOFT_FEATURES; return features; } static int macsec_dev_open(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; int err; err = dev_uc_add(real_dev, dev->dev_addr); if (err < 0) return err; if (dev->flags & IFF_ALLMULTI) { err = dev_set_allmulti(real_dev, 1); if (err < 0) goto del_unicast; } if (dev->flags & IFF_PROMISC) { err = dev_set_promiscuity(real_dev, 1); if (err < 0) goto clear_allmulti; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(netdev_priv(dev), &ctx); if (!ops) { err = -EOPNOTSUPP; goto clear_allmulti; } ctx.secy = &macsec->secy; err = macsec_offload(ops->mdo_dev_open, &ctx); if (err) goto clear_allmulti; } if (netif_carrier_ok(real_dev)) netif_carrier_on(dev); return 0; clear_allmulti: if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); del_unicast: dev_uc_del(real_dev, dev->dev_addr); netif_carrier_off(dev); return err; } static int macsec_dev_stop(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; netif_carrier_off(dev); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.secy = &macsec->secy; macsec_offload(ops->mdo_dev_stop, &ctx); } } dev_mc_unsync(real_dev, dev); dev_uc_unsync(real_dev, dev); if (dev->flags & IFF_ALLMULTI) dev_set_allmulti(real_dev, -1); if (dev->flags & IFF_PROMISC) dev_set_promiscuity(real_dev, -1); dev_uc_del(real_dev, dev->dev_addr); return 0; } static void macsec_dev_change_rx_flags(struct net_device *dev, int change) { struct net_device *real_dev = macsec_priv(dev)->real_dev; if 
(!(dev->flags & IFF_UP)) return; if (change & IFF_ALLMULTI) dev_set_allmulti(real_dev, dev->flags & IFF_ALLMULTI ? 1 : -1); if (change & IFF_PROMISC) dev_set_promiscuity(real_dev, dev->flags & IFF_PROMISC ? 1 : -1); } static void macsec_dev_set_rx_mode(struct net_device *dev) { struct net_device *real_dev = macsec_priv(dev)->real_dev; dev_mc_sync(real_dev, dev); dev_uc_sync(real_dev, dev); } static int macsec_set_mac_address(struct net_device *dev, void *p) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct sockaddr *addr = p; u8 old_addr[ETH_ALEN]; int err; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; if (dev->flags & IFF_UP) { err = dev_uc_add(real_dev, addr->sa_data); if (err < 0) return err; } ether_addr_copy(old_addr, dev->dev_addr); eth_hw_addr_set(dev, addr->sa_data); /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (!ops) { err = -EOPNOTSUPP; goto restore_old_addr; } ctx.secy = &macsec->secy; err = macsec_offload(ops->mdo_upd_secy, &ctx); if (err) goto restore_old_addr; } if (dev->flags & IFF_UP) dev_uc_del(real_dev, old_addr); return 0; restore_old_addr: if (dev->flags & IFF_UP) dev_uc_del(real_dev, addr->sa_data); eth_hw_addr_set(dev, old_addr); return err; } static int macsec_change_mtu(struct net_device *dev, int new_mtu) { struct macsec_dev *macsec = macsec_priv(dev); unsigned int extra = macsec->secy.icv_len + macsec_extra_len(true); if (macsec->real_dev->mtu - extra < new_mtu) return -ERANGE; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static void macsec_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *s) { if (!dev->tstats) return; dev_fetch_sw_netstats(s, dev->tstats); s->rx_dropped = DEV_STATS_READ(dev, rx_dropped); s->tx_dropped = DEV_STATS_READ(dev, tx_dropped); s->rx_errors = DEV_STATS_READ(dev, rx_errors); } static int 
macsec_get_iflink(const struct net_device *dev)
{
	/* ndo_get_iflink: the link of a MACsec device is its real device */
	return READ_ONCE(macsec_priv(dev)->real_dev->ifindex);
}

/* net_device operations of a MACsec device */
static const struct net_device_ops macsec_netdev_ops = {
	.ndo_init		= macsec_dev_init,
	.ndo_uninit		= macsec_dev_uninit,
	.ndo_open		= macsec_dev_open,
	.ndo_stop		= macsec_dev_stop,
	.ndo_fix_features	= macsec_fix_features,
	.ndo_change_mtu		= macsec_change_mtu,
	.ndo_set_rx_mode	= macsec_dev_set_rx_mode,
	.ndo_change_rx_flags	= macsec_dev_change_rx_flags,
	.ndo_set_mac_address	= macsec_set_mac_address,
	.ndo_start_xmit		= macsec_start_xmit,
	.ndo_get_stats64	= macsec_get_stats64,
	.ndo_get_iflink		= macsec_get_iflink,
};

/* Device type installed on MACsec netdevs by macsec_setup() */
static const struct device_type macsec_type = {
	.name = "macsec",
};

/* Netlink attribute types accepted for the IFLA_MACSEC_* link attributes */
static const struct nla_policy macsec_rtnl_policy[IFLA_MACSEC_MAX + 1] = {
	[IFLA_MACSEC_SCI] = { .type = NLA_U64 },
	[IFLA_MACSEC_PORT] = { .type = NLA_U16 },
	[IFLA_MACSEC_ICV_LEN] = { .type = NLA_U8 },
	[IFLA_MACSEC_CIPHER_SUITE] = { .type = NLA_U64 },
	[IFLA_MACSEC_WINDOW] = { .type = NLA_U32 },
	[IFLA_MACSEC_ENCODING_SA] = { .type = NLA_U8 },
	[IFLA_MACSEC_ENCRYPT] = { .type = NLA_U8 },
	[IFLA_MACSEC_PROTECT] = { .type = NLA_U8 },
	[IFLA_MACSEC_INC_SCI] = { .type = NLA_U8 },
	[IFLA_MACSEC_ES] = { .type = NLA_U8 },
	[IFLA_MACSEC_SCB] = { .type = NLA_U8 },
	[IFLA_MACSEC_REPLAY_PROTECT] = { .type = NLA_U8 },
	[IFLA_MACSEC_VALIDATION] = { .type = NLA_U8 },
	[IFLA_MACSEC_OFFLOAD] = { .type = NLA_U8 },
};

/* priv_destructor of the netdev (installed in macsec_setup()): release the
 * TX SC metadata dst, free both per-CPU stats areas and drop the tracked
 * reference on the real device.
 */
static void macsec_free_netdev(struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);

	dst_release(&macsec->secy.tx_sc.md_dst->dst);
	free_percpu(macsec->stats);
	free_percpu(macsec->secy.tx_sc.stats);

	/* Get rid of the macsec's reference to real_dev */
	netdev_put(macsec->real_dev, &macsec->dev_tracker);
}

/* Link setup callback: start from ether_setup() defaults, then widen the
 * MTU range, mark the device queue-less and install the MACsec netdev
 * operations, destructor and device type.
 */
static void macsec_setup(struct net_device *dev)
{
	ether_setup(dev);
	dev->min_mtu = 0;
	dev->max_mtu = ETH_MAX_MTU;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->netdev_ops = &macsec_netdev_ops;
	dev->needs_free_netdev = true;
	dev->priv_destructor = macsec_free_netdev;
	SET_NETDEV_DEVTYPE(dev,
&macsec_type);

	/* Zeroed so that macsec_dev_init() inherits it from the real device */
	eth_zero_addr(dev->broadcast);
}

/* Apply the IFLA_MACSEC_* attributes present in @data to @dev's SecY and
 * TX SC. Shared by link creation and link change.
 *
 * Returns 0, or -EINVAL for an unknown cipher suite or a replay window
 * larger than MACSEC_XPN_MAX_REPLAY_WINDOW with an XPN cipher. Fields set
 * before the failing attribute stay modified; callers that need to undo
 * them must keep their own copy.
 */
static int macsec_changelink_common(struct net_device *dev,
				    struct nlattr *data[])
{
	struct macsec_secy *secy;
	struct macsec_tx_sc *tx_sc;

	secy = &macsec_priv(dev)->secy;
	tx_sc = &secy->tx_sc;

	if (data[IFLA_MACSEC_ENCODING_SA]) {
		struct macsec_tx_sa *tx_sa;

		tx_sc->encoding_sa = nla_get_u8(data[IFLA_MACSEC_ENCODING_SA]);
		tx_sa = rtnl_dereference(tx_sc->sa[tx_sc->encoding_sa]);

		/* Operational only if the newly selected SA exists and is active */
		secy->operational = tx_sa && tx_sa->active;
	}

	if (data[IFLA_MACSEC_ENCRYPT])
		tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);

	if (data[IFLA_MACSEC_PROTECT])
		secy->protect_frames = !!nla_get_u8(data[IFLA_MACSEC_PROTECT]);

	if (data[IFLA_MACSEC_INC_SCI])
		tx_sc->send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]);

	if (data[IFLA_MACSEC_ES])
		tx_sc->end_station = !!nla_get_u8(data[IFLA_MACSEC_ES]);

	if (data[IFLA_MACSEC_SCB])
		tx_sc->scb = !!nla_get_u8(data[IFLA_MACSEC_SCB]);

	if (data[IFLA_MACSEC_REPLAY_PROTECT])
		secy->replay_protect = !!nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT]);

	if (data[IFLA_MACSEC_VALIDATION])
		secy->validate_frames = nla_get_u8(data[IFLA_MACSEC_VALIDATION]);

	/* The cipher suite id determines both the key length and XPN mode */
	if (data[IFLA_MACSEC_CIPHER_SUITE]) {
		switch (nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE])) {
		case MACSEC_CIPHER_ID_GCM_AES_128:
		case MACSEC_DEFAULT_CIPHER_ID:
			secy->key_len = MACSEC_GCM_AES_128_SAK_LEN;
			secy->xpn = false;
			break;
		case MACSEC_CIPHER_ID_GCM_AES_256:
			secy->key_len = MACSEC_GCM_AES_256_SAK_LEN;
			secy->xpn = false;
			break;
		case MACSEC_CIPHER_ID_GCM_AES_XPN_128:
			secy->key_len = MACSEC_GCM_AES_128_SAK_LEN;
			secy->xpn = true;
			break;
		case MACSEC_CIPHER_ID_GCM_AES_XPN_256:
			secy->key_len = MACSEC_GCM_AES_256_SAK_LEN;
			secy->xpn = true;
			break;
		default:
			return -EINVAL;
		}
	}

	if (data[IFLA_MACSEC_WINDOW]) {
		secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);

		/* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
		 * for XPN cipher suites
		 */
		if (secy->xpn &&
		    secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
			return -EINVAL;
	}

	return 0;
}

static int
macsec_changelink(struct net_device *dev, struct nlattr *tb[],
			     struct nlattr *data[],
			     struct netlink_ext_ack *extack)
{
	/* Link change handler.
	 *
	 * Cipher suite, ICV length, SCI and port cannot be changed on an
	 * existing link (-EINVAL). The remaining attributes are applied
	 * through macsec_changelink_common(); an offload mode change goes
	 * through macsec_update_offload(), otherwise an offloaded device is
	 * told about the new SecY via mdo_upd_secy. On any failure the
	 * saved SecY / TX SC copies are written back.
	 */
	struct macsec_dev *macsec = macsec_priv(dev);
	bool macsec_offload_state_change = false;
	enum macsec_offload offload;
	struct macsec_tx_sc tx_sc;
	struct macsec_secy secy;
	int ret;

	if (!data)
		return 0;

	if (data[IFLA_MACSEC_CIPHER_SUITE] ||
	    data[IFLA_MACSEC_ICV_LEN] ||
	    data[IFLA_MACSEC_SCI] ||
	    data[IFLA_MACSEC_PORT])
		return -EINVAL;

	/* Keep a copy of unmodified secy and tx_sc, in case the offload
	 * propagation fails, to revert macsec_changelink_common.
	 */
	memcpy(&secy, &macsec->secy, sizeof(secy));
	memcpy(&tx_sc, &macsec->secy.tx_sc, sizeof(tx_sc));

	ret = macsec_changelink_common(dev, data);
	if (ret)
		goto cleanup;

	if (data[IFLA_MACSEC_OFFLOAD]) {
		offload = nla_get_u8(data[IFLA_MACSEC_OFFLOAD]);
		if (macsec->offload != offload) {
			macsec_offload_state_change = true;
			ret = macsec_update_offload(dev, offload);
			if (ret)
				goto cleanup;
		}
	}

	/* If h/w offloading is available, propagate to the device */
	if (!macsec_offload_state_change && macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (!ops) {
			ret = -EOPNOTSUPP;
			goto cleanup;
		}

		ctx.secy = &macsec->secy;
		ret = macsec_offload(ops->mdo_upd_secy, &ctx);
		if (ret)
			goto cleanup;
	}

	return 0;

cleanup:
	/* Revert everything macsec_changelink_common() may have touched */
	memcpy(&macsec->secy.tx_sc, &tx_sc, sizeof(tx_sc));
	memcpy(&macsec->secy, &secy, sizeof(secy));

	return ret;
}

/* Release every RX SC and every TX SA still attached to @macsec. */
static void macsec_del_dev(struct macsec_dev *macsec)
{
	int i;

	/* Pop RX SCs off the head of the list one at a time */
	while (macsec->secy.rx_sc) {
		struct macsec_rx_sc *rx_sc = rtnl_dereference(macsec->secy.rx_sc);

		rcu_assign_pointer(macsec->secy.rx_sc, rx_sc->next);
		free_rx_sc(rx_sc);
	}

	for (i = 0; i < MACSEC_NUM_AN; i++) {
		struct macsec_tx_sa *sa = rtnl_dereference(macsec->secy.tx_sc.sa[i]);

		if (sa) {
			/* Unpublish the slot before releasing the SA */
			RCU_INIT_POINTER(macsec->secy.tx_sc.sa[i], NULL);
			clear_tx_sa(sa);
		}
	}
}

/* Common part of link deletion: tell an offloading driver to drop the
 * SecY, queue the netdev for unregistration on @head, unlink it from the
 * per-real-device list and from the real device, and release its SCs/SAs.
 */
static void macsec_common_dellink(struct net_device *dev, struct list_head *head)
{
	struct macsec_dev *macsec
= macsec_priv(dev);
	struct net_device *real_dev = macsec->real_dev;

	/* If h/w offloading is available, propagate to the device */
	if (macsec_is_offloaded(macsec)) {
		const struct macsec_ops *ops;
		struct macsec_context ctx;

		ops = macsec_get_ops(netdev_priv(dev), &ctx);
		if (ops) {
			ctx.secy = &macsec->secy;
			macsec_offload(ops->mdo_del_secy, &ctx);
		}
	}

	unregister_netdevice_queue(dev, head);
	list_del_rcu(&macsec->secys);
	macsec_del_dev(macsec);
	netdev_upper_dev_unlink(real_dev, dev);

	/* Bump the generation that macsec_dump_txsc() stores in cb->seq */
	macsec_generation++;
}

/* Link deletion handler: perform the common teardown and, if this was the
 * last MACsec device on the real device, unregister the rx handler and
 * free its per-real-device data.
 */
static void macsec_dellink(struct net_device *dev, struct list_head *head)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	struct net_device *real_dev = macsec->real_dev;
	struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);

	macsec_common_dellink(dev, head);

	if (list_empty(&rxd->secys)) {
		netdev_rx_handler_unregister(real_dev);
		kfree(rxd);
	}
}

/* Add @dev to the list of MACsec devices of @real_dev, allocating the
 * per-real-device data and registering macsec_handle_frame as rx handler
 * when this is the first one.
 *
 * Returns 0, -ENOMEM, or the netdev_rx_handler_register() error.
 */
static int register_macsec_dev(struct net_device *real_dev,
			       struct net_device *dev)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev);

	if (!rxd) {
		int err;

		rxd = kmalloc(sizeof(*rxd), GFP_KERNEL);
		if (!rxd)
			return -ENOMEM;

		INIT_LIST_HEAD(&rxd->secys);

		err = netdev_rx_handler_register(real_dev, macsec_handle_frame,
						 rxd);
		if (err < 0) {
			kfree(rxd);
			return err;
		}
	}

	list_add_tail_rcu(&macsec->secys, &rxd->secys);
	return 0;
}

/* Return true when a MACsec device on real device @dev already uses @sci.
 *
 * NOTE(review): rxd is dereferenced unconditionally; the visible caller
 * only calls this when an rx handler is already installed -- confirm for
 * any other caller.
 */
static bool sci_exists(struct net_device *dev, sci_t sci)
{
	struct macsec_rxh_data *rxd = macsec_data_rtnl(dev);
	struct macsec_dev *macsec;

	list_for_each_entry(macsec, &rxd->secys, secys) {
		if (macsec->secy.sci == sci)
			return true;
	}

	return false;
}

/* Build an SCI from @dev's MAC address and @port. */
static sci_t dev_to_sci(struct net_device *dev, __be16 port)
{
	return make_sci(dev->dev_addr, port);
}

/* Allocate the per-device resources (per-CPU stats, TX SC metadata dst)
 * and initialise @dev's SecY / TX SC with default settings, @sci and
 * @icv_len. Returns 0 or -ENOMEM.
 */
static int macsec_add_dev(struct net_device *dev, sci_t sci, u8 icv_len)
{
	struct macsec_dev *macsec = macsec_priv(dev);
	struct macsec_secy *secy = &macsec->secy;

	macsec->stats = netdev_alloc_pcpu_stats(struct pcpu_secy_stats);
	if (!macsec->stats)
		return -ENOMEM;

	secy->tx_sc.stats =
netdev_alloc_pcpu_stats(struct pcpu_tx_sc_stats); if (!secy->tx_sc.stats) return -ENOMEM; secy->tx_sc.md_dst = metadata_dst_alloc(0, METADATA_MACSEC, GFP_KERNEL); if (!secy->tx_sc.md_dst) /* macsec and secy percpu stats will be freed when unregistering * net_device in macsec_free_netdev() */ return -ENOMEM; if (sci == MACSEC_UNDEF_SCI) sci = dev_to_sci(dev, MACSEC_PORT_ES); secy->netdev = dev; secy->operational = true; secy->key_len = DEFAULT_SAK_LEN; secy->icv_len = icv_len; secy->validate_frames = MACSEC_VALIDATE_DEFAULT; secy->protect_frames = true; secy->replay_protect = false; secy->xpn = DEFAULT_XPN; secy->sci = sci; secy->tx_sc.md_dst->u.macsec_info.sci = sci; secy->tx_sc.active = true; secy->tx_sc.encoding_sa = DEFAULT_ENCODING_SA; secy->tx_sc.encrypt = DEFAULT_ENCRYPT; secy->tx_sc.send_sci = DEFAULT_SEND_SCI; secy->tx_sc.end_station = false; secy->tx_sc.scb = false; return 0; } static struct lock_class_key macsec_netdev_addr_lock_key; static int macsec_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct macsec_dev *macsec = macsec_priv(dev); rx_handler_func_t *rx_handler; u8 icv_len = MACSEC_DEFAULT_ICV_LEN; struct net_device *real_dev; int err, mtu; sci_t sci; if (!tb[IFLA_LINK]) return -EINVAL; real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev) return -ENODEV; if (real_dev->type != ARPHRD_ETHER) return -EINVAL; dev->priv_flags |= IFF_MACSEC; macsec->real_dev = real_dev; if (data && data[IFLA_MACSEC_OFFLOAD]) macsec->offload = nla_get_offload(data[IFLA_MACSEC_OFFLOAD]); else /* MACsec offloading is off by default */ macsec->offload = MACSEC_OFFLOAD_OFF; /* Check if the offloading mode is supported by the underlying layers */ if (macsec->offload != MACSEC_OFFLOAD_OFF && !macsec_check_offload(macsec->offload, macsec)) return -EOPNOTSUPP; /* send_sci must be set to true when transmit sci explicitly is set */ if ((data && data[IFLA_MACSEC_SCI]) && 
(data && data[IFLA_MACSEC_INC_SCI])) { u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); if (!send_sci) return -EINVAL; } if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); if (mtu < 0) dev->mtu = 0; else dev->mtu = mtu; rx_handler = rtnl_dereference(real_dev->rx_handler); if (rx_handler && rx_handler != macsec_handle_frame) return -EBUSY; err = register_netdevice(dev); if (err < 0) return err; netdev_lockdep_set_classes(dev); lockdep_set_class(&dev->addr_list_lock, &macsec_netdev_addr_lock_key); err = netdev_upper_dev_link(real_dev, dev, extack); if (err < 0) goto unregister; /* need to be already registered so that ->init has run and * the MAC addr is set */ if (data && data[IFLA_MACSEC_SCI]) sci = nla_get_sci(data[IFLA_MACSEC_SCI]); else if (data && data[IFLA_MACSEC_PORT]) sci = dev_to_sci(dev, nla_get_be16(data[IFLA_MACSEC_PORT])); else sci = dev_to_sci(dev, MACSEC_PORT_ES); if (rx_handler && sci_exists(real_dev, sci)) { err = -EBUSY; goto unlink; } err = macsec_add_dev(dev, sci, icv_len); if (err) goto unlink; if (data) { err = macsec_changelink_common(dev, data); if (err) goto del_dev; } /* If h/w offloading is available, propagate to the device */ if (macsec_is_offloaded(macsec)) { const struct macsec_ops *ops; struct macsec_context ctx; ops = macsec_get_ops(macsec, &ctx); if (ops) { ctx.secy = &macsec->secy; err = macsec_offload(ops->mdo_add_secy, &ctx); if (err) goto del_dev; macsec->insert_tx_tag = macsec_needs_tx_tag(macsec, ops); } } err = register_macsec_dev(real_dev, dev); if (err < 0) goto del_dev; netif_stacked_transfer_operstate(real_dev, dev); linkwatch_fire_event(dev); macsec_generation++; return 0; del_dev: macsec_del_dev(macsec); unlink: netdev_upper_dev_unlink(real_dev, dev); unregister: unregister_netdevice(dev); return err; } static int macsec_validate_attr(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { u64 csid = 
MACSEC_DEFAULT_CIPHER_ID; u8 icv_len = MACSEC_DEFAULT_ICV_LEN; int flag; bool es, scb, sci; if (!data) return 0; if (data[IFLA_MACSEC_CIPHER_SUITE]) csid = nla_get_u64(data[IFLA_MACSEC_CIPHER_SUITE]); if (data[IFLA_MACSEC_ICV_LEN]) { icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); if (icv_len != MACSEC_DEFAULT_ICV_LEN) { char dummy_key[DEFAULT_SAK_LEN] = { 0 }; struct crypto_aead *dummy_tfm; dummy_tfm = macsec_alloc_tfm(dummy_key, DEFAULT_SAK_LEN, icv_len); if (IS_ERR(dummy_tfm)) return PTR_ERR(dummy_tfm); crypto_free_aead(dummy_tfm); } } switch (csid) { case MACSEC_CIPHER_ID_GCM_AES_128: case MACSEC_CIPHER_ID_GCM_AES_256: case MACSEC_CIPHER_ID_GCM_AES_XPN_128: case MACSEC_CIPHER_ID_GCM_AES_XPN_256: case MACSEC_DEFAULT_CIPHER_ID: if (icv_len < MACSEC_MIN_ICV_LEN || icv_len > MACSEC_STD_ICV_LEN) return -EINVAL; break; default: return -EINVAL; } if (data[IFLA_MACSEC_ENCODING_SA]) { if (nla_get_u8(data[IFLA_MACSEC_ENCODING_SA]) >= MACSEC_NUM_AN) return -EINVAL; } for (flag = IFLA_MACSEC_ENCODING_SA + 1; flag < IFLA_MACSEC_VALIDATION; flag++) { if (data[flag]) { if (nla_get_u8(data[flag]) > 1) return -EINVAL; } } es = nla_get_u8_default(data[IFLA_MACSEC_ES], false); sci = nla_get_u8_default(data[IFLA_MACSEC_INC_SCI], false); scb = nla_get_u8_default(data[IFLA_MACSEC_SCB], false); if ((sci && (scb || es)) || (scb && es)) return -EINVAL; if (data[IFLA_MACSEC_VALIDATION] && nla_get_u8(data[IFLA_MACSEC_VALIDATION]) > MACSEC_VALIDATE_MAX) return -EINVAL; if ((data[IFLA_MACSEC_REPLAY_PROTECT] && nla_get_u8(data[IFLA_MACSEC_REPLAY_PROTECT])) && !data[IFLA_MACSEC_WINDOW]) return -EINVAL; return 0; } static struct net *macsec_get_link_net(const struct net_device *dev) { return dev_net(macsec_priv(dev)->real_dev); } struct net_device *macsec_get_real_dev(const struct net_device *dev) { return macsec_priv(dev)->real_dev; } EXPORT_SYMBOL_GPL(macsec_get_real_dev); bool macsec_netdev_is_offloaded(struct net_device *dev) { return macsec_is_offloaded(macsec_priv(dev)); } 
EXPORT_SYMBOL_GPL(macsec_netdev_is_offloaded);

/* rtnl_link_ops ->get_size handler: sum of the netlink attribute sizes for
 * the attributes named in the per-term comments.  IFLA_MACSEC_WINDOW is
 * always counted, although macsec_fill_info() only emits it when replay
 * protection is enabled.
 */
static size_t macsec_get_size(const struct net_device *dev)
{
	return  nla_total_size_64bit(8) + /* IFLA_MACSEC_SCI */
		nla_total_size(1) + /* IFLA_MACSEC_ICV_LEN */
		nla_total_size_64bit(8) + /* IFLA_MACSEC_CIPHER_SUITE */
		nla_total_size(4) + /* IFLA_MACSEC_WINDOW */
		nla_total_size(1) + /* IFLA_MACSEC_ENCODING_SA */
		nla_total_size(1) + /* IFLA_MACSEC_ENCRYPT */
		nla_total_size(1) + /* IFLA_MACSEC_PROTECT */
		nla_total_size(1) + /* IFLA_MACSEC_INC_SCI */
		nla_total_size(1) + /* IFLA_MACSEC_ES */
		nla_total_size(1) + /* IFLA_MACSEC_SCB */
		nla_total_size(1) + /* IFLA_MACSEC_REPLAY_PROTECT */
		nla_total_size(1) + /* IFLA_MACSEC_VALIDATION */
		nla_total_size(1) + /* IFLA_MACSEC_OFFLOAD */
		0;
}

/* rtnl_link_ops ->fill_info handler: dump the SecY configuration of @dev
 * into @skb.
 *
 * The cipher suite id is derived from the key length and the xpn flag.
 * Returns 0 on success and -EMSGSIZE if an attribute cannot be added; an
 * unexpected key length takes the same -EMSGSIZE path.
 */
static int macsec_fill_info(struct sk_buff *skb,
			    const struct net_device *dev)
{
	struct macsec_tx_sc *tx_sc;
	struct macsec_dev *macsec;
	struct macsec_secy *secy;
	u64 csid;

	macsec = macsec_priv(dev);
	secy = &macsec->secy;
	tx_sc = &secy->tx_sc;

	switch (secy->key_len) {
	case MACSEC_GCM_AES_128_SAK_LEN:
		csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_128 : MACSEC_DEFAULT_CIPHER_ID;
		break;
	case MACSEC_GCM_AES_256_SAK_LEN:
		csid = secy->xpn ? MACSEC_CIPHER_ID_GCM_AES_XPN_256 : MACSEC_CIPHER_ID_GCM_AES_256;
		break;
	default:
		goto nla_put_failure;
	}

	/* The trailing "|| 0" keeps every attribute line the same shape. */
	if (nla_put_sci(skb, IFLA_MACSEC_SCI, secy->sci,
			IFLA_MACSEC_PAD) ||
	    nla_put_u8(skb, IFLA_MACSEC_ICV_LEN, secy->icv_len) ||
	    nla_put_u64_64bit(skb, IFLA_MACSEC_CIPHER_SUITE,
			      csid, IFLA_MACSEC_PAD) ||
	    nla_put_u8(skb, IFLA_MACSEC_ENCODING_SA, tx_sc->encoding_sa) ||
	    nla_put_u8(skb, IFLA_MACSEC_ENCRYPT, tx_sc->encrypt) ||
	    nla_put_u8(skb, IFLA_MACSEC_PROTECT, secy->protect_frames) ||
	    nla_put_u8(skb, IFLA_MACSEC_INC_SCI, tx_sc->send_sci) ||
	    nla_put_u8(skb, IFLA_MACSEC_ES, tx_sc->end_station) ||
	    nla_put_u8(skb, IFLA_MACSEC_SCB, tx_sc->scb) ||
	    nla_put_u8(skb, IFLA_MACSEC_REPLAY_PROTECT, secy->replay_protect) ||
	    nla_put_u8(skb, IFLA_MACSEC_VALIDATION, secy->validate_frames) ||
	    nla_put_u8(skb, IFLA_MACSEC_OFFLOAD, macsec->offload) ||
	    0)
		goto nla_put_failure;

	/* The window is only reported while replay protection is on. */
	if (secy->replay_protect) {
		if (nla_put_u32(skb, IFLA_MACSEC_WINDOW, secy->replay_window))
			goto nla_put_failure;
	}

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}

/* Link type "macsec"; registered in macsec_init() and removed in
 * macsec_exit().
 */
static struct rtnl_link_ops macsec_link_ops __read_mostly = {
	.kind		= "macsec",
	.priv_size	= sizeof(struct macsec_dev),
	.maxtype	= IFLA_MACSEC_MAX,
	.policy		= macsec_rtnl_policy,
	.setup		= macsec_setup,
	.validate	= macsec_validate_attr,
	.newlink	= macsec_newlink,
	.changelink	= macsec_changelink,
	.dellink	= macsec_dellink,
	.get_size	= macsec_get_size,
	.fill_info	= macsec_fill_info,
	.get_link_net	= macsec_get_link_net,
};

/* True if @dev's rx handler is macsec_handle_frame. */
static bool is_macsec_master(struct net_device *dev)
{
	return rcu_access_pointer(dev->rx_handler) == macsec_handle_frame;
}

/* Netdevice notifier callback.  Events for devices whose rx handler is not
 * macsec_handle_frame are ignored (NOTIFY_DONE); all others return
 * NOTIFY_OK after the per-event handling below.
 */
static int macsec_notify(struct notifier_block *this, unsigned long event,
			 void *ptr)
{
	struct net_device *real_dev = netdev_notifier_info_to_dev(ptr);
	struct macsec_rxh_data *rxd;
	struct macsec_dev *m, *n;
	LIST_HEAD(head);

	if (!is_macsec_master(real_dev))
		return NOTIFY_DONE;

	rxd = macsec_data_rtnl(real_dev);

	switch (event) {
	case NETDEV_DOWN:
	case NETDEV_UP:
	case NETDEV_CHANGE:
		/* Mirror the lower device's operstate onto every upper. */
		list_for_each_entry_safe(m, n, &rxd->secys, secys) {
			struct net_device *dev = m->secy.netdev;

			netif_stacked_transfer_operstate(real_dev, dev);
		}
		break;
	case NETDEV_UNREGISTER:
		/* Queue every upper on the local list, drop the rx handler
		 * and its data, then unregister the queued devices in one
		 * batch.
		 */
		list_for_each_entry_safe(m, n, &rxd->secys, secys) {
			macsec_common_dellink(m->secy.netdev, &head);
		}

		netdev_rx_handler_unregister(real_dev);
		kfree(rxd);

		unregister_netdevice_many(&head);
		break;
	case NETDEV_CHANGEMTU:
		list_for_each_entry(m, &rxd->secys, secys) {
			struct net_device *dev = m->secy.netdev;
			/* NOTE(review): the subtraction is done in unsigned
			 * arithmetic; if the lower MTU were smaller than the
			 * MACsec overhead the result would wrap and the
			 * comparison below would never shrink dev->mtu --
			 * confirm the lower MTU cannot get that small.
			 */
			unsigned int mtu = real_dev->mtu - (m->secy.icv_len +
							    macsec_extra_len(true));

			/* Only ever lowers the upper device's MTU. */
			if (dev->mtu > mtu)
				dev_set_mtu(dev, mtu);
		}
		break;
	case NETDEV_FEAT_CHANGE:
		list_for_each_entry(m, &rxd->secys, secys) {
			macsec_inherit_tso_max(m->secy.netdev);
			netdev_update_features(m->secy.netdev);
		}
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block macsec_notifier = {
	.notifier_call = macsec_notify,
};

/* Module init: register the netdevice notifier, the rtnl link type and the
 * generic netlink family, in that order.  A failure unregisters whatever
 * was registered before it and returns the error.
 */
static int __init macsec_init(void)
{
	int err;

	pr_info("MACsec IEEE 802.1AE\n");
	err = register_netdevice_notifier(&macsec_notifier);
	if (err)
		return err;

	err = rtnl_link_register(&macsec_link_ops);
	if (err)
		goto notifier;

	err = genl_register_family(&macsec_fam);
	if (err)
		goto rtnl;

	return 0;

rtnl:
	rtnl_link_unregister(&macsec_link_ops);
notifier:
	unregister_netdevice_notifier(&macsec_notifier);
	return err;
}

/* Module exit: unregister in the reverse order of macsec_init(), then call
 * rcu_barrier() before returning.
 */
static void __exit macsec_exit(void)
{
	genl_unregister_family(&macsec_fam);
	rtnl_link_unregister(&macsec_link_ops);
	unregister_netdevice_notifier(&macsec_notifier);
	rcu_barrier();
}

module_init(macsec_init);
module_exit(macsec_exit);

MODULE_ALIAS_RTNL_LINK("macsec");
MODULE_ALIAS_GENL_FAMILY("macsec");

MODULE_DESCRIPTION("MACsec IEEE 802.1AE");
MODULE_LICENSE("GPL v2");
75 75 75 75 75 75 75 115 115 115 14 3 11 11 11 75 75 75 75 75 29 173 108 75 29 510 513 512 1 1 14 14 20 8 3 1 8 12 12 12 10 2 12 75 10 2 1 3 2 2 5 3 3 15 15 15 15 6 3 1 6 10 3 3 3 3 5 5 5 5 5 47 47 58 58 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 
1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/workqueue.h> #include <linux/rtnetlink.h> #include <linux/cache.h> #include <linux/slab.h> #include <linux/list.h> #include <linux/delay.h> #include <linux/sched.h> #include <linux/idr.h> #include <linux/rculist.h> #include <linux/nsproxy.h> #include <linux/fs.h> #include <linux/proc_ns.h> #include <linux/file.h> #include <linux/export.h> #include <linux/user_namespace.h> #include <linux/net_namespace.h> #include <linux/sched/task.h> #include <linux/uidgid.h> #include <linux/cookie.h> #include <linux/proc_fs.h> #include <net/sock.h> #include <net/netlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> /* * Our network namespace constructor/destructor lists */ static LIST_HEAD(pernet_list); static struct list_head *first_device = &pernet_list; LIST_HEAD(net_namespace_list); EXPORT_SYMBOL_GPL(net_namespace_list); /* Protects net_namespace_list. 
Nests inside rtnl_lock() */
DECLARE_RWSEM(net_rwsem);
EXPORT_SYMBOL_GPL(net_rwsem);

#ifdef CONFIG_KEYS
static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
#endif

struct net init_net;
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;
/*
 * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
 * init_net_initialized and first_device pointer.
 * This is an internal net namespace object. Please, don't use it
 * outside.
 */
DECLARE_RWSEM(pernet_ops_rwsem);

/* Number of pointer-sized slots occupied by struct net_generic itself
 * (size rounded up to a whole number of pointers); ids below this value
 * are rejected by net_assign_generic().
 */
#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

DEFINE_COOKIE(net_cookie);

/* Allocate a zeroed net_generic sized for the current max_gen_ptrs value
 * and record that length in it.  Returns NULL on allocation failure.
 */
static struct net_generic *net_alloc_generic(void)
{
	unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
	unsigned int generic_size;
	struct net_generic *ng;

	generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->s.len = gen_ptrs;

	return ng;
}

/* Store @data in slot @id of @net's generic pointer array.
 *
 * If the current array is long enough the slot is written in place.
 * Otherwise a new array is allocated, the existing slots are copied over,
 * the new array is published and the old one is freed via kfree_rcu().
 *
 * Returns 0 on success or -ENOMEM.  @id below MIN_PERNET_OPS_ID is a BUG.
 */
static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(id < MIN_PERNET_OPS_ID);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&pernet_ops_rwsem));
	if (old_ng->s.len > id) {
		old_ng->ptr[id] = data;
		return 0;
	}

	ng = net_alloc_generic();
	if (!ng)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
	       (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
	ng->ptr[id] = data;

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, s.rcu);
	return 0;
}

/* Run the per-namespace initialisation of @ops for @net.
 *
 * When @ops has an id, ops->size bytes of zeroed private data are allocated
 * and stored in the namespace's generic array before ops->init() is called.
 * If ops->init() fails, the generic slot is reset to NULL and the data is
 * freed.
 *
 * Returns 0 on success, -ENOMEM if the private data cannot be allocated, or
 * the error from net_assign_generic() / ops->init().
 */
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	struct net_generic *ng;
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

	/* ->init() failed: unpublish the private data before freeing it. */
	if (ops->id) {
		ng = rcu_dereference_protected(net->gen,
					       lockdep_is_held(&pernet_ops_rwsem));
		ng->ptr[*ops->id] = NULL;
	}

cleanup:
	kfree(data);

out:
	return err;
}

/* Call ops->pre_exit(), if set, for every namespace on @net_exit_list. */
static void ops_pre_exit_list(const struct pernet_operations *ops,
			      struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->pre_exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->pre_exit(net);
	}
}

/* Call ops->exit(), if set, for every namespace on @net_exit_list (with a
 * cond_resched() after each one), then ops->exit_batch(), if set, once for
 * the whole list.
 */
static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list) {
			ops->exit(net);
			cond_resched();
		}
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

/* Free the private data of @ops, if it has an id, for every namespace on
 * @net_exit_list.
 */
static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			kfree(net_generic(net, *ops->id));
	}
}

/* should be called with nsid_lock held
 *
 * Allocate an id for @peer in @net's netns_ids idr.  A non-negative @reqid
 * narrows the requested range to [reqid, reqid + 1); otherwise min and max
 * are both passed as 0 (presumably "any id" -- confirm against the
 * idr_alloc() documentation).  Returns whatever idr_alloc() returns.
 */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int min = 0, max = 0;

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() will not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
*/
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}

/* Must be called from RCU-critical section or with nsid_lock held
 *
 * Look up the id under which @peer is registered in @net.  Returns the id
 * (0 included) or NETNSA_NSID_NOT_ASSIGNED if none is found.
 */
static int __peernet2id(const struct net *net, struct net *peer)
{
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	return NETNSA_NSID_NOT_ASSIGNED;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
			      struct nlmsghdr *nlh, gfp_t gfp);
/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 *
 * NETNSA_NSID_NOT_ASSIGNED is returned instead when @net's reference count
 * is already zero, when a reference on @peer cannot be taken, or when the
 * id allocation fails.  A newly allocated id is announced with
 * RTM_NEWNSID, using @gfp for the notification.
 */
int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
{
	int id;

	if (refcount_read(&net->ns.count) == 0)
		return NETNSA_NSID_NOT_ASSIGNED;

	spin_lock_bh(&net->nsid_lock);
	id = __peernet2id(net, peer);
	if (id >= 0) {
		spin_unlock_bh(&net->nsid_lock);
		return id;
	}

	/* When peer is obtained from RCU lists, we may race with
	 * its cleanup. Check whether it's alive, and this guarantees
	 * we never hash a peer back to net->netns_ids, after it has
	 * just been idr_remove()'d from there in cleanup_net().
	 */
	if (!maybe_get_net(peer)) {
		spin_unlock_bh(&net->nsid_lock);
		return NETNSA_NSID_NOT_ASSIGNED;
	}

	id = alloc_netid(net, peer, -1);
	spin_unlock_bh(&net->nsid_lock);

	/* Drop the reference taken by maybe_get_net() above. */
	put_net(peer);
	if (id < 0)
		return NETNSA_NSID_NOT_ASSIGNED;

	rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);

	return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(const struct net *net, struct net *peer)
{
	int id;

	rcu_read_lock();
	id = __peernet2id(net, peer);
	rcu_read_unlock();

	return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
*/ bool peernet_has_id(const struct net *net, struct net *peer) { return peernet2id(net, peer) >= 0; } struct net *get_net_ns_by_id(const struct net *net, int id) { struct net *peer; if (id < 0) return NULL; rcu_read_lock(); peer = idr_find(&net->netns_ids, id); if (peer) peer = maybe_get_net(peer); rcu_read_unlock(); return peer; } EXPORT_SYMBOL_GPL(get_net_ns_by_id); static __net_init void preinit_net_sysctl(struct net *net) { net->core.sysctl_somaxconn = SOMAXCONN; /* Limits per socket sk_omem_alloc usage. * TCP zerocopy regular usage needs 128 KB. */ net->core.sysctl_optmem_max = 128 * 1024; net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED; net->core.sysctl_tstamp_allow_data = 1; } /* init code that must occur even if setup_net() is not called. */ static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns) { refcount_set(&net->passive, 1); refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt"); get_random_bytes(&net->hash_mix, sizeof(u32)); net->dev_base_seq = 1; net->user_ns = user_ns; idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); mutex_init(&net->ipv4.ra_mutex); #ifdef CONFIG_DEBUG_NET_SMALL_RTNL mutex_init(&net->rtnl_mutex); lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL); #endif preinit_net_sysctl(net); } /* * setup_net runs the initializers for the network namespace object. 
*/
static __net_init int setup_net(struct net *net)
{
	/* Must be called with pernet_ops_rwsem held */
	const struct pernet_operations *ops, *saved_ops;
	LIST_HEAD(net_exit_list);
	LIST_HEAD(dev_kill_list);
	int error = 0;

	preempt_disable();
	net->net_cookie = gen_cookie_next(&net_cookie);
	preempt_enable();

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
	down_write(&net_rwsem);
	list_add_tail_rcu(&net->list, &net_namespace_list);
	up_write(&net_rwsem);
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	/* Each reverse walk below restarts from the ops that failed. */
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_pre_exit_list(ops, &net_exit_list);

	synchronize_rcu();

	ops = saved_ops;
	rtnl_lock();
	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
		if (ops->exit_batch_rtnl)
			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	rcu_barrier();
	goto out;
}

#ifdef CONFIG_NET_NS
/* Charge one network namespace to the current effective uid in @ns. */
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

/* Undo inc_net_namespaces(). */
static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep __ro_after_init;
static struct workqueue_struct *netns_wq;

/* Allocate a zeroed struct net together with its generic pointer array
 * and, with CONFIG_KEYS, its key domain tag.  Returns NULL if any of the
 * allocations fails; whatever was allocated before the failure is freed.
 */
static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

#ifdef CONFIG_KEYS
	net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
	if (!net->key_domain)
		goto out_free_2;
	refcount_set(&net->key_domain->usage, 1);
#endif

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

#ifdef CONFIG_KEYS
out_free_2:
	kmem_cache_free(net_cachep, net);
	/* Make sure the failure path returns NULL. */
	net = NULL;
#endif
out_free:
	kfree(ng);
	goto out;
}

static LLIST_HEAD(defer_free_list);

/* Free every struct net queued on defer_free_list. */
static void net_complete_free(void)
{
	struct llist_node *kill_list;
	struct net *net, *next;

	/* Get the list of namespaces to free from last round. */
	kill_list = llist_del_all(&defer_free_list);

	llist_for_each_entry_safe(net, next, kill_list, defer_free_list)
		kmem_cache_free(net_cachep, net);

}

/* Drop one passive reference on @net.  When it was the last one, the
 * generic array is freed, the notrefcnt tracker is torn down and the
 * structure itself is queued on defer_free_list.
 */
void net_passive_dec(struct net *net)
{
	if (refcount_dec_and_test(&net->passive)) {
		kfree(rcu_access_pointer(net->gen));

		/* There should not be any trackers left there. */
		ref_tracker_dir_exit(&net->notrefcnt_tracker);

		/* Wait for an extra rcu_barrier() before final free. */
		llist_add(&net->defer_free_list, &defer_free_list);
	}
}

/* void-pointer wrapper around net_passive_dec(); a NULL @p is ignored. */
void net_drop_ns(void *p)
{
	struct net *net = (struct net *)p;

	if (net)
		net_passive_dec(net);
}

/* Return the namespace to use for a new task.
 *
 * Without CLONE_NEWNET in @flags this is @old_net with an extra reference.
 * Otherwise a new namespace is allocated, pre-initialised and set up under
 * pernet_ops_rwsem.
 *
 * Returns the namespace, or an ERR_PTR: -ENOSPC if the ucount charge fails,
 * -ENOMEM if allocation fails, or the error from down_read_killable() /
 * setup_net().
 */
struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		rv = -ENOMEM;
		goto dec_ucounts;
	}

	preinit_net(net, user_ns);
	net->ucounts = ucounts;
	get_user_ns(user_ns);

	rv = down_read_killable(&pernet_ops_rwsem);
	if (rv < 0)
		goto put_userns;

	rv = setup_net(net);

	up_read(&pernet_ops_rwsem);

	if (rv < 0) {
		/* The error labels live inside this block so the earlier
		 * failures above share the same unwinding.
		 */
put_userns:
#ifdef CONFIG_KEYS
		key_remove_domain(net->key_domain);
#endif
		put_user_ns(user_ns);
		net_passive_dec(net);
dec_ucounts:
		dec_net_namespaces(ucounts);
		return ERR_PTR(rv);
	}
	return net;
}

/**
 * net_ns_get_ownership - get sysfs ownership data for @net
 * @net: network namespace in question (can be NULL)
 * @uid: kernel user ID for sysfs objects
 * @gid: kernel group ID for sysfs objects
 *
 * Returns the uid/gid pair of root in the user
namespace associated with the
 * given network namespace.
 */
void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
{
	if (net) {
		kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
		kgid_t ns_root_gid = make_kgid(net->user_ns, 0);

		/* An invalid mapping leaves the caller's *uid / *gid untouched. */
		if (uid_valid(ns_root_uid))
			*uid = ns_root_uid;

		if (gid_valid(ns_root_gid))
			*gid = ns_root_gid;
	} else {
		*uid = GLOBAL_ROOT_UID;
		*gid = GLOBAL_ROOT_GID;
	}
}
EXPORT_SYMBOL_GPL(net_ns_get_ownership);

/*
 * Remove the nsid that other namespaces hold for @net, sending an
 * RTM_DELNSID notification for each one removed, then destroy @net's own
 * id table. The walk over the namespace list stops after @last.
 */
static void unhash_nsid(struct net *net, struct net *last)
{
	struct net *tmp;
	/* This function is only called from cleanup_net() work,
	 * and this work is the only process, that may delete
	 * a net from net_namespace_list. So, when the below
	 * is executing, the list may only grow. Thus, we do not
	 * use for_each_net_rcu() or net_rwsem.
	 */
	for_each_net(tmp) {
		int id;

		spin_lock_bh(&tmp->nsid_lock);
		id = __peernet2id(tmp, net);
		if (id >= 0)
			idr_remove(&tmp->netns_ids, id);
		spin_unlock_bh(&tmp->nsid_lock);
		/* The notification is sent after nsid_lock has been dropped. */
		if (id >= 0)
			rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
					  GFP_KERNEL);
		if (tmp == last)
			break;
	}
	spin_lock_bh(&net->nsid_lock);
	idr_destroy(&net->netns_ids);
	spin_unlock_bh(&net->nsid_lock);
}

static LLIST_HEAD(cleanup_list);

struct task_struct *cleanup_net_task;

/*
 * Work function that tears down every namespace queued on cleanup_list:
 * unlink them from net_namespace_list, drop their nsids, run the pernet
 * pre_exit / exit_batch_rtnl / exit / free stages in reverse list order,
 * then release the per-namespace resources.
 * cleanup_net_task points at the running task for the duration.
 */
static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp, *last;
	struct llist_node *net_kill_list;
	LIST_HEAD(net_exit_list);
	LIST_HEAD(dev_kill_list);

	cleanup_net_task = current;

	/* Atomically snapshot the list of namespaces to cleanup */
	net_kill_list = llist_del_all(&cleanup_list);

	down_read(&pernet_ops_rwsem);

	/* Don't let anyone else find us. */
	down_write(&net_rwsem);
	llist_for_each_entry(net, net_kill_list, cleanup_list)
		list_del_rcu(&net->list);
	/* Cache last net. After we unlock rtnl, no one new net
	 * added to net_namespace_list can assign nsid pointer
	 * to a net from net_kill_list (see peernet2id_alloc()).
	 * So, we skip them in unhash_nsid().
	 *
	 * Note, that unhash_nsid() does not delete nsid links
	 * between net_kill_list's nets, as they've already
	 * deleted from net_namespace_list. But, this would be
	 * useless anyway, as netns_ids are destroyed there.
	 */
	last = list_last_entry(&net_namespace_list, struct net, list);
	up_write(&net_rwsem);

	llist_for_each_entry(net, net_kill_list, cleanup_list) {
		unhash_nsid(net, last);
		list_add_tail(&net->exit_list, &net_exit_list);
	}

	/* Run all of the network namespace pre_exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_pre_exit_list(ops, &net_exit_list);

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 * Also the pre_exit() and exit() methods need this barrier.
	 */
	synchronize_rcu_expedited();

	rtnl_lock();
	list_for_each_entry_reverse(ops, &pernet_list, list) {
		if (ops->exit_batch_rtnl)
			ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
	}
	unregister_netdevice_many(&dev_kill_list);
	rtnl_unlock();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	up_read(&pernet_ops_rwsem);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Frees the structs queued on defer_free_list before this point. */
	net_complete_free();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
#ifdef CONFIG_KEYS
		key_remove_domain(net->key_domain);
#endif
		put_user_ns(net->user_ns);
		net_passive_dec(net);
	}
	cleanup_net_task = NULL;
}

/**
 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 * cleanup_net runs from work queue and will first remove namespaces
 * from the global list, then run net exit functions.
*
 * Call this in module exit path to make sure that all netns
 * ->exit ops have been invoked before the function is removed.
 */
void net_ns_barrier(void)
{
	/* Taking and releasing the write side is the whole barrier. */
	down_write(&pernet_ops_rwsem);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);

static DECLARE_WORK(net_cleanup_work, cleanup_net);

/*
 * Queue @net on cleanup_list for teardown by cleanup_net(). The work item
 * is queued only when llist_add() returns true.
 */
void __put_net(struct net *net)
{
	ref_tracker_dir_exit(&net->refcnt_tracker);
	/* Cleanup the network namespace in process context */
	if (llist_add(&net->cleanup_list, &cleanup_list))
		queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

/**
 * get_net_ns - increment the refcount of the network namespace
 * @ns: common namespace (net)
 *
 * Returns the net's common namespace or ERR_PTR() if ref is zero.
 */
struct ns_common *get_net_ns(struct ns_common *ns)
{
	struct net *net;

	net = maybe_get_net(container_of(ns, struct net, ns));
	if (net)
		return &net->ns;
	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns);

/*
 * Resolve @fd to a network namespace and take a reference on it.
 *
 * Returns the namespace, ERR_PTR(-EBADF) when @fd is empty, or
 * ERR_PTR(-EINVAL) when the file is not a proc ns file whose ops are
 * netns_operations.
 */
struct net *get_net_ns_by_fd(int fd)
{
	CLASS(fd, f)(fd);

	if (fd_empty(f))
		return ERR_PTR(-EBADF);

	if (proc_ns_file(fd_file(f))) {
		struct ns_common *ns = get_proc_ns(file_inode(fd_file(f)));
		if (ns->ops == &netns_operations)
			return get_net(container_of(ns, struct net, ns));
	}

	return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
#endif

/*
 * Take a reference on the network namespace of the task with virtual pid
 * @pid. Returns ERR_PTR(-ESRCH) when no such task is found or the task
 * has no nsproxy.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;
		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

/* pernet init: set the ns ops (CONFIG_NET_NS only) and allocate an inum. */
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

/* pernet exit: release the inum allocated by net_ns_net_init(). */
static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

/* Attribute policy shared by the RTM_NEWNSID / RTM_GETNSID parsers below. */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
	[NETNSA_TARGET_NSID]	= { .type = NLA_S32 },
};

/*
 * RTM_NEWNSID handler: assign an nsid, in the sender's namespace, to the
 * peer namespace named by NETNSA_PID or NETNSA_FD.
 *
 * Returns 0 on success; -EINVAL when NETNSA_NSID or the peer reference is
 * missing; the lookup error when the peer cannot be resolved; -EEXIST
 * when the peer already has an nsid or the requested nsid is in use;
 * otherwise the error returned by alloc_netid().
 */
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct nlattr *nla;
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
				     NETNSA_MAX, rtnl_net_policy, extack);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID]) {
		NL_SET_ERR_MSG(extack, "nsid is missing");
		return -EINVAL;
	}
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	/* NETNSA_PID takes precedence over NETNSA_FD when both are given. */
	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}
	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	spin_lock_bh(&net->nsid_lock);
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_bh(&net->nsid_lock);
		err = -EEXIST;
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack,
			       "Peer netns already has a nsid assigned");
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_bh(&net->nsid_lock);
	if (err >= 0) {
		/* A non-negative result is the allocated id; announce it. */
		rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
				  nlh, GFP_KERNEL);
		err = 0;
	} else if (err == -ENOSPC && nsid >= 0) {
		err = -EEXIST;
		NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
		NL_SET_ERR_MSG(extack, "The specified nsid is already used");
	}
out:
	/* Drop the reference taken by the peer lookup above. */
	put_net(peer);
	return err;
}

/* Size of the message built by rtnl_net_fill(): header plus two s32 attributes. */
static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
	       ;
}

/* Parameters consumed by rtnl_net_fill(). */
struct net_fill_args {
	u32 portid;
	u32 seq;
	int flags;
	int cmd;
	int nsid;
	bool add_ref;	/* when true, NETNSA_CURRENT_NSID is emitted too */
	int ref_nsid;	/* value for NETNSA_CURRENT_NSID */
};

/*
 * Append one nsid message described by @args to @skb.
 * Returns 0, or -EMSGSIZE when the header or an attribute does not fit
 * (a partially built message is cancelled).
 */
static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
			args->flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
		goto nla_put_failure;

	if (args->add_ref &&
	    nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Parse an RTM_GETNSID request into @tb. When netlink_strict_get_check()
 * is true, strict parsing is used and any attribute other than PID, FD,
 * NSID or TARGET_NSID is rejected with -EINVAL.
 */
static int rtnl_net_valid_getid_req(struct sk_buff *skb,
				    const struct nlmsghdr *nlh,
				    struct nlattr **tb,
				    struct netlink_ext_ack *extack)
{
	int i, err;

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
					      tb, NETNSA_MAX, rtnl_net_policy,
					      extack);

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
					    NETNSA_MAX, rtnl_net_policy,
					    extack);
	if (err)
		return err;

	for (i = 0; i <= NETNSA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		case NETNSA_PID:
		case NETNSA_FD:
		case NETNSA_NSID:
		case NETNSA_TARGET_NSID:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
			return -EINVAL;
		}
	}

	return 0;
}

/*
 * RTM_GETNSID handler: unicast back the nsid of a peer namespace (named
 * by PID, FD or NSID) as seen from the sender's namespace, or from the
 * namespace selected by NETNSA_TARGET_NSID when that attribute is given.
 */
static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net_fill_args fillargs = {
		.portid = NETLINK_CB(skb).portid,
		.seq = nlh->nlmsg_seq,
		.cmd = RTM_NEWNSID,
	};
	struct net *peer, *target = net;
	struct nlattr *nla;
	struct sk_buff *msg;
	int err;

	err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else if (tb[NETNSA_NSID]) {
		peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
		/* Normalise a NULL result so the IS_ERR() check below covers it. */
		if (!peer)
			peer = ERR_PTR(-ENOENT);
		nla = tb[NETNSA_NSID];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}

	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	if (tb[NETNSA_TARGET_NSID]) {
		int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);

		target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
		if (IS_ERR(target)) {
			NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
			NL_SET_ERR_MSG(extack,
				       "Target netns reference is invalid");
			err = PTR_ERR(target);
			/* add_ref is still false here, so "out" skips put_net(target). */
			goto out;
		}
		fillargs.add_ref = true;
		fillargs.ref_nsid = peernet2id(net, peer);
	}

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	fillargs.nsid = peernet2id(target, peer);
	err = rtnl_net_fill(msg, &fillargs);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	/* add_ref doubles as "a reference on target is held". */
	if (fillargs.add_ref)
		put_net(target);
	put_net(peer);
	return err;
}

/* State threaded through idr_for_each() by rtnl_net_dumpid(). */
struct rtnl_net_dump_cb {
	struct net *tgt_net;
	struct net *ref_net;
	struct sk_buff *skb;
	struct net_fill_args fillargs;
	int idx;	/* entries visited so far */
	int s_idx;	/* entries below this index are skipped */
};

/* Runs in RCU-critical section.
*/
static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	/* Skip entries that precede the resume index. */
	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	net_cb->fillargs.nsid = id;
	if (net_cb->fillargs.add_ref)
		net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
	ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
	/* On failure idx is not advanced for this entry. */
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}

/*
 * Strictly parse an nsid dump request. Only NETNSA_TARGET_NSID is
 * accepted: it switches @net_cb->tgt_net to the referenced namespace
 * (keeping the previous one in ref_net) and sets add_ref. Any other
 * attribute yields -EINVAL.
 */
static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
				   struct rtnl_net_dump_cb *net_cb,
				   struct netlink_callback *cb)
{
	struct netlink_ext_ack *extack = cb->extack;
	struct nlattr *tb[NETNSA_MAX + 1];
	int err, i;

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
					    NETNSA_MAX, rtnl_net_policy,
					    extack);
	if (err < 0)
		return err;

	for (i = 0; i <= NETNSA_MAX; i++) {
		if (!tb[i])
			continue;

		if (i == NETNSA_TARGET_NSID) {
			struct net *net;

			net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
			if (IS_ERR(net)) {
				NL_SET_BAD_ATTR(extack, tb[i]);
				NL_SET_ERR_MSG(extack,
					       "Invalid target network namespace id");
				return PTR_ERR(net);
			}
			net_cb->fillargs.add_ref = true;
			net_cb->ref_net = net_cb->tgt_net;
			net_cb->tgt_net = net;
		} else {
			NL_SET_BAD_ATTR(extack, tb[i]);
			NL_SET_ERR_MSG(extack,
				       "Unsupported attribute in dump request");
			return -EINVAL;
		}
	}

	return 0;
}

/*
 * RTM_GETNSID dump handler: emit one message per entry in the target
 * namespace's netns_ids, resuming from cb->args[0].
 */
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct rtnl_net_dump_cb net_cb = {
		.tgt_net = sock_net(skb->sk),
		.skb = skb,
		.fillargs = {
			.portid = NETLINK_CB(cb->skb).portid,
			.seq = cb->nlh->nlmsg_seq,
			.flags = NLM_F_MULTI,
			.cmd = RTM_NEWNSID,
		},
		.idx = 0,
		.s_idx = cb->args[0],
	};
	int err = 0;

	if (cb->strict_check) {
		err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
		if (err < 0)
			goto end;
	}

	rcu_read_lock();
	idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	rcu_read_unlock();

	/* Remember how far we got so the next call can resume. */
	cb->args[0] = net_cb.idx;
end:
	/* add_ref set means tgt_net came from rtnl_get_net_ns_capable(). */
	if (net_cb.fillargs.add_ref)
		put_net(net_cb.tgt_net);
	return err;
}

/*
 * Send an nsid notification (@cmd, @id) to RTNLGRP_NSID listeners in
 * @net. If the message cannot be allocated or filled, the error is
 * reported through rtnl_set_sk_err() instead.
 */
static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
			      struct nlmsghdr *nlh, gfp_t gfp)
{
	struct net_fill_args fillargs = {
		.portid = portid,
		.seq = nlh ? nlh->nlmsg_seq : 0,
		.cmd = cmd,
		.nsid = id,
	};
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), gfp);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, &fillargs);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
	return;

err_out:
	nlmsg_free(msg);
out:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

#ifdef CONFIG_NET_NS
/* Assertions on the cache-line group layout of struct netns_ipv4. */
static void __init netns_ipv4_struct_check(void)
{
	/* TX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_early_retrans);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_win_divisor);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_tso_rtt_log);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_autocorking);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_snd_mss);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_notsent_lowat);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_limit_output_bytes);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_min_rtt_wlen);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_tcp_wmem);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_tx,
				      sysctl_ip_fwd_use_pmtu);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_tx, 33);

	/* TXRX readonly hotpath cache lines */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_txrx,
				      sysctl_tcp_moderate_rcvbuf);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_txrx, 1);

	/* RX readonly hotpath cache line */
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_ip_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_early_demux);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_l3mdev_accept);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_reordering);
	CACHELINE_ASSERT_GROUP_MEMBER(struct netns_ipv4, netns_ipv4_read_rx,
				      sysctl_tcp_rmem);
	CACHELINE_ASSERT_GROUP_SIZE(struct netns_ipv4, netns_ipv4_read_rx, 22);
}
#endif

/* rtnetlink handlers registered by net_ns_init(). */
static const struct rtnl_msg_handler net_ns_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWNSID, .doit = rtnl_net_newid,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED},
	{.msgtype = RTM_GETNSID, .doit = rtnl_net_getid,
	 .dumpit = rtnl_net_dumpid,
	 .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED},
};

/*
 * Boot-time initialisation: create the slab cache and cleanup workqueue
 * (CONFIG_NET_NS only), set up init_net, then register net_ns_ops and the
 * nsid rtnetlink handlers. Every failure here panics.
 */
void __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	netns_ipv4_struct_check();
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC|SLAB_ACCOUNT, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

#ifdef CONFIG_KEYS
	init_net.key_domain = &init_net_key_domain;
#endif
	preinit_net(&init_net, &init_user_ns);

	down_write(&pernet_ops_rwsem);
	if (setup_net(&init_net))
		panic("Could not setup the initial network namespace");

	init_net_initialized = true;
	up_write(&pernet_ops_rwsem);

	if (register_pernet_subsys(&net_ns_ops))
		panic("Could not register network namespace subsystems");

	rtnl_register_many(net_ns_rtnl_msg_handlers);
}

/*
 * Run the full teardown sequence of a single @ops (pre_exit, RCU grace
 * period, optional exit_batch_rtnl under rtnl, exit, free) for every
 * namespace on @net_exit_list.
 */
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
	ops_pre_exit_list(ops, net_exit_list);
	synchronize_rcu();

	if (ops->exit_batch_rtnl) {
		LIST_HEAD(dev_kill_list);

		rtnl_lock();
		ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
		unregister_netdevice_many(&dev_kill_list);
		rtnl_unlock();
	}
	ops_exit_list(ops, net_exit_list);

	ops_free_list(ops, net_exit_list);
}

#ifdef CONFIG_NET_NS
/*
 * Append @ops to @list and initialise it in every existing namespace.
 * If an init fails, @ops is unlinked again and torn down in the
 * namespaces that had already been initialised; the error is returned.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || ops->id) {
		/* We held write locked pernet_ops_rwsem, and parallel
		 * setup_net() and cleanup_net() are not possible.
		 */
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	free_exit_list(ops, &net_exit_list);
	return error;
}

/* Unlink @ops and run its teardown in every existing namespace. */
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	/* See comment in __register_pernet_operations() */
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);

	free_exit_list(ops, &net_exit_list);
}

#else

/*
 * !CONFIG_NET_NS variant: before init_net is initialised, @ops is only
 * linked onto @list; afterwards it is initialised against init_net
 * directly and is not linked.
 */
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_add_tail(&ops->list, list);
		return 0;
	}

	return ops_init(ops, &init_net);
}

/* !CONFIG_NET_NS variant: mirror of the registration logic above. */
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_del(&ops->list);
	} else {
		LIST_HEAD(net_exit_list);
		list_add(&init_net.exit_list, &net_exit_list);
		free_exit_list(ops, &net_exit_list);
	}
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

/*
 * Common registration path: validate @ops, allocate a generic id when
 * ops->id is set, then register on @list. The id is released again if
 * registration fails.
 *
 * Returns 0, -EINVAL when exactly one of ops->id / ops->size is set, or
 * the error from id allocation or __register_pernet_operations().
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	/* id and size must be either both set or both unset. */
	if (WARN_ON(!!ops->id ^ !!ops->size))
		return -EINVAL;

	if (ops->id) {
		error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
				GFP_KERNEL);
		if (error < 0)
			return error;
		*ops->id = error;
		/* This does not require READ_ONCE as writers already hold
		 * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
		 * net_alloc_generic.
		 */
		WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_free(&net_generic_ids, *ops->id);
	}

	return error;
}

/* Common unregistration path; releases the generic id after an rcu_barrier(). */
static void unregister_pernet_operations(struct pernet_operations *ops)
{
	__unregister_pernet_operations(ops);
	rcu_barrier();
	if (ops->id)
		ida_free(&net_generic_ids, *ops->id);
}

/**
 *      register_pernet_subsys - register a network namespace subsystem
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a subsystem which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered all network namespace init functions are
 *	called for every existing network namespace.  Allowing kernel
 *	modules to have a race free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order with which they were
 *	registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	down_write(&pernet_ops_rwsem);
	/* Subsystems are inserted before first_device. */
	error =  register_pernet_operations(first_device, ops);
	up_write(&pernet_ops_rwsem);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 *      unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
*/ void unregister_pernet_subsys(struct pernet_operations *ops) { down_write(&pernet_ops_rwsem); unregister_pernet_operations(ops); up_write(&pernet_ops_rwsem); } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem * * Register a device which has init and exit functions * that are called when network namespaces are created and * destroyed respectively. * * When registered all network namespace init functions are * called for every existing network namespace. Allowing kernel * modules to have a race free view of the set of network namespaces. * * When a new network namespace is created all of the init * methods are called in the order in which they were registered. * * When a network namespace is destroyed all of the exit methods * are called in the reverse of the order with which they were * registered. */ int register_pernet_device(struct pernet_operations *ops) { int error; down_write(&pernet_ops_rwsem); error = register_pernet_operations(&pernet_list, ops); if (!error && (first_device == &pernet_list)) first_device = &ops->list; up_write(&pernet_ops_rwsem); return error; } EXPORT_SYMBOL_GPL(register_pernet_device); /** * unregister_pernet_device - unregister a network namespace netdevice * @ops: pernet operations structure to manipulate * * Remove the pernet operations structure from the list to be * used when network namespaces are created or destroyed. In * addition run the exit method for all existing network * namespaces. 
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
	down_write(&pernet_ops_rwsem);

	/* If @ops is the recorded first device, advance to its successor. */
	if (first_device == &ops->list)
		first_device = ops->list.next;

	unregister_pernet_operations(ops);

	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
/*
 * proc_ns ->get: take a reference on @task's network namespace.
 * Returns its ns_common, or NULL when the task has no nsproxy.
 */
static struct ns_common *netns_get(struct task_struct *task)
{
	struct nsproxy *proxy;
	struct net *found = NULL;

	task_lock(task);
	proxy = task->nsproxy;
	if (proxy != NULL)
		found = get_net(proxy->net_ns);
	task_unlock(task);

	if (!found)
		return NULL;
	return &found->ns;
}

/* Map an embedded ns_common back to the struct net that contains it. */
static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

/* proc_ns ->put: drop the reference on the namespace behind @ns. */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}

/*
 * proc_ns ->install: make @ns the network namespace of @nsset's nsproxy.
 * Returns -EPERM unless CAP_SYS_ADMIN is held both in the target
 * namespace's user_ns and in the user_ns of @nsset's credentials.
 */
static int netns_install(struct nsset *nsset, struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);
	struct nsproxy *nsproxy = nsset->nsproxy;

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (!ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	/* Swap references: release the old namespace, pin the new one. */
	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);

	return 0;
}

/* proc_ns ->owner: the user namespace stored in the net behind @ns. */
static struct user_namespace *netns_owner(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	return net->user_ns;
}

const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
#endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #ifndef _BR_PRIVATE_STP_H #define _BR_PRIVATE_STP_H #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 /* IEEE 802.1D-1998 timer values */ #define BR_MIN_HELLO_TIME (1*HZ) #define BR_MAX_HELLO_TIME (10*HZ) #define BR_MIN_FORWARD_DELAY (2*HZ) #define BR_MAX_FORWARD_DELAY (30*HZ) #define BR_MIN_MAX_AGE (6*HZ) #define BR_MAX_MAX_AGE (40*HZ) #define BR_MIN_PATH_COST 1 #define BR_MAX_PATH_COST 65535 struct br_config_bpdu { unsigned int topology_change:1; unsigned int topology_change_ack:1; bridge_id root; int root_path_cost; bridge_id bridge_id; port_id port_id; int message_age; int max_age; int hello_time; int forward_delay; }; /* called under bridge lock */ static inline int br_is_designated_port(const struct net_bridge_port *p) { return !memcmp(&p->designated_bridge, &p->br->bridge_id, 8) && (p->designated_port == p->port_id); } /* br_stp.c */ void br_become_root_bridge(struct net_bridge *br); void br_config_bpdu_generation(struct net_bridge *); void br_configuration_update(struct net_bridge *); void br_port_state_selection(struct net_bridge *); void br_received_config_bpdu(struct net_bridge_port *p, const struct br_config_bpdu *bpdu); void br_received_tcn_bpdu(struct net_bridge_port *p); void br_transmit_config(struct net_bridge_port *p); void br_transmit_tcn(struct net_bridge *br); void br_topology_change_detection(struct net_bridge *br); void __br_set_topology_change(struct net_bridge *br, unsigned char val); /* br_stp_bpdu.c */ void br_send_config_bpdu(struct net_bridge_port *, struct br_config_bpdu *); void br_send_tcn_bpdu(struct net_bridge_port *); #endif
110 110 110 110 110 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 // SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/char/misc.c * * Generic misc open routine by Johan Myreen * * Based on code from Linus * * Teemu Rantanen's Microsoft Busmouse support and Derrick Cole's * changes incorporated into 0.97pl4 * by Peter Cervasio (pete%q106fm.uucp@wupost.wustl.edu) (08SEP92) * See busmouse.c for particulars. * * Made things a lot mode modular - easy to compile in just one or two * of the misc drivers, as they are now completely independent. Linus. * * Support for loadable modules. 8-Sep-95 Philip Blundell <pjb27@cam.ac.uk> * * Fixed a failing symbol register to free the device registration * Alan Cox <alan@lxorguk.ukuu.org.uk> 21-Jan-96 * * Dynamic minors and /proc/mice by Alessandro Rubini. 
26-Mar-96 * * Renamed to misc and miscdevice to be more accurate. Alan Cox 26-Mar-96 * * Handling of mouse minor numbers for kerneld: * Idea by Jacques Gelinas <jack@solucorp.qc.ca>, * adapted by Bjorn Ekwall <bj0rn@blox.se> * corrected by Alan Cox <alan@lxorguk.ukuu.org.uk> * * Changes for kmod (from kerneld): * Cyrus Durgin <cider@speakeasy.org> * * Added devfs support. Richard Gooch <rgooch@atnf.csiro.au> 10-Jan-1998 */ #include <linux/module.h> #include <linux/fs.h> #include <linux/errno.h> #include <linux/miscdevice.h> #include <linux/kernel.h> #include <linux/major.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/device.h> #include <linux/tty.h> #include <linux/kmod.h> #include <linux/gfp.h> /* * Head entry for the doubly linked miscdevice list */ static LIST_HEAD(misc_list); static DEFINE_MUTEX(misc_mtx); /* * Assigned numbers, used for dynamic minors */ #define DYNAMIC_MINORS 128 /* like dynamic majors */ static DEFINE_IDA(misc_minors_ida); static int misc_minor_alloc(int minor) { int ret = 0; if (minor == MISC_DYNAMIC_MINOR) { /* allocate free id */ ret = ida_alloc_max(&misc_minors_ida, DYNAMIC_MINORS - 1, GFP_KERNEL); if (ret >= 0) { ret = DYNAMIC_MINORS - ret - 1; } else { ret = ida_alloc_range(&misc_minors_ida, MISC_DYNAMIC_MINOR + 1, MINORMASK, GFP_KERNEL); } } else { /* specific minor, check if it is in dynamic or misc dynamic range */ if (minor < DYNAMIC_MINORS) { minor = DYNAMIC_MINORS - minor - 1; ret = ida_alloc_range(&misc_minors_ida, minor, minor, GFP_KERNEL); } else if (minor > MISC_DYNAMIC_MINOR) { ret = ida_alloc_range(&misc_minors_ida, minor, minor, GFP_KERNEL); } else { /* case of non-dynamic minors, no need to allocate id */ ret = 0; } } return ret; } static void misc_minor_free(int minor) { if (minor < DYNAMIC_MINORS) ida_free(&misc_minors_ida, DYNAMIC_MINORS - minor - 1); else if (minor > MISC_DYNAMIC_MINOR) ida_free(&misc_minors_ida, 
minor); } #ifdef CONFIG_PROC_FS static void *misc_seq_start(struct seq_file *seq, loff_t *pos) { mutex_lock(&misc_mtx); return seq_list_start(&misc_list, *pos); } static void *misc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { return seq_list_next(v, &misc_list, pos); } static void misc_seq_stop(struct seq_file *seq, void *v) { mutex_unlock(&misc_mtx); } static int misc_seq_show(struct seq_file *seq, void *v) { const struct miscdevice *p = list_entry(v, struct miscdevice, list); seq_printf(seq, "%3i %s\n", p->minor, p->name ? p->name : ""); return 0; } static const struct seq_operations misc_seq_ops = { .start = misc_seq_start, .next = misc_seq_next, .stop = misc_seq_stop, .show = misc_seq_show, }; #endif static int misc_open(struct inode *inode, struct file *file) { int minor = iminor(inode); struct miscdevice *c = NULL, *iter; int err = -ENODEV; const struct file_operations *new_fops = NULL; mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } if (!new_fops) { mutex_unlock(&misc_mtx); request_module("char-major-%d-%d", MISC_MAJOR, minor); mutex_lock(&misc_mtx); list_for_each_entry(iter, &misc_list, list) { if (iter->minor != minor) continue; c = iter; new_fops = fops_get(iter->fops); break; } if (!new_fops) goto fail; } /* * Place the miscdevice in the file's * private_data so it can be used by the * file operations, including f_op->open below */ file->private_data = c; err = 0; replace_fops(file, new_fops); if (file->f_op->open) err = file->f_op->open(inode, file); fail: mutex_unlock(&misc_mtx); return err; } static char *misc_devnode(const struct device *dev, umode_t *mode) { const struct miscdevice *c = dev_get_drvdata(dev); if (mode && c->mode) *mode = c->mode; if (c->nodename) return kstrdup(c->nodename, GFP_KERNEL); return NULL; } static const struct class misc_class = { .name = "misc", .devnode = misc_devnode, }; static const struct 
file_operations misc_fops = { .owner = THIS_MODULE, .open = misc_open, .llseek = noop_llseek, }; /** * misc_register - register a miscellaneous device * @misc: device structure * * Register a miscellaneous device with the kernel. If the minor * number is set to %MISC_DYNAMIC_MINOR a minor number is assigned * and placed in the minor field of the structure. For other cases * the minor number requested is used. * * The structure passed is linked into the kernel and may not be * destroyed until it has been unregistered. By default, an open() * syscall to the device sets file->private_data to point to the * structure. Drivers don't need open in fops for this. * * A zero is returned on success and a negative errno code for * failure. */ int misc_register(struct miscdevice *misc) { dev_t dev; int err = 0; bool is_dynamic = (misc->minor == MISC_DYNAMIC_MINOR); INIT_LIST_HEAD(&misc->list); mutex_lock(&misc_mtx); if (is_dynamic) { int i = misc_minor_alloc(misc->minor); if (i < 0) { err = -EBUSY; goto out; } misc->minor = i; } else { struct miscdevice *c; int i; list_for_each_entry(c, &misc_list, list) { if (c->minor == misc->minor) { err = -EBUSY; goto out; } } i = misc_minor_alloc(misc->minor); if (i < 0) { err = -EBUSY; goto out; } } dev = MKDEV(MISC_MAJOR, misc->minor); misc->this_device = device_create_with_groups(&misc_class, misc->parent, dev, misc, misc->groups, "%s", misc->name); if (IS_ERR(misc->this_device)) { if (is_dynamic) { misc_minor_free(misc->minor); misc->minor = MISC_DYNAMIC_MINOR; } err = PTR_ERR(misc->this_device); goto out; } /* * Add it to the front, so that later devices can "override" * earlier defaults */ list_add(&misc->list, &misc_list); out: mutex_unlock(&misc_mtx); return err; } EXPORT_SYMBOL(misc_register); /** * misc_deregister - unregister a miscellaneous device * @misc: device to unregister * * Unregister a miscellaneous device that was previously * successfully registered with misc_register(). 
*/ void misc_deregister(struct miscdevice *misc) { if (WARN_ON(list_empty(&misc->list))) return; mutex_lock(&misc_mtx); list_del(&misc->list); device_destroy(&misc_class, MKDEV(MISC_MAJOR, misc->minor)); misc_minor_free(misc->minor); mutex_unlock(&misc_mtx); } EXPORT_SYMBOL(misc_deregister); static int __init misc_init(void) { int err; struct proc_dir_entry *ret; ret = proc_create_seq("misc", 0, NULL, &misc_seq_ops); err = class_register(&misc_class); if (err) goto fail_remove; err = -EIO; if (register_chrdev(MISC_MAJOR, "misc", &misc_fops)) goto fail_printk; return 0; fail_printk: pr_err("unable to get major %d for misc devices\n", MISC_MAJOR); class_unregister(&misc_class); fail_remove: if (ret) remove_proc_entry("misc", NULL); return err; } subsys_initcall(misc_init);
4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016 Mellanox Technologies. All rights reserved. 
* Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com> */ #include "devl_internal.h" static const struct devlink_param devlink_param_generic[] = { { .id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET, .name = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_NAME, .type = DEVLINK_PARAM_GENERIC_INT_ERR_RESET_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_MAX_MACS, .name = DEVLINK_PARAM_GENERIC_MAX_MACS_NAME, .type = DEVLINK_PARAM_GENERIC_MAX_MACS_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_SRIOV, .name = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_SRIOV_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_REGION_SNAPSHOT, .name = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_NAME, .type = DEVLINK_PARAM_GENERIC_REGION_SNAPSHOT_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_IGNORE_ARI, .name = DEVLINK_PARAM_GENERIC_IGNORE_ARI_NAME, .type = DEVLINK_PARAM_GENERIC_IGNORE_ARI_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MAX, .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_NAME, .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MAX_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_MSIX_VEC_PER_PF_MIN, .name = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_NAME, .type = DEVLINK_PARAM_GENERIC_MSIX_VEC_PER_PF_MIN_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_FW_LOAD_POLICY, .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME, .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME, .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, .name = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_ROCE_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_REMOTE_DEV_RESET, .name = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_REMOTE_DEV_RESET_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH, .name = DEVLINK_PARAM_GENERIC_ENABLE_ETH_NAME, .type = 
DEVLINK_PARAM_GENERIC_ENABLE_ETH_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_RDMA, .name = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_RDMA_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_VNET, .name = DEVLINK_PARAM_GENERIC_ENABLE_VNET_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_VNET_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_ENABLE_IWARP, .name = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_NAME, .type = DEVLINK_PARAM_GENERIC_ENABLE_IWARP_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_IO_EQ_SIZE, .name = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_NAME, .type = DEVLINK_PARAM_GENERIC_IO_EQ_SIZE_TYPE, }, { .id = DEVLINK_PARAM_GENERIC_ID_EVENT_EQ_SIZE, .name = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_NAME, .type = DEVLINK_PARAM_GENERIC_EVENT_EQ_SIZE_TYPE, }, }; static int devlink_param_generic_verify(const struct devlink_param *param) { /* verify it match generic parameter by id and name */ if (param->id > DEVLINK_PARAM_GENERIC_ID_MAX) return -EINVAL; if (strcmp(param->name, devlink_param_generic[param->id].name)) return -ENOENT; WARN_ON(param->type != devlink_param_generic[param->id].type); return 0; } static int devlink_param_driver_verify(const struct devlink_param *param) { int i; if (param->id <= DEVLINK_PARAM_GENERIC_ID_MAX) return -EINVAL; /* verify no such name in generic params */ for (i = 0; i <= DEVLINK_PARAM_GENERIC_ID_MAX; i++) if (!strcmp(param->name, devlink_param_generic[i].name)) return -EEXIST; return 0; } static struct devlink_param_item * devlink_param_find_by_name(struct xarray *params, const char *param_name) { struct devlink_param_item *param_item; unsigned long param_id; xa_for_each(params, param_id, param_item) { if (!strcmp(param_item->param->name, param_name)) return param_item; } return NULL; } static struct devlink_param_item * devlink_param_find_by_id(struct xarray *params, u32 param_id) { return xa_load(params, param_id); } static bool devlink_param_cmode_is_supported(const struct devlink_param *param, enum devlink_param_cmode 
cmode) { return test_bit(cmode, &param->supported_cmodes); } static int devlink_param_get(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx) { if (!param->get) return -EOPNOTSUPP; return param->get(devlink, param->id, ctx); } static int devlink_param_set(struct devlink *devlink, const struct devlink_param *param, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack) { if (!param->set) return -EOPNOTSUPP; return param->set(devlink, param->id, ctx, extack); } static int devlink_param_type_to_nla_type(enum devlink_param_type param_type) { switch (param_type) { case DEVLINK_PARAM_TYPE_U8: return NLA_U8; case DEVLINK_PARAM_TYPE_U16: return NLA_U16; case DEVLINK_PARAM_TYPE_U32: return NLA_U32; case DEVLINK_PARAM_TYPE_STRING: return NLA_STRING; case DEVLINK_PARAM_TYPE_BOOL: return NLA_FLAG; default: return -EINVAL; } } static int devlink_nl_param_value_fill_one(struct sk_buff *msg, enum devlink_param_type type, enum devlink_param_cmode cmode, union devlink_param_value val) { struct nlattr *param_value_attr; param_value_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM_VALUE); if (!param_value_attr) goto nla_put_failure; if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_CMODE, cmode)) goto value_nest_cancel; switch (type) { case DEVLINK_PARAM_TYPE_U8: if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu8)) goto value_nest_cancel; break; case DEVLINK_PARAM_TYPE_U16: if (nla_put_u16(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu16)) goto value_nest_cancel; break; case DEVLINK_PARAM_TYPE_U32: if (nla_put_u32(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vu32)) goto value_nest_cancel; break; case DEVLINK_PARAM_TYPE_STRING: if (nla_put_string(msg, DEVLINK_ATTR_PARAM_VALUE_DATA, val.vstr)) goto value_nest_cancel; break; case DEVLINK_PARAM_TYPE_BOOL: if (val.vbool && nla_put_flag(msg, DEVLINK_ATTR_PARAM_VALUE_DATA)) goto value_nest_cancel; break; } nla_nest_end(msg, param_value_attr); return 0; value_nest_cancel: 
/*
 * Build one parameter message for @param_item in @msg.
 *
 * The values are collected first, one per supported configuration mode:
 * for driverinit the pending new value is preferred over the current one,
 * for every other mode the driver's get callback is asked.  Only when all
 * of them are available is the netlink message assembled.
 *
 * Returns 0 on success, -EOPNOTSUPP when driverinit is supported but no
 * driverinit value is valid yet, the error of the driver's get callback,
 * or -EMSGSIZE for any failure while composing the message (the partially
 * written message is cancelled in that case).
 */
static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
				 unsigned int port_index,
				 struct devlink_param_item *param_item,
				 enum devlink_command cmd,
				 u32 portid, u32 seq, int flags)
{
	union devlink_param_value param_value[DEVLINK_PARAM_CMODE_MAX + 1];
	bool param_value_set[DEVLINK_PARAM_CMODE_MAX + 1] = {};
	const struct devlink_param *param = param_item->param;
	struct devlink_param_gset_ctx ctx;
	struct nlattr *param_values_list;
	struct nlattr *param_attr;
	int nla_type;
	void *hdr;
	int err;
	int i;

	/* Get value from driver part to driverinit configuration mode */
	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
		if (!devlink_param_cmode_is_supported(param, i))
			continue;
		if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
			if (param_item->driverinit_value_new_valid)
				param_value[i] = param_item->driverinit_value_new;
			else if (param_item->driverinit_value_valid)
				param_value[i] = param_item->driverinit_value;
			else
				return -EOPNOTSUPP;
		} else {
			ctx.cmode = i;
			err = devlink_param_get(devlink, param, &ctx);
			if (err)
				return err;
			param_value[i] = ctx.val;
		}
		param_value_set[i] = true;
	}

	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
	if (!hdr)
		return -EMSGSIZE;

	if (devlink_nl_put_handle(msg, devlink))
		goto genlmsg_cancel;

	/* Only the port flavours of the command carry a port index. */
	if (cmd == DEVLINK_CMD_PORT_PARAM_GET ||
	    cmd == DEVLINK_CMD_PORT_PARAM_NEW ||
	    cmd == DEVLINK_CMD_PORT_PARAM_DEL)
		if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, port_index))
			goto genlmsg_cancel;

	param_attr = nla_nest_start_noflag(msg, DEVLINK_ATTR_PARAM);
	if (!param_attr)
		goto genlmsg_cancel;
	if (nla_put_string(msg, DEVLINK_ATTR_PARAM_NAME, param->name))
		goto param_nest_cancel;
	if (param->generic && nla_put_flag(msg, DEVLINK_ATTR_PARAM_GENERIC))
		goto param_nest_cancel;

	nla_type = devlink_param_type_to_nla_type(param->type);
	if (nla_type < 0)
		goto param_nest_cancel;
	if (nla_put_u8(msg, DEVLINK_ATTR_PARAM_TYPE, nla_type))
		goto param_nest_cancel;

	param_values_list = nla_nest_start_noflag(msg,
						  DEVLINK_ATTR_PARAM_VALUES_LIST);
	if (!param_values_list)
		goto param_nest_cancel;

	for (i = 0; i <= DEVLINK_PARAM_CMODE_MAX; i++) {
		if (!param_value_set[i])
			continue;
		err = devlink_nl_param_value_fill_one(msg, param->type,
						      i, param_value[i]);
		if (err)
			goto values_list_nest_cancel;
	}

	nla_nest_end(msg, param_values_list);
	nla_nest_end(msg, param_attr);
	genlmsg_end(msg, hdr);
	return 0;

values_list_nest_cancel:
	/* Error path: drop the half-built list instead of finalizing it. */
	nla_nest_cancel(msg, param_values_list);
param_nest_cancel:
	nla_nest_cancel(msg, param_attr);
genlmsg_cancel:
	genlmsg_cancel(msg, hdr);
	return -EMSGSIZE;
}
*/ if (!devl_is_registered(devlink) || !devlink_nl_notify_need(devlink)) return; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return; err = devlink_nl_param_fill(msg, devlink, port_index, param_item, cmd, 0, 0, 0); if (err) { nlmsg_free(msg); return; } devlink_nl_notify_send(devlink, msg); } static void devlink_params_notify(struct devlink *devlink, enum devlink_command cmd) { struct devlink_param_item *param_item; unsigned long param_id; xa_for_each(&devlink->params, param_id, param_item) devlink_param_notify(devlink, 0, param_item, cmd); } void devlink_params_notify_register(struct devlink *devlink) { devlink_params_notify(devlink, DEVLINK_CMD_PARAM_NEW); } void devlink_params_notify_unregister(struct devlink *devlink) { devlink_params_notify(devlink, DEVLINK_CMD_PARAM_DEL); } static int devlink_nl_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink, struct netlink_callback *cb, int flags) { struct devlink_nl_dump_state *state = devlink_dump_state(cb); struct devlink_param_item *param_item; unsigned long param_id; int err = 0; xa_for_each_start(&devlink->params, param_id, param_item, state->idx) { err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err == -EOPNOTSUPP) { err = 0; } else if (err) { state->idx = param_id; break; } } return err; } int devlink_nl_param_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { return devlink_nl_dumpit(skb, cb, devlink_nl_param_get_dump_one); } static int devlink_param_type_get_from_info(struct genl_info *info, enum devlink_param_type *param_type) { if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_TYPE)) return -EINVAL; switch (nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_TYPE])) { case NLA_U8: *param_type = DEVLINK_PARAM_TYPE_U8; break; case NLA_U16: *param_type = DEVLINK_PARAM_TYPE_U16; break; case NLA_U32: *param_type = DEVLINK_PARAM_TYPE_U32; break; case NLA_STRING: *param_type = 
DEVLINK_PARAM_TYPE_STRING; break; case NLA_FLAG: *param_type = DEVLINK_PARAM_TYPE_BOOL; break; default: return -EINVAL; } return 0; } static int devlink_param_value_get_from_info(const struct devlink_param *param, struct genl_info *info, union devlink_param_value *value) { struct nlattr *param_data; int len; param_data = info->attrs[DEVLINK_ATTR_PARAM_VALUE_DATA]; if (param->type != DEVLINK_PARAM_TYPE_BOOL && !param_data) return -EINVAL; switch (param->type) { case DEVLINK_PARAM_TYPE_U8: if (nla_len(param_data) != sizeof(u8)) return -EINVAL; value->vu8 = nla_get_u8(param_data); break; case DEVLINK_PARAM_TYPE_U16: if (nla_len(param_data) != sizeof(u16)) return -EINVAL; value->vu16 = nla_get_u16(param_data); break; case DEVLINK_PARAM_TYPE_U32: if (nla_len(param_data) != sizeof(u32)) return -EINVAL; value->vu32 = nla_get_u32(param_data); break; case DEVLINK_PARAM_TYPE_STRING: len = strnlen(nla_data(param_data), nla_len(param_data)); if (len == nla_len(param_data) || len >= __DEVLINK_PARAM_MAX_STRING_VALUE) return -EINVAL; strcpy(value->vstr, nla_data(param_data)); break; case DEVLINK_PARAM_TYPE_BOOL: if (param_data && nla_len(param_data)) return -EINVAL; value->vbool = nla_get_flag(param_data); break; } return 0; } static struct devlink_param_item * devlink_param_get_from_info(struct xarray *params, struct genl_info *info) { char *param_name; if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_NAME)) return NULL; param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]); return devlink_param_find_by_name(params, param_name); } int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; struct devlink_param_item *param_item; struct sk_buff *msg; int err; param_item = devlink_param_get_from_info(&devlink->params, info); if (!param_item) return -EINVAL; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; err = devlink_nl_param_fill(msg, devlink, 0, param_item, DEVLINK_CMD_PARAM_GET, 
info->snd_portid, info->snd_seq, 0); if (err) { nlmsg_free(msg); return err; } return genlmsg_reply(msg, info); } static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink, unsigned int port_index, struct xarray *params, struct genl_info *info, enum devlink_command cmd) { enum devlink_param_type param_type; struct devlink_param_gset_ctx ctx; enum devlink_param_cmode cmode; struct devlink_param_item *param_item; const struct devlink_param *param; union devlink_param_value value; int err = 0; param_item = devlink_param_get_from_info(params, info); if (!param_item) return -EINVAL; param = param_item->param; err = devlink_param_type_get_from_info(info, &param_type); if (err) return err; if (param_type != param->type) return -EINVAL; err = devlink_param_value_get_from_info(param, info, &value); if (err) return err; if (param->validate) { err = param->validate(devlink, param->id, value, info->extack); if (err) return err; } if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PARAM_VALUE_CMODE)) return -EINVAL; cmode = nla_get_u8(info->attrs[DEVLINK_ATTR_PARAM_VALUE_CMODE]); if (!devlink_param_cmode_is_supported(param, cmode)) return -EOPNOTSUPP; if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) { param_item->driverinit_value_new = value; param_item->driverinit_value_new_valid = true; } else { if (!param->set) return -EOPNOTSUPP; ctx.val = value; ctx.cmode = cmode; err = devlink_param_set(devlink, param, &ctx, info->extack); if (err) return err; } devlink_param_notify(devlink, port_index, param_item, cmd); return 0; } int devlink_nl_param_set_doit(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params, info, DEVLINK_CMD_PARAM_NEW); } int devlink_nl_port_param_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb) { NL_SET_ERR_MSG(cb->extack, "Port params are not supported"); return msg->len; } int devlink_nl_port_param_get_doit(struct sk_buff *skb, struct genl_info 
*info) { NL_SET_ERR_MSG(info->extack, "Port params are not supported"); return -EINVAL; } int devlink_nl_port_param_set_doit(struct sk_buff *skb, struct genl_info *info) { NL_SET_ERR_MSG(info->extack, "Port params are not supported"); return -EINVAL; } static int devlink_param_verify(const struct devlink_param *param) { if (!param || !param->name || !param->supported_cmodes) return -EINVAL; if (param->generic) return devlink_param_generic_verify(param); else return devlink_param_driver_verify(param); } static int devlink_param_register(struct devlink *devlink, const struct devlink_param *param) { struct devlink_param_item *param_item; int err; WARN_ON(devlink_param_verify(param)); WARN_ON(devlink_param_find_by_name(&devlink->params, param->name)); if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT)) WARN_ON(param->get || param->set); else WARN_ON(!param->get || !param->set); param_item = kzalloc(sizeof(*param_item), GFP_KERNEL); if (!param_item) return -ENOMEM; param_item->param = param; err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL); if (err) goto err_xa_insert; devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); return 0; err_xa_insert: kfree(param_item); return err; } static void devlink_param_unregister(struct devlink *devlink, const struct devlink_param *param) { struct devlink_param_item *param_item; param_item = devlink_param_find_by_id(&devlink->params, param->id); if (WARN_ON(!param_item)) return; devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL); xa_erase(&devlink->params, param->id); kfree(param_item); } /** * devl_params_register - register configuration parameters * * @devlink: devlink * @params: configuration parameters array * @params_count: number of parameters provided * * Register the configuration parameters supported by the driver. 
/**
 *	devl_params_register - register configuration parameters
 *
 *	@devlink: devlink
 *	@params: configuration parameters array
 *	@params_count: number of parameters provided
 *
 *	Register the configuration parameters supported by the driver.
 *	The devlink instance lock must be held.  If one registration
 *	fails, the parameters registered so far are unregistered again in
 *	reverse order and the error is returned.
 */
int devl_params_register(struct devlink *devlink,
			 const struct devlink_param *params,
			 size_t params_count)
{
	int i, err;

	lockdep_assert_held(&devlink->lock);

	for (i = 0; i < params_count; i++) {
		err = devlink_param_register(devlink, &params[i]);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	/* params[i] itself failed; undo params[i - 1] down to params[0]. */
	while (i-- > 0)
		devlink_param_unregister(devlink, &params[i]);
	return err;
}
EXPORT_SYMBOL_GPL(devl_params_register);

/* Locked wrapper: takes the devlink lock around devl_params_register(). */
int devlink_params_register(struct devlink *devlink,
			    const struct devlink_param *params,
			    size_t params_count)
{
	int err;

	devl_lock(devlink);
	err = devl_params_register(devlink, params, params_count);
	devl_unlock(devlink);

	return err;
}
EXPORT_SYMBOL_GPL(devlink_params_register);

/**
 *	devl_params_unregister - unregister configuration parameters
 *	@devlink: devlink
 *	@params: configuration parameters to unregister
 *	@params_count: number of parameters provided
 *
 *	The devlink instance lock must be held.
 */
void devl_params_unregister(struct devlink *devlink,
			    const struct devlink_param *params,
			    size_t params_count)
{
	int i;

	lockdep_assert_held(&devlink->lock);

	for (i = 0; i < params_count; i++)
		devlink_param_unregister(devlink, &params[i]);
}
EXPORT_SYMBOL_GPL(devl_params_unregister);

/* Locked wrapper: takes the devlink lock around devl_params_unregister(). */
void devlink_params_unregister(struct devlink *devlink,
			       const struct devlink_param *params,
			       size_t params_count)
{
	devl_lock(devlink);
	devl_params_unregister(devlink, params, params_count);
	devl_unlock(devlink);
}
EXPORT_SYMBOL_GPL(devlink_params_unregister);
* * Note that lockless call of this function relies on the * driver to maintain following basic sane behavior: * 1) Driver ensures a call to this function cannot race with * registering/unregistering the parameter with the same parameter ID. * 2) Driver ensures a call to this function cannot race with * devl_param_driverinit_value_set() call with the same parameter ID. * 3) Driver ensures a call to this function cannot race with * reload operation. * If the driver is not able to comply, it has to take the devlink->lock * while calling this. */ int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *val) { struct devlink_param_item *param_item; if (WARN_ON(!devlink_reload_supported(devlink->ops))) return -EOPNOTSUPP; param_item = devlink_param_find_by_id(&devlink->params, param_id); if (!param_item) return -EINVAL; if (!param_item->driverinit_value_valid) return -EOPNOTSUPP; if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param, DEVLINK_PARAM_CMODE_DRIVERINIT))) return -EOPNOTSUPP; *val = param_item->driverinit_value; return 0; } EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get); /** * devl_param_driverinit_value_set - set value of configuration * parameter for driverinit * configuration mode * * @devlink: devlink * @param_id: parameter ID * @init_val: value of parameter to set for driverinit configuration mode * * This function should be used by the driver to set driverinit * configuration mode default value. 
*/ void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id, union devlink_param_value init_val) { struct devlink_param_item *param_item; devl_assert_locked(devlink); param_item = devlink_param_find_by_id(&devlink->params, param_id); if (WARN_ON(!param_item)) return; if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param, DEVLINK_PARAM_CMODE_DRIVERINIT))) return; param_item->driverinit_value = init_val; param_item->driverinit_value_valid = true; devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set); void devlink_params_driverinit_load_new(struct devlink *devlink) { struct devlink_param_item *param_item; unsigned long param_id; xa_for_each(&devlink->params, param_id, param_item) { if (!devlink_param_cmode_is_supported(param_item->param, DEVLINK_PARAM_CMODE_DRIVERINIT) || !param_item->driverinit_value_new_valid) continue; param_item->driverinit_value = param_item->driverinit_value_new; param_item->driverinit_value_valid = true; param_item->driverinit_value_new_valid = false; } } /** * devl_param_value_changed - notify devlink on a parameter's value * change. Should be called by the driver * right after the change. * * @devlink: devlink * @param_id: parameter ID * * This function should be used by the driver to notify devlink on value * change, excluding driverinit configuration mode. * For driverinit configuration mode driver should use the function */ void devl_param_value_changed(struct devlink *devlink, u32 param_id) { struct devlink_param_item *param_item; param_item = devlink_param_find_by_id(&devlink->params, param_id); WARN_ON(!param_item); devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW); } EXPORT_SYMBOL_GPL(devl_param_value_changed);
4457 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ipi #if !defined(_TRACE_IPI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_IPI_H #include <linux/tracepoint.h> /** * ipi_raise - called when a smp cross call is made * * @mask: mask of recipient CPUs for the IPI * @reason: string identifying the IPI purpose * * It is necessary for @reason to be a static string declared with * __tracepoint_string. */ TRACE_EVENT(ipi_raise, TP_PROTO(const struct cpumask *mask, const char *reason), TP_ARGS(mask, reason), TP_STRUCT__entry( __bitmask(target_cpus, nr_cpumask_bits) __field(const char *, reason) ), TP_fast_assign( __assign_bitmask(target_cpus, cpumask_bits(mask), nr_cpumask_bits); __entry->reason = reason; ), TP_printk("target_mask=%s (%s)", __get_bitmask(target_cpus), __entry->reason) ); TRACE_EVENT(ipi_send_cpu, TP_PROTO(const unsigned int cpu, unsigned long callsite, void *callback), TP_ARGS(cpu, callsite, callback), TP_STRUCT__entry( __field(unsigned int, cpu) __field(void *, callsite) __field(void *, callback) ), TP_fast_assign( __entry->cpu = cpu; __entry->callsite = (void *)callsite; __entry->callback = callback; ), TP_printk("cpu=%u callsite=%pS callback=%pS", __entry->cpu, __entry->callsite, __entry->callback) ); TRACE_EVENT(ipi_send_cpumask, TP_PROTO(const struct cpumask *cpumask, unsigned long callsite, void *callback), TP_ARGS(cpumask, callsite, callback), TP_STRUCT__entry( __cpumask(cpumask) __field(void *, callsite) __field(void *, callback) ), TP_fast_assign( __assign_cpumask(cpumask, 
cpumask_bits(cpumask)); __entry->callsite = (void *)callsite; __entry->callback = callback; ), TP_printk("cpumask=%s callsite=%pS callback=%pS", __get_cpumask(cpumask), __entry->callsite, __entry->callback) ); DECLARE_EVENT_CLASS(ipi_handler, TP_PROTO(const char *reason), TP_ARGS(reason), TP_STRUCT__entry( __field(const char *, reason) ), TP_fast_assign( __entry->reason = reason; ), TP_printk("(%s)", __entry->reason) ); /** * ipi_entry - called immediately before the IPI handler * * @reason: string identifying the IPI purpose * * It is necessary for @reason to be a static string declared with * __tracepoint_string, ideally the same as used with trace_ipi_raise * for that IPI. */ DEFINE_EVENT(ipi_handler, ipi_entry, TP_PROTO(const char *reason), TP_ARGS(reason) ); /** * ipi_exit - called immediately after the IPI handler returns * * @reason: string identifying the IPI purpose * * It is necessary for @reason to be a static string declared with * __tracepoint_string, ideally the same as used with trace_ipi_raise for * that IPI. */ DEFINE_EVENT(ipi_handler, ipi_exit, TP_PROTO(const char *reason), TP_ARGS(reason) ); #endif /* _TRACE_IPI_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1057 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM bpf_test_run #if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_BPF_TEST_RUN_H #include <linux/tracepoint.h> TRACE_EVENT(bpf_trigger_tp, TP_PROTO(int nonce), TP_ARGS(nonce), TP_STRUCT__entry( __field(int, nonce) ), TP_fast_assign( __entry->nonce = nonce; ), TP_printk("nonce %d", __entry->nonce) ); DECLARE_EVENT_CLASS(bpf_test_finish, TP_PROTO(int *err), TP_ARGS(err), TP_STRUCT__entry( __field(int, err) ), TP_fast_assign( __entry->err = *err; ), TP_printk("bpf_test_finish with err=%d", __entry->err) ); #ifdef DEFINE_EVENT_WRITABLE #undef BPF_TEST_RUN_DEFINE_EVENT #define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \ PARAMS(args), size) #else #undef BPF_TEST_RUN_DEFINE_EVENT #define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \ DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args)) #endif BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish, TP_PROTO(int *err), TP_ARGS(err), sizeof(int) ); #endif /* This part must be outside protection */ #include <trace/define_trace.h>
6947 550 6946 227 230 403 330 403 402 237 225 237 237 237 6 6 3 3 90 90 69 90 90 69 69 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 // SPDX-License-Identifier: GPL-2.0 /* * Fast batching percpu counters. 
*/ #include <linux/percpu_counter.h> #include <linux/mutex.h> #include <linux/init.h> #include <linux/cpu.h> #include <linux/module.h> #include <linux/debugobjects.h> #ifdef CONFIG_HOTPLUG_CPU static LIST_HEAD(percpu_counters); static DEFINE_SPINLOCK(percpu_counters_lock); #endif #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER static const struct debug_obj_descr percpu_counter_debug_descr; static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state) { struct percpu_counter *fbc = addr; switch (state) { case ODEBUG_STATE_ACTIVE: percpu_counter_destroy(fbc); debug_object_free(fbc, &percpu_counter_debug_descr); return true; default: return false; } } static const struct debug_obj_descr percpu_counter_debug_descr = { .name = "percpu_counter", .fixup_free = percpu_counter_fixup_free, }; static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) { debug_object_init(fbc, &percpu_counter_debug_descr); debug_object_activate(fbc, &percpu_counter_debug_descr); } static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) { debug_object_deactivate(fbc, &percpu_counter_debug_descr); debug_object_free(fbc, &percpu_counter_debug_descr); } #else /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ static inline void debug_percpu_counter_activate(struct percpu_counter *fbc) { } static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) { } #endif /* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); for_each_possible_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); *pcount = 0; } fbc->count = amount; raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_set); /* * Add to a counter while respecting batch size. * * There are 2 implementations, both dealing with the following problem: * * The decision slow path/fast path and the actual update must be atomic. 
* Otherwise a call in process context could check the current values and * decide that the fast path can be used. If now an interrupt occurs before * the this_cpu_add(), and the interrupt updates this_cpu(*fbc->counters), * then the this_cpu_add() that is executed after the interrupt has completed * can produce values larger than "batch" or even overflows. */ #ifdef CONFIG_HAVE_CMPXCHG_LOCAL /* * Safety against interrupts is achieved in 2 ways: * 1. the fast path uses local cmpxchg (note: no lock prefix) * 2. the slow path operates with interrupts disabled */ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; count = this_cpu_read(*fbc->counters); do { if (unlikely(abs(count + amount) >= batch)) { raw_spin_lock_irqsave(&fbc->lock, flags); /* * Note: by now we might have migrated to another CPU * or the value might have changed. */ count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); return; } } while (!this_cpu_try_cmpxchg(*fbc->counters, &count, count + amount)); } #else /* * local_irq_save() is used to make the function irq safe: * - The slow path would be ok as protected by an irq-safe spinlock. * - this_cpu_add would be ok as it is irq-safe by definition. 
*/ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; unsigned long flags; local_irq_save(flags); count = __this_cpu_read(*fbc->counters) + amount; if (abs(count) >= batch) { raw_spin_lock(&fbc->lock); fbc->count += count; __this_cpu_sub(*fbc->counters, count - amount); raw_spin_unlock(&fbc->lock); } else { this_cpu_add(*fbc->counters, amount); } local_irq_restore(flags); } #endif EXPORT_SYMBOL(percpu_counter_add_batch); /* * For percpu_counter with a big batch, the devication of its count could * be big, and there is requirement to reduce the deviation, like when the * counter's batch could be runtime decreased to get a better accuracy, * which can be achieved by running this sync function on each CPU. */ void percpu_counter_sync(struct percpu_counter *fbc) { unsigned long flags; s64 count; raw_spin_lock_irqsave(&fbc->lock, flags); count = __this_cpu_read(*fbc->counters); fbc->count += count; __this_cpu_sub(*fbc->counters, count); raw_spin_unlock_irqrestore(&fbc->lock, flags); } EXPORT_SYMBOL(percpu_counter_sync); /* * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive(). * * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums * from CPUs that are in the process of being taken offline. Dying cpus have * been removed from the online mask, but may not have had the hotplug dead * notifier called to fold the percpu count back into the global counter sum. * By including dying CPUs in the iteration mask, we avoid this race condition * so __percpu_counter_sum() just does the right thing when CPUs are being taken * offline. 
*/ s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); ret = fbc->count; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; } raw_spin_unlock_irqrestore(&fbc->lock, flags); return ret; } EXPORT_SYMBOL(__percpu_counter_sum); int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters, struct lock_class_key *key) { unsigned long flags __maybe_unused; size_t counter_size; s32 __percpu *counters; u32 i; counter_size = ALIGN(sizeof(*counters), __alignof__(*counters)); counters = __alloc_percpu_gfp(nr_counters * counter_size, __alignof__(*counters), gfp); if (!counters) { fbc[0].counters = NULL; return -ENOMEM; } for (i = 0; i < nr_counters; i++) { raw_spin_lock_init(&fbc[i].lock); lockdep_set_class(&fbc[i].lock, key); #ifdef CONFIG_HOTPLUG_CPU INIT_LIST_HEAD(&fbc[i].list); #endif fbc[i].count = amount; fbc[i].counters = (void __percpu *)counters + i * counter_size; debug_percpu_counter_activate(&fbc[i]); } #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_add(&fbc[i].list, &percpu_counters); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif return 0; } EXPORT_SYMBOL(__percpu_counter_init_many); void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { unsigned long flags __maybe_unused; u32 i; if (WARN_ON_ONCE(!fbc)) return; if (!fbc[0].counters) return; for (i = 0; i < nr_counters; i++) debug_percpu_counter_deactivate(&fbc[i]); #ifdef CONFIG_HOTPLUG_CPU spin_lock_irqsave(&percpu_counters_lock, flags); for (i = 0; i < nr_counters; i++) list_del(&fbc[i].list); spin_unlock_irqrestore(&percpu_counters_lock, flags); #endif free_percpu(fbc[0].counters); for (i = 0; i < nr_counters; i++) fbc[i].counters = NULL; } EXPORT_SYMBOL(percpu_counter_destroy_many); int percpu_counter_batch __read_mostly 
= 32; EXPORT_SYMBOL(percpu_counter_batch); static int compute_batch_value(unsigned int cpu) { int nr = num_online_cpus(); percpu_counter_batch = max(32, nr*2); return 0; } static int percpu_counter_cpu_dead(unsigned int cpu) { #ifdef CONFIG_HOTPLUG_CPU struct percpu_counter *fbc; compute_batch_value(cpu); spin_lock_irq(&percpu_counters_lock); list_for_each_entry(fbc, &percpu_counters, list) { s32 *pcount; raw_spin_lock(&fbc->lock); pcount = per_cpu_ptr(fbc->counters, cpu); fbc->count += *pcount; *pcount = 0; raw_spin_unlock(&fbc->lock); } spin_unlock_irq(&percpu_counters_lock); #endif return 0; } /* * Compare counter against given value. * Return 1 if greater, 0 if equal and -1 if less */ int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { s64 count; count = percpu_counter_read(fbc); /* Check to see if rough count will be sufficient for comparison */ if (abs(count - rhs) > (batch * num_online_cpus())) { if (count > rhs) return 1; else return -1; } /* Need to use precise count */ count = percpu_counter_sum(fbc); if (count > rhs) return 1; else if (count < rhs) return -1; else return 0; } EXPORT_SYMBOL(__percpu_counter_compare); /* * Compare counter, and add amount if total is: less than or equal to limit if * amount is positive, or greater than or equal to limit if amount is negative. * Return true if amount is added, or false if total would be beyond the limit. * * Negative limit is allowed, but unusual. * When negative amounts (subs) are given to percpu_counter_limited_add(), * the limit would most naturally be 0 - but other limits are also allowed. * * Overflow beyond S64_MAX is not allowed for: counter, limit and amount * are all assumed to be sane (far from S64_MIN and S64_MAX). 
*/ bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount, s32 batch) { s64 count; s64 unknown; unsigned long flags; bool good = false; if (amount == 0) return true; local_irq_save(flags); unknown = batch * num_online_cpus(); count = __this_cpu_read(*fbc->counters); /* Skip taking the lock when safe */ if (abs(count + amount) <= batch && ((amount > 0 && fbc->count + unknown <= limit) || (amount < 0 && fbc->count - unknown >= limit))) { this_cpu_add(*fbc->counters, amount); local_irq_restore(flags); return true; } raw_spin_lock(&fbc->lock); count = fbc->count + amount; /* Skip percpu_counter_sum() when safe */ if (amount > 0) { if (count - unknown > limit) goto out; if (count + unknown <= limit) good = true; } else { if (count + unknown < limit) goto out; if (count - unknown >= limit) good = true; } if (!good) { s32 *pcount; int cpu; for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) { pcount = per_cpu_ptr(fbc->counters, cpu); count += *pcount; } if (amount > 0) { if (count > limit) goto out; } else { if (count < limit) goto out; } good = true; } count = __this_cpu_read(*fbc->counters); fbc->count += count + amount; __this_cpu_sub(*fbc->counters, count); out: raw_spin_unlock(&fbc->lock); local_irq_restore(flags); return good; } static int __init percpu_counter_startup(void) { int ret; ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online", compute_batch_value, NULL); WARN_ON(ret < 0); ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD, "lib/percpu_cnt:dead", NULL, percpu_counter_cpu_dead); WARN_ON(ret < 0); return 0; } module_init(percpu_counter_startup);
8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 // SPDX-License-Identifier: GPL-2.0-or-later /* * Asynchronous Cryptographic Hash operations. * * This is the implementation of the ahash (asynchronous hash) API. It differs * from shash (synchronous hash) in that ahash supports asynchronous operations, * and it hashes data from scatterlists instead of virtually addressed buffers. * * The ahash API provides access to both ahash and shash algorithms. The shash * API only provides access to shash algorithms. 
* * Copyright (c) 2008 Loc Ho <lho@amcc.com> */ #include <crypto/scatterwalk.h> #include <linux/cryptouser.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <linux/string.h> #include <net/netlink.h> #include "hash.h" #define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e struct crypto_hash_walk { char *data; unsigned int offset; unsigned int flags; struct page *pg; unsigned int entrylen; unsigned int total; struct scatterlist *sg; }; static int hash_walk_next(struct crypto_hash_walk *walk) { unsigned int offset = walk->offset; unsigned int nbytes = min(walk->entrylen, ((unsigned int)(PAGE_SIZE)) - offset); walk->data = kmap_local_page(walk->pg); walk->data += offset; walk->entrylen -= nbytes; return nbytes; } static int hash_walk_new_entry(struct crypto_hash_walk *walk) { struct scatterlist *sg; sg = walk->sg; walk->offset = sg->offset; walk->pg = sg_page(walk->sg) + (walk->offset >> PAGE_SHIFT); walk->offset = offset_in_page(walk->offset); walk->entrylen = sg->length; if (walk->entrylen > walk->total) walk->entrylen = walk->total; walk->total -= walk->entrylen; return hash_walk_next(walk); } static int crypto_hash_walk_first(struct ahash_request *req, struct crypto_hash_walk *walk) { walk->total = req->nbytes; if (!walk->total) { walk->entrylen = 0; return 0; } walk->sg = req->src; walk->flags = req->base.flags; return hash_walk_new_entry(walk); } static int crypto_hash_walk_done(struct crypto_hash_walk *walk, int err) { walk->data -= walk->offset; kunmap_local(walk->data); crypto_yield(walk->flags); if (err) return err; if (walk->entrylen) { walk->offset = 0; walk->pg++; return hash_walk_next(walk); } if (!walk->total) return 0; walk->sg = sg_next(walk->sg); return hash_walk_new_entry(walk); } static inline int crypto_hash_walk_last(struct crypto_hash_walk *walk) { return !(walk->entrylen | walk->total); } /* * For an ahash tfm that is using an shash algorithm 
(instead of an ahash * algorithm), this returns the underlying shash tfm. */ static inline struct crypto_shash *ahash_to_shash(struct crypto_ahash *tfm) { return *(struct crypto_shash **)crypto_ahash_ctx(tfm); } static inline struct shash_desc *prepare_shash_desc(struct ahash_request *req, struct crypto_ahash *tfm) { struct shash_desc *desc = ahash_request_ctx(req); desc->tfm = ahash_to_shash(tfm); return desc; } int shash_ahash_update(struct ahash_request *req, struct shash_desc *desc) { struct crypto_hash_walk walk; int nbytes; for (nbytes = crypto_hash_walk_first(req, &walk); nbytes > 0; nbytes = crypto_hash_walk_done(&walk, nbytes)) nbytes = crypto_shash_update(desc, walk.data, nbytes); return nbytes; } EXPORT_SYMBOL_GPL(shash_ahash_update); int shash_ahash_finup(struct ahash_request *req, struct shash_desc *desc) { struct crypto_hash_walk walk; int nbytes; nbytes = crypto_hash_walk_first(req, &walk); if (!nbytes) return crypto_shash_final(desc, req->result); do { nbytes = crypto_hash_walk_last(&walk) ? 
crypto_shash_finup(desc, walk.data, nbytes, req->result) : crypto_shash_update(desc, walk.data, nbytes); nbytes = crypto_hash_walk_done(&walk, nbytes); } while (nbytes > 0); return nbytes; } EXPORT_SYMBOL_GPL(shash_ahash_finup); int shash_ahash_digest(struct ahash_request *req, struct shash_desc *desc) { unsigned int nbytes = req->nbytes; struct scatterlist *sg; unsigned int offset; int err; if (nbytes && (sg = req->src, offset = sg->offset, nbytes <= min(sg->length, ((unsigned int)(PAGE_SIZE)) - offset))) { void *data; data = kmap_local_page(sg_page(sg)); err = crypto_shash_digest(desc, data + offset, nbytes, req->result); kunmap_local(data); } else err = crypto_shash_init(desc) ?: shash_ahash_finup(req, desc); return err; } EXPORT_SYMBOL_GPL(shash_ahash_digest); static void crypto_exit_ahash_using_shash(struct crypto_tfm *tfm) { struct crypto_shash **ctx = crypto_tfm_ctx(tfm); crypto_free_shash(*ctx); } static int crypto_init_ahash_using_shash(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; struct crypto_ahash *crt = __crypto_ahash_cast(tfm); struct crypto_shash **ctx = crypto_tfm_ctx(tfm); struct crypto_shash *shash; if (!crypto_mod_get(calg)) return -EAGAIN; shash = crypto_create_tfm(calg, &crypto_shash_type); if (IS_ERR(shash)) { crypto_mod_put(calg); return PTR_ERR(shash); } crt->using_shash = true; *ctx = shash; tfm->exit = crypto_exit_ahash_using_shash; crypto_ahash_set_flags(crt, crypto_shash_get_flags(shash) & CRYPTO_TFM_NEED_KEY); crt->reqsize = sizeof(struct shash_desc) + crypto_shash_descsize(shash); return 0; } static int ahash_nosetkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { return -ENOSYS; } static void ahash_set_needkey(struct crypto_ahash *tfm, struct ahash_alg *alg) { if (alg->setkey != ahash_nosetkey && !(alg->halg.base.cra_flags & CRYPTO_ALG_OPTIONAL_KEY)) crypto_ahash_set_flags(tfm, CRYPTO_TFM_NEED_KEY); } int crypto_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int keylen) { if 
(likely(tfm->using_shash)) { struct crypto_shash *shash = ahash_to_shash(tfm); int err; err = crypto_shash_setkey(shash, key, keylen); if (unlikely(err)) { crypto_ahash_set_flags(tfm, crypto_shash_get_flags(shash) & CRYPTO_TFM_NEED_KEY); return err; } } else { struct ahash_alg *alg = crypto_ahash_alg(tfm); int err; err = alg->setkey(tfm, key, keylen); if (unlikely(err)) { ahash_set_needkey(tfm, alg); return err; } } crypto_ahash_clear_flags(tfm, CRYPTO_TFM_NEED_KEY); return 0; } EXPORT_SYMBOL_GPL(crypto_ahash_setkey); int crypto_ahash_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return crypto_shash_init(prepare_shash_desc(req, tfm)); if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return crypto_ahash_alg(tfm)->init(req); } EXPORT_SYMBOL_GPL(crypto_ahash_init); static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt, bool has_state) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); unsigned int ds = crypto_ahash_digestsize(tfm); struct ahash_request *subreq; unsigned int subreq_size; unsigned int reqsize; u8 *result; gfp_t gfp; u32 flags; subreq_size = sizeof(*subreq); reqsize = crypto_ahash_reqsize(tfm); reqsize = ALIGN(reqsize, crypto_tfm_ctx_alignment()); subreq_size += reqsize; subreq_size += ds; flags = ahash_request_flags(req); gfp = (flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? 
GFP_KERNEL : GFP_ATOMIC; subreq = kmalloc(subreq_size, gfp); if (!subreq) return -ENOMEM; ahash_request_set_tfm(subreq, tfm); ahash_request_set_callback(subreq, flags, cplt, req); result = (u8 *)(subreq + 1) + reqsize; ahash_request_set_crypt(subreq, req->src, result, req->nbytes); if (has_state) { void *state; state = kmalloc(crypto_ahash_statesize(tfm), gfp); if (!state) { kfree(subreq); return -ENOMEM; } crypto_ahash_export(req, state); crypto_ahash_import(subreq, state); kfree_sensitive(state); } req->priv = subreq; return 0; } static void ahash_restore_req(struct ahash_request *req, int err) { struct ahash_request *subreq = req->priv; if (!err) memcpy(req->result, subreq->result, crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); req->priv = NULL; kfree_sensitive(subreq); } int crypto_ahash_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return shash_ahash_update(req, ahash_request_ctx(req)); return crypto_ahash_alg(tfm)->update(req); } EXPORT_SYMBOL_GPL(crypto_ahash_update); int crypto_ahash_final(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return crypto_shash_final(ahash_request_ctx(req), req->result); return crypto_ahash_alg(tfm)->final(req); } EXPORT_SYMBOL_GPL(crypto_ahash_final); int crypto_ahash_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return shash_ahash_finup(req, ahash_request_ctx(req)); return crypto_ahash_alg(tfm)->finup(req); } EXPORT_SYMBOL_GPL(crypto_ahash_finup); int crypto_ahash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return shash_ahash_digest(req, prepare_shash_desc(req, tfm)); if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return crypto_ahash_alg(tfm)->digest(req); } EXPORT_SYMBOL_GPL(crypto_ahash_digest); static void 
ahash_def_finup_done2(void *data, int err) { struct ahash_request *areq = data; if (err == -EINPROGRESS) return; ahash_restore_req(areq, err); ahash_request_complete(areq, err); } static int ahash_def_finup_finish1(struct ahash_request *req, int err) { struct ahash_request *subreq = req->priv; if (err) goto out; subreq->base.complete = ahash_def_finup_done2; err = crypto_ahash_alg(crypto_ahash_reqtfm(req))->final(subreq); if (err == -EINPROGRESS || err == -EBUSY) return err; out: ahash_restore_req(req, err); return err; } static void ahash_def_finup_done1(void *data, int err) { struct ahash_request *areq = data; struct ahash_request *subreq; if (err == -EINPROGRESS) goto out; subreq = areq->priv; subreq->base.flags &= CRYPTO_TFM_REQ_MAY_BACKLOG; err = ahash_def_finup_finish1(areq, err); if (err == -EINPROGRESS || err == -EBUSY) return; out: ahash_request_complete(areq, err); } static int ahash_def_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); int err; err = ahash_save_req(req, ahash_def_finup_done1, true); if (err) return err; err = crypto_ahash_alg(tfm)->update(req->priv); if (err == -EINPROGRESS || err == -EBUSY) return err; return ahash_def_finup_finish1(req, err); } int crypto_ahash_export(struct ahash_request *req, void *out) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return crypto_shash_export(ahash_request_ctx(req), out); return crypto_ahash_alg(tfm)->export(req, out); } EXPORT_SYMBOL_GPL(crypto_ahash_export); int crypto_ahash_import(struct ahash_request *req, const void *in) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); if (likely(tfm->using_shash)) return crypto_shash_import(prepare_shash_desc(req, tfm), in); if (crypto_ahash_get_flags(tfm) & CRYPTO_TFM_NEED_KEY) return -ENOKEY; return crypto_ahash_alg(tfm)->import(req, in); } EXPORT_SYMBOL_GPL(crypto_ahash_import); static void crypto_ahash_exit_tfm(struct crypto_tfm *tfm) { struct crypto_ahash *hash = 
__crypto_ahash_cast(tfm); struct ahash_alg *alg = crypto_ahash_alg(hash); alg->exit_tfm(hash); } static int crypto_ahash_init_tfm(struct crypto_tfm *tfm) { struct crypto_ahash *hash = __crypto_ahash_cast(tfm); struct ahash_alg *alg = crypto_ahash_alg(hash); crypto_ahash_set_statesize(hash, alg->halg.statesize); if (tfm->__crt_alg->cra_type == &crypto_shash_type) return crypto_init_ahash_using_shash(tfm); ahash_set_needkey(hash, alg); if (alg->exit_tfm) tfm->exit = crypto_ahash_exit_tfm; return alg->init_tfm ? alg->init_tfm(hash) : 0; } static unsigned int crypto_ahash_extsize(struct crypto_alg *alg) { if (alg->cra_type == &crypto_shash_type) return sizeof(struct crypto_shash *); return crypto_alg_extsize(alg); } static void crypto_ahash_free_instance(struct crypto_instance *inst) { struct ahash_instance *ahash = ahash_instance(inst); ahash->free(ahash); } static int __maybe_unused crypto_ahash_report( struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_hash rhash; memset(&rhash, 0, sizeof(rhash)); strscpy(rhash.type, "ahash", sizeof(rhash.type)); rhash.blocksize = alg->cra_blocksize; rhash.digestsize = __crypto_hash_alg_common(alg)->digestsize; return nla_put(skb, CRYPTOCFGA_REPORT_HASH, sizeof(rhash), &rhash); } static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_ahash_show(struct seq_file *m, struct crypto_alg *alg) { seq_printf(m, "type : ahash\n"); seq_printf(m, "async : %s\n", alg->cra_flags & CRYPTO_ALG_ASYNC ? 
"yes" : "no"); seq_printf(m, "blocksize : %u\n", alg->cra_blocksize); seq_printf(m, "digestsize : %u\n", __crypto_hash_alg_common(alg)->digestsize); } static const struct crypto_type crypto_ahash_type = { .extsize = crypto_ahash_extsize, .init_tfm = crypto_ahash_init_tfm, .free = crypto_ahash_free_instance, #ifdef CONFIG_PROC_FS .show = crypto_ahash_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_ahash_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_AHASH_MASK, .type = CRYPTO_ALG_TYPE_AHASH, .tfmsize = offsetof(struct crypto_ahash, base), }; int crypto_grab_ahash(struct crypto_ahash_spawn *spawn, struct crypto_instance *inst, const char *name, u32 type, u32 mask) { spawn->base.frontend = &crypto_ahash_type; return crypto_grab_spawn(&spawn->base, inst, name, type, mask); } EXPORT_SYMBOL_GPL(crypto_grab_ahash); struct crypto_ahash *crypto_alloc_ahash(const char *alg_name, u32 type, u32 mask) { return crypto_alloc_tfm(alg_name, &crypto_ahash_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_alloc_ahash); int crypto_has_ahash(const char *alg_name, u32 type, u32 mask) { return crypto_type_has_alg(alg_name, &crypto_ahash_type, type, mask); } EXPORT_SYMBOL_GPL(crypto_has_ahash); static bool crypto_hash_alg_has_setkey(struct hash_alg_common *halg) { struct crypto_alg *alg = &halg->base; if (alg->cra_type == &crypto_shash_type) return crypto_shash_alg_has_setkey(__crypto_shash_alg(alg)); return __crypto_ahash_alg(alg)->setkey != ahash_nosetkey; } struct crypto_ahash *crypto_clone_ahash(struct crypto_ahash *hash) { struct hash_alg_common *halg = crypto_hash_alg_common(hash); struct crypto_tfm *tfm = crypto_ahash_tfm(hash); struct crypto_ahash *nhash; struct ahash_alg *alg; int err; if (!crypto_hash_alg_has_setkey(halg)) { tfm = crypto_tfm_get(tfm); if (IS_ERR(tfm)) return ERR_CAST(tfm); return hash; } nhash = crypto_clone_tfm(&crypto_ahash_type, tfm); if (IS_ERR(nhash)) return nhash; nhash->reqsize = hash->reqsize; nhash->statesize 
= hash->statesize; if (likely(hash->using_shash)) { struct crypto_shash **nctx = crypto_ahash_ctx(nhash); struct crypto_shash *shash; shash = crypto_clone_shash(ahash_to_shash(hash)); if (IS_ERR(shash)) { err = PTR_ERR(shash); goto out_free_nhash; } nhash->using_shash = true; *nctx = shash; return nhash; } err = -ENOSYS; alg = crypto_ahash_alg(hash); if (!alg->clone_tfm) goto out_free_nhash; err = alg->clone_tfm(nhash, hash); if (err) goto out_free_nhash; return nhash; out_free_nhash: crypto_free_ahash(nhash); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(crypto_clone_ahash); static int ahash_prepare_alg(struct ahash_alg *alg) { struct crypto_alg *base = &alg->halg.base; int err; if (alg->halg.statesize == 0) return -EINVAL; err = hash_prepare_alg(&alg->halg); if (err) return err; base->cra_type = &crypto_ahash_type; base->cra_flags |= CRYPTO_ALG_TYPE_AHASH; if (!alg->finup) alg->finup = ahash_def_finup; if (!alg->setkey) alg->setkey = ahash_nosetkey; return 0; } int crypto_register_ahash(struct ahash_alg *alg) { struct crypto_alg *base = &alg->halg.base; int err; err = ahash_prepare_alg(alg); if (err) return err; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_ahash); void crypto_unregister_ahash(struct ahash_alg *alg) { crypto_unregister_alg(&alg->halg.base); } EXPORT_SYMBOL_GPL(crypto_unregister_ahash); int crypto_register_ahashes(struct ahash_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_ahash(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_ahash(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_ahashes); void crypto_unregister_ahashes(struct ahash_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_ahash(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_ahashes); int ahash_register_instance(struct crypto_template *tmpl, struct ahash_instance *inst) { int err; if (WARN_ON(!inst->free)) return -EINVAL; err = 
ahash_prepare_alg(&inst->alg); if (err) return err; return crypto_register_instance(tmpl, ahash_crypto_instance(inst)); } EXPORT_SYMBOL_GPL(ahash_register_instance); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Asynchronous cryptographic hash type");
379 379 377 378 377 110 376 379 378 379 356 355 346 347 347 346 346 346 295 355 347 356 354 355 355 354 354 354 356 356 345 354 356 356 116 116 116 354 146 148 148 146 146 344 6 347 346 347 346 6 345 347 347 347 347 51 119 117 95 20 90 110 110 117 22 117 117 119 119 7 111 355 354 118 111 8 8 119 99 119 108 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 
450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 
950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 /* SPDX-License-Identifier: GPL-2.0 * * page_pool.c * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> * Copyright (C) 2016 Red Hat, Inc. 
*/ #include <linux/error-injection.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/device.h> #include <net/netdev_rx_queue.h> #include <net/page_pool/helpers.h> #include <net/xdp.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/page-flags.h> #include <linux/mm.h> /* for put_page() */ #include <linux/poison.h> #include <linux/ethtool.h> #include <linux/netdevice.h> #include <trace/events/page_pool.h> #include "mp_dmabuf_devmem.h" #include "netmem_priv.h" #include "page_pool_priv.h" DEFINE_STATIC_KEY_FALSE(page_pool_mem_providers); #define DEFER_TIME (msecs_to_jiffies(1000)) #define DEFER_WARN_INTERVAL (60 * HZ) #define BIAS_MAX (LONG_MAX >> 1) #ifdef CONFIG_PAGE_POOL_STATS static DEFINE_PER_CPU(struct page_pool_recycle_stats, pp_system_recycle_stats); /* alloc_stat_inc is intended to be used in softirq context */ #define alloc_stat_inc(pool, __stat) (pool->alloc_stats.__stat++) /* recycle_stat_inc is safe to use when preemption is possible. */ #define recycle_stat_inc(pool, __stat) \ do { \ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ this_cpu_inc(s->__stat); \ } while (0) #define recycle_stat_add(pool, __stat, val) \ do { \ struct page_pool_recycle_stats __percpu *s = pool->recycle_stats; \ this_cpu_add(s->__stat, val); \ } while (0) static const char pp_stats[][ETH_GSTRING_LEN] = { "rx_pp_alloc_fast", "rx_pp_alloc_slow", "rx_pp_alloc_slow_ho", "rx_pp_alloc_empty", "rx_pp_alloc_refill", "rx_pp_alloc_waive", "rx_pp_recycle_cached", "rx_pp_recycle_cache_full", "rx_pp_recycle_ring", "rx_pp_recycle_ring_full", "rx_pp_recycle_released_ref", }; /** * page_pool_get_stats() - fetch page pool stats * @pool: pool from which page was allocated * @stats: struct page_pool_stats to fill in * * Retrieve statistics about the page_pool. This API is only available * if the kernel has been configured with ``CONFIG_PAGE_POOL_STATS=y``. 
* A pointer to a caller allocated struct page_pool_stats structure * is passed to this API which is filled in. The caller can then report * those stats to the user (perhaps via ethtool, debugfs, etc.). */ bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats) { int cpu = 0; if (!stats) return false; /* The caller is responsible to initialize stats. */ stats->alloc_stats.fast += pool->alloc_stats.fast; stats->alloc_stats.slow += pool->alloc_stats.slow; stats->alloc_stats.slow_high_order += pool->alloc_stats.slow_high_order; stats->alloc_stats.empty += pool->alloc_stats.empty; stats->alloc_stats.refill += pool->alloc_stats.refill; stats->alloc_stats.waive += pool->alloc_stats.waive; for_each_possible_cpu(cpu) { const struct page_pool_recycle_stats *pcpu = per_cpu_ptr(pool->recycle_stats, cpu); stats->recycle_stats.cached += pcpu->cached; stats->recycle_stats.cache_full += pcpu->cache_full; stats->recycle_stats.ring += pcpu->ring; stats->recycle_stats.ring_full += pcpu->ring_full; stats->recycle_stats.released_refcnt += pcpu->released_refcnt; } return true; } EXPORT_SYMBOL(page_pool_get_stats); u8 *page_pool_ethtool_stats_get_strings(u8 *data) { int i; for (i = 0; i < ARRAY_SIZE(pp_stats); i++) { memcpy(data, pp_stats[i], ETH_GSTRING_LEN); data += ETH_GSTRING_LEN; } return data; } EXPORT_SYMBOL(page_pool_ethtool_stats_get_strings); int page_pool_ethtool_stats_get_count(void) { return ARRAY_SIZE(pp_stats); } EXPORT_SYMBOL(page_pool_ethtool_stats_get_count); u64 *page_pool_ethtool_stats_get(u64 *data, const void *stats) { const struct page_pool_stats *pool_stats = stats; *data++ = pool_stats->alloc_stats.fast; *data++ = pool_stats->alloc_stats.slow; *data++ = pool_stats->alloc_stats.slow_high_order; *data++ = pool_stats->alloc_stats.empty; *data++ = pool_stats->alloc_stats.refill; *data++ = pool_stats->alloc_stats.waive; *data++ = pool_stats->recycle_stats.cached; *data++ = pool_stats->recycle_stats.cache_full; *data++ = 
pool_stats->recycle_stats.ring; *data++ = pool_stats->recycle_stats.ring_full; *data++ = pool_stats->recycle_stats.released_refcnt; return data; } EXPORT_SYMBOL(page_pool_ethtool_stats_get); #else #define alloc_stat_inc(pool, __stat) #define recycle_stat_inc(pool, __stat) #define recycle_stat_add(pool, __stat, val) #endif static bool page_pool_producer_lock(struct page_pool *pool) __acquires(&pool->ring.producer_lock) { bool in_softirq = in_softirq(); if (in_softirq) spin_lock(&pool->ring.producer_lock); else spin_lock_bh(&pool->ring.producer_lock); return in_softirq; } static void page_pool_producer_unlock(struct page_pool *pool, bool in_softirq) __releases(&pool->ring.producer_lock) { if (in_softirq) spin_unlock(&pool->ring.producer_lock); else spin_unlock_bh(&pool->ring.producer_lock); } static void page_pool_struct_check(void) { CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_users); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_page); CACHELINE_ASSERT_GROUP_MEMBER(struct page_pool, frag, frag_offset); CACHELINE_ASSERT_GROUP_SIZE(struct page_pool, frag, PAGE_POOL_FRAG_GROUP_ALIGN); } static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params, int cpuid) { unsigned int ring_qsize = 1024; /* Default */ struct netdev_rx_queue *rxq; int err; page_pool_struct_check(); memcpy(&pool->p, &params->fast, sizeof(pool->p)); memcpy(&pool->slow, &params->slow, sizeof(pool->slow)); pool->cpuid = cpuid; pool->dma_sync_for_cpu = true; /* Validate only known flags were used */ if (pool->slow.flags & ~PP_FLAG_ALL) return -EINVAL; if (pool->p.pool_size) ring_qsize = pool->p.pool_size; /* Sanity limit mem that can be pinned down */ if (ring_qsize > 32768) return -E2BIG; /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, * which is the XDP_TX use-case. 
*/ if (pool->slow.flags & PP_FLAG_DMA_MAP) { if ((pool->p.dma_dir != DMA_FROM_DEVICE) && (pool->p.dma_dir != DMA_BIDIRECTIONAL)) return -EINVAL; pool->dma_map = true; } if (pool->slow.flags & PP_FLAG_DMA_SYNC_DEV) { /* In order to request DMA-sync-for-device the page * needs to be mapped */ if (!(pool->slow.flags & PP_FLAG_DMA_MAP)) return -EINVAL; if (!pool->p.max_len) return -EINVAL; pool->dma_sync = true; /* pool->p.offset has to be set according to the address * offset used by the DMA engine to start copying rx data */ } pool->has_init_callback = !!pool->slow.init_callback; #ifdef CONFIG_PAGE_POOL_STATS if (!(pool->slow.flags & PP_FLAG_SYSTEM_POOL)) { pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats); if (!pool->recycle_stats) return -ENOMEM; } else { /* For system page pool instance we use a singular stats object * instead of allocating a separate percpu variable for each * (also percpu) page pool instance. */ pool->recycle_stats = &pp_system_recycle_stats; pool->system = true; } #endif if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0) { #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) free_percpu(pool->recycle_stats); #endif return -ENOMEM; } atomic_set(&pool->pages_state_release_cnt, 0); /* Driver calling page_pool_create() also call page_pool_destroy() */ refcount_set(&pool->user_cnt, 1); if (pool->dma_map) get_device(pool->p.dev); if (pool->slow.flags & PP_FLAG_ALLOW_UNREADABLE_NETMEM) { /* We rely on rtnl_lock()ing to make sure netdev_rx_queue * configuration doesn't change while we're initializing * the page_pool. 
*/ ASSERT_RTNL(); rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv; } if (pool->mp_priv) { if (!pool->dma_map || !pool->dma_sync) return -EOPNOTSUPP; err = mp_dmabuf_devmem_init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, err); goto free_ptr_ring; } static_branch_inc(&page_pool_mem_providers); } return 0; free_ptr_ring: ptr_ring_cleanup(&pool->ring, NULL); #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) free_percpu(pool->recycle_stats); #endif return err; } static void page_pool_uninit(struct page_pool *pool) { ptr_ring_cleanup(&pool->ring, NULL); if (pool->dma_map) put_device(pool->p.dev); #ifdef CONFIG_PAGE_POOL_STATS if (!pool->system) free_percpu(pool->recycle_stats); #endif } /** * page_pool_create_percpu() - create a page pool for a given cpu. * @params: parameters, see struct page_pool_params * @cpuid: cpu identifier */ struct page_pool * page_pool_create_percpu(const struct page_pool_params *params, int cpuid) { struct page_pool *pool; int err; pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid); if (!pool) return ERR_PTR(-ENOMEM); err = page_pool_init(pool, params, cpuid); if (err < 0) goto err_free; err = page_pool_list(pool); if (err) goto err_uninit; return pool; err_uninit: page_pool_uninit(pool); err_free: pr_warn("%s() gave up with errno %d\n", __func__, err); kfree(pool); return ERR_PTR(err); } EXPORT_SYMBOL(page_pool_create_percpu); /** * page_pool_create() - create a page pool * @params: parameters, see struct page_pool_params */ struct page_pool *page_pool_create(const struct page_pool_params *params) { return page_pool_create_percpu(params, -1); } EXPORT_SYMBOL(page_pool_create); static void page_pool_return_page(struct page_pool *pool, netmem_ref netmem); static noinline netmem_ref page_pool_refill_alloc_cache(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; netmem_ref netmem; int pref_nid; /* preferred NUMA node */ /* Quicker fallback, 
avoid locks when ring is empty */ if (__ptr_ring_empty(r)) { alloc_stat_inc(pool, empty); return 0; } /* Softirq guarantee CPU and thus NUMA node is stable. This, * assumes CPU refilling driver RX-ring will also run RX-NAPI. */ #ifdef CONFIG_NUMA pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid; #else /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */ pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */ #endif /* Refill alloc array, but only if NUMA match */ do { netmem = (__force netmem_ref)__ptr_ring_consume(r); if (unlikely(!netmem)) break; if (likely(netmem_is_pref_nid(netmem, pref_nid))) { pool->alloc.cache[pool->alloc.count++] = netmem; } else { /* NUMA mismatch; * (1) release 1 page to page-allocator and * (2) break out to fallthrough to alloc_pages_node. * This limit stress on page buddy alloactor. */ page_pool_return_page(pool, netmem); alloc_stat_inc(pool, waive); netmem = 0; break; } } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL); /* Return last page */ if (likely(pool->alloc.count > 0)) { netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, refill); } return netmem; } /* fast path */ static netmem_ref __page_pool_get_cached(struct page_pool *pool) { netmem_ref netmem; /* Caller MUST guarantee safe non-concurrent access, e.g. 
softirq */ if (likely(pool->alloc.count)) { /* Fast-path */ netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, fast); } else { netmem = page_pool_refill_alloc_cache(pool); } return netmem; } static void __page_pool_dma_sync_for_device(const struct page_pool *pool, netmem_ref netmem, u32 dma_sync_size) { #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) dma_addr_t dma_addr = page_pool_get_dma_addr_netmem(netmem); dma_sync_size = min(dma_sync_size, pool->p.max_len); __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset, dma_sync_size, pool->p.dma_dir); #endif } static __always_inline void page_pool_dma_sync_for_device(const struct page_pool *pool, netmem_ref netmem, u32 dma_sync_size) { if (pool->dma_sync && dma_dev_need_sync(pool->p.dev)) __page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); } static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem) { dma_addr_t dma; /* Setup DMA mapping: use 'struct page' area for storing DMA-addr * since dma_addr_t can be either 32 or 64 bits and does not always fit * into page private data (i.e 32bit cpu with 64bit DMA caps) * This mapping is kept for lifetime of page, until leaving pool. 
*/ dma = dma_map_page_attrs(pool->p.dev, netmem_to_page(netmem), 0, (PAGE_SIZE << pool->p.order), pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); if (dma_mapping_error(pool->p.dev, dma)) return false; if (page_pool_set_dma_addr_netmem(netmem, dma)) goto unmap_failed; page_pool_dma_sync_for_device(pool, netmem, pool->p.max_len); return true; unmap_failed: WARN_ONCE(1, "unexpected DMA address, please report to netdev@"); dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); return false; } static struct page *__page_pool_alloc_page_order(struct page_pool *pool, gfp_t gfp) { struct page *page; gfp |= __GFP_COMP; page = alloc_pages_node(pool->p.nid, gfp, pool->p.order); if (unlikely(!page)) return NULL; if (pool->dma_map && unlikely(!page_pool_dma_map(pool, page_to_netmem(page)))) { put_page(page); return NULL; } alloc_stat_inc(pool, slow_high_order); page_pool_set_pp_info(pool, page_to_netmem(page)); /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; trace_page_pool_state_hold(pool, page_to_netmem(page), pool->pages_state_hold_cnt); return page; } /* slow path */ static noinline netmem_ref __page_pool_alloc_pages_slow(struct page_pool *pool, gfp_t gfp) { const int bulk = PP_ALLOC_CACHE_REFILL; unsigned int pp_order = pool->p.order; bool dma_map = pool->dma_map; netmem_ref netmem; int i, nr_pages; /* Don't support bulk alloc for high-order pages */ if (unlikely(pp_order)) return page_to_netmem(__page_pool_alloc_page_order(pool, gfp)); /* Unnecessary as alloc cache is empty, but guarantees zero count */ if (unlikely(pool->alloc.count > 0)) return pool->alloc.cache[--pool->alloc.count]; /* Mark empty alloc.cache slots "empty" for alloc_pages_bulk */ memset(&pool->alloc.cache, 0, sizeof(void *) * bulk); nr_pages = alloc_pages_bulk_node(gfp, pool->p.nid, bulk, (struct page **)pool->alloc.cache); if (unlikely(!nr_pages)) return 0; /* Pages have been 
filled into alloc.cache array, but count is zero and * page element have not been (possibly) DMA mapped. */ for (i = 0; i < nr_pages; i++) { netmem = pool->alloc.cache[i]; if (dma_map && unlikely(!page_pool_dma_map(pool, netmem))) { put_page(netmem_to_page(netmem)); continue; } page_pool_set_pp_info(pool, netmem); pool->alloc.cache[pool->alloc.count++] = netmem; /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; trace_page_pool_state_hold(pool, netmem, pool->pages_state_hold_cnt); } /* Return last page */ if (likely(pool->alloc.count > 0)) { netmem = pool->alloc.cache[--pool->alloc.count]; alloc_stat_inc(pool, slow); } else { netmem = 0; } /* When page just alloc'ed is should/must have refcnt 1. */ return netmem; } /* For using page_pool replace: alloc_pages() API calls, but provide * synchronization guarantee for allocation side. */ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) { netmem_ref netmem; /* Fast-path: Get a page from cache */ netmem = __page_pool_get_cached(pool); if (netmem) return netmem; /* Slow-path: cache empty, do real allocation */ if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) netmem = mp_dmabuf_devmem_alloc_netmems(pool, gfp); else netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; } EXPORT_SYMBOL(page_pool_alloc_netmems); ALLOW_ERROR_INJECTION(page_pool_alloc_netmems, NULL); struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) { return netmem_to_page(page_pool_alloc_netmems(pool, gfp)); } EXPORT_SYMBOL(page_pool_alloc_pages); /* Calculate distance between two u32 values, valid if distance is below 2^(31) * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution */ #define _distance(a, b) (s32)((a) - (b)) s32 page_pool_inflight(const struct page_pool *pool, bool strict) { u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); s32 inflight; inflight = 
_distance(hold_cnt, release_cnt); if (strict) { trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); } else { inflight = max(0, inflight); } return inflight; } void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) { netmem_set_pp(netmem, pool); netmem_or_pp_magic(netmem, PP_SIGNATURE); /* Ensuring all pages have been split into one fragment initially: * page_pool_set_pp_info() is only called once for every page when it * is allocated from the page allocator and page_pool_fragment_page() * is dirtying the same cache line as the page->pp_magic above, so * the overhead is negligible. */ page_pool_fragment_netmem(netmem, 1); if (pool->has_init_callback) pool->slow.init_callback(netmem, pool->slow.init_arg); } void page_pool_clear_pp_info(netmem_ref netmem) { netmem_clear_pp_magic(netmem); netmem_set_pp(netmem, NULL); } static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, netmem_ref netmem) { dma_addr_t dma; if (!pool->dma_map) /* Always account for inflight pages, even if we didn't * map them */ return; dma = page_pool_get_dma_addr_netmem(netmem); /* When page is unmapped, it cannot be returned to our pool */ dma_unmap_page_attrs(pool->p.dev, dma, PAGE_SIZE << pool->p.order, pool->p.dma_dir, DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); page_pool_set_dma_addr_netmem(netmem, 0); } /* Disconnects a page (from a page_pool). API users can have a need * to disconnect a page (from a page_pool), to allow it to be used as * a regular page (that will eventually be returned to the normal * page-allocator via put_page). 
*/ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) { int count; bool put; put = true; if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) put = mp_dmabuf_devmem_release_page(pool, netmem); else __page_pool_release_page_dma(pool, netmem); /* This may be the last page returned, releasing the pool, so * it is not safe to reference pool afterwards. */ count = atomic_inc_return_relaxed(&pool->pages_state_release_cnt); trace_page_pool_state_release(pool, netmem, count); if (put) { page_pool_clear_pp_info(netmem); put_page(netmem_to_page(netmem)); } /* An optimization would be to call __free_pages(page, pool->p.order) * knowing page is not part of page-cache (thus avoiding a * __page_cache_release() call). */ } static bool page_pool_recycle_in_ring(struct page_pool *pool, netmem_ref netmem) { int ret; /* BH protection not needed if current is softirq */ if (in_softirq()) ret = ptr_ring_produce(&pool->ring, (__force void *)netmem); else ret = ptr_ring_produce_bh(&pool->ring, (__force void *)netmem); if (!ret) { recycle_stat_inc(pool, ring); return true; } return false; } /* Only allow direct recycling in special circumstances, into the * alloc side cache. E.g. during RX-NAPI processing for XDP_DROP use-case. * * Caller must provide appropriate safe context. */ static bool page_pool_recycle_in_cache(netmem_ref netmem, struct page_pool *pool) { if (unlikely(pool->alloc.count == PP_ALLOC_CACHE_SIZE)) { recycle_stat_inc(pool, cache_full); return false; } /* Caller MUST have verified/know (page_ref_count(page) == 1) */ pool->alloc.cache[pool->alloc.count++] = netmem; recycle_stat_inc(pool, cached); return true; } static bool __page_pool_page_can_be_recycled(netmem_ref netmem) { return netmem_is_net_iov(netmem) || (page_ref_count(netmem_to_page(netmem)) == 1 && !page_is_pfmemalloc(netmem_to_page(netmem))); } /* If the page refcnt == 1, this will try to recycle the page. 
* If pool->dma_sync is set, we'll try to sync the DMA area for * the configured size min(dma_sync_size, pool->max_len). * If the page refcnt != 1, then the page will be returned to memory * subsystem. */ static __always_inline netmem_ref __page_pool_put_page(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { lockdep_assert_no_hardirq(); /* This allocator is optimized for the XDP mode that uses * one-frame-per-page, but have fallbacks that act like the * regular page allocator APIs. * * refcnt == 1 means page_pool owns page, and can recycle it. * * page is NOT reusable when allocated when system is under * some pressure. (page_is_pfmemalloc) */ if (likely(__page_pool_page_can_be_recycled(netmem))) { /* Read barrier done in page_ref_count / READ_ONCE */ page_pool_dma_sync_for_device(pool, netmem, dma_sync_size); if (allow_direct && page_pool_recycle_in_cache(netmem, pool)) return 0; /* Page found as candidate for recycling */ return netmem; } /* Fallback/non-XDP mode: API user have elevated refcnt. * * Many drivers split up the page into fragments, and some * want to keep doing this to save memory and do refcnt based * recycling. Support this use case too, to ease drivers * switching between XDP/non-XDP. * * In-case page_pool maintains the DMA mapping, API user must * call page_pool_put_page once. In this elevated refcnt * case, the DMA is unmapped/released, as driver is likely * doing refcnt based recycle tricks, meaning another process * will be invoking put_page. */ recycle_stat_inc(pool, released_refcnt); page_pool_return_page(pool, netmem); return 0; } static bool page_pool_napi_local(const struct page_pool *pool) { const struct napi_struct *napi; u32 cpuid; if (unlikely(!in_softirq())) return false; /* Allow direct recycle if we have reasons to believe that we are * in the same context as the consumer would run, so there's * no possible race. 
* __page_pool_put_page() makes sure we're not in hardirq context * and interrupts are enabled prior to accessing the cache. */ cpuid = smp_processor_id(); if (READ_ONCE(pool->cpuid) == cpuid) return true; napi = READ_ONCE(pool->p.napi); return napi && READ_ONCE(napi->list_owner) == cpuid; } void page_pool_put_unrefed_netmem(struct page_pool *pool, netmem_ref netmem, unsigned int dma_sync_size, bool allow_direct) { if (!allow_direct) allow_direct = page_pool_napi_local(pool); netmem = __page_pool_put_page(pool, netmem, dma_sync_size, allow_direct); if (netmem && !page_pool_recycle_in_ring(pool, netmem)) { /* Cache full, fallback to free pages */ recycle_stat_inc(pool, ring_full); page_pool_return_page(pool, netmem); } } EXPORT_SYMBOL(page_pool_put_unrefed_netmem); void page_pool_put_unrefed_page(struct page_pool *pool, struct page *page, unsigned int dma_sync_size, bool allow_direct) { page_pool_put_unrefed_netmem(pool, page_to_netmem(page), dma_sync_size, allow_direct); } EXPORT_SYMBOL(page_pool_put_unrefed_page); static void page_pool_recycle_ring_bulk(struct page_pool *pool, netmem_ref *bulk, u32 bulk_len) { bool in_softirq; u32 i; /* Bulk produce into ptr_ring page_pool cache */ in_softirq = page_pool_producer_lock(pool); for (i = 0; i < bulk_len; i++) { if (__ptr_ring_produce(&pool->ring, (__force void *)bulk[i])) { /* ring full */ recycle_stat_inc(pool, ring_full); break; } } page_pool_producer_unlock(pool, in_softirq); recycle_stat_add(pool, ring, i); /* Hopefully all pages were returned into ptr_ring */ if (likely(i == bulk_len)) return; /* * ptr_ring cache is full, free remaining pages outside producer lock * since put_page() with refcnt == 1 can be an expensive operation. 
*/ for (; i < bulk_len; i++) page_pool_return_page(pool, bulk[i]); } /** * page_pool_put_netmem_bulk() - release references on multiple netmems * @data: array holding netmem references * @count: number of entries in @data * * Tries to refill a number of netmems into the ptr_ring cache holding ptr_ring * producer lock. If the ptr_ring is full, page_pool_put_netmem_bulk() * will release leftover netmems to the memory provider. * page_pool_put_netmem_bulk() is suitable to be run inside the driver NAPI tx * completion loop for the XDP_REDIRECT use case. * * Please note the caller must not use data area after running * page_pool_put_netmem_bulk(), as this function overwrites it. */ void page_pool_put_netmem_bulk(netmem_ref *data, u32 count) { u32 bulk_len = 0; for (u32 i = 0; i < count; i++) { netmem_ref netmem = netmem_compound_head(data[i]); if (page_pool_unref_and_test(netmem)) data[bulk_len++] = netmem; } count = bulk_len; while (count) { netmem_ref bulk[XDP_BULK_QUEUE_SIZE]; struct page_pool *pool = NULL; bool allow_direct; u32 foreign = 0; bulk_len = 0; for (u32 i = 0; i < count; i++) { struct page_pool *netmem_pp; netmem_ref netmem = data[i]; netmem_pp = netmem_get_pp(netmem); if (unlikely(!pool)) { pool = netmem_pp; allow_direct = page_pool_napi_local(pool); } else if (netmem_pp != pool) { /* * If the netmem belongs to a different * page_pool, save it for another round. 
*/ data[foreign++] = netmem; continue; } netmem = __page_pool_put_page(pool, netmem, -1, allow_direct); /* Approved for bulk recycling in ptr_ring cache */ if (netmem) bulk[bulk_len++] = netmem; } if (bulk_len) page_pool_recycle_ring_bulk(pool, bulk, bulk_len); count = foreign; } } EXPORT_SYMBOL(page_pool_put_netmem_bulk); static netmem_ref page_pool_drain_frag(struct page_pool *pool, netmem_ref netmem) { long drain_count = BIAS_MAX - pool->frag_users; /* Some user is still using the page frag */ if (likely(page_pool_unref_netmem(netmem, drain_count))) return 0; if (__page_pool_page_can_be_recycled(netmem)) { page_pool_dma_sync_for_device(pool, netmem, -1); return netmem; } page_pool_return_page(pool, netmem); return 0; } static void page_pool_free_frag(struct page_pool *pool) { long drain_count = BIAS_MAX - pool->frag_users; netmem_ref netmem = pool->frag_page; pool->frag_page = 0; if (!netmem || page_pool_unref_netmem(netmem, drain_count)) return; page_pool_return_page(pool, netmem); } netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool, unsigned int *offset, unsigned int size, gfp_t gfp) { unsigned int max_size = PAGE_SIZE << pool->p.order; netmem_ref netmem = pool->frag_page; if (WARN_ON(size > max_size)) return 0; size = ALIGN(size, dma_get_cache_alignment()); *offset = pool->frag_offset; if (netmem && *offset + size > max_size) { netmem = page_pool_drain_frag(pool, netmem); if (netmem) { recycle_stat_inc(pool, cached); alloc_stat_inc(pool, fast); goto frag_reset; } } if (!netmem) { netmem = page_pool_alloc_netmems(pool, gfp); if (unlikely(!netmem)) { pool->frag_page = 0; return 0; } pool->frag_page = netmem; frag_reset: pool->frag_users = 1; *offset = 0; pool->frag_offset = size; page_pool_fragment_netmem(netmem, BIAS_MAX); return netmem; } pool->frag_users++; pool->frag_offset = *offset + size; return netmem; } EXPORT_SYMBOL(page_pool_alloc_frag_netmem); struct page *page_pool_alloc_frag(struct page_pool *pool, unsigned int *offset, unsigned int 
size, gfp_t gfp) { return netmem_to_page(page_pool_alloc_frag_netmem(pool, offset, size, gfp)); } EXPORT_SYMBOL(page_pool_alloc_frag); static void page_pool_empty_ring(struct page_pool *pool) { netmem_ref netmem; /* Empty recycle ring */ while ((netmem = (__force netmem_ref)ptr_ring_consume_bh(&pool->ring))) { /* Verify the refcnt invariant of cached pages */ if (!(netmem_ref_count(netmem) == 1)) pr_crit("%s() page_pool refcnt %d violation\n", __func__, netmem_ref_count(netmem)); page_pool_return_page(pool, netmem); } } static void __page_pool_destroy(struct page_pool *pool) { if (pool->disconnect) pool->disconnect(pool); page_pool_unlist(pool); page_pool_uninit(pool); if (pool->mp_priv) { mp_dmabuf_devmem_destroy(pool); static_branch_dec(&page_pool_mem_providers); } kfree(pool); } static void page_pool_empty_alloc_cache_once(struct page_pool *pool) { netmem_ref netmem; if (pool->destroy_cnt) return; /* Empty alloc cache, assume caller made sure this is * no-longer in use, and page_pool_alloc_pages() cannot be * call concurrently. */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; page_pool_return_page(pool, netmem); } } static void page_pool_scrub(struct page_pool *pool) { page_pool_empty_alloc_cache_once(pool); pool->destroy_cnt++; /* No more consumers should exist, but producers could still * be in-flight. 
*/ page_pool_empty_ring(pool); } static int page_pool_release(struct page_pool *pool) { int inflight; page_pool_scrub(pool); inflight = page_pool_inflight(pool, true); if (!inflight) __page_pool_destroy(pool); return inflight; } static void page_pool_release_retry(struct work_struct *wq) { struct delayed_work *dwq = to_delayed_work(wq); struct page_pool *pool = container_of(dwq, typeof(*pool), release_dw); void *netdev; int inflight; inflight = page_pool_release(pool); if (!inflight) return; /* Periodic warning for page pools the user can't see */ netdev = READ_ONCE(pool->slow.netdev); if (time_after_eq(jiffies, pool->defer_warn) && (!netdev || netdev == NET_PTR_POISON)) { int sec = (s32)((u32)jiffies - (u32)pool->defer_start) / HZ; pr_warn("%s() stalled pool shutdown: id %u, %d inflight %d sec\n", __func__, pool->user.id, inflight, sec); pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; } /* Still not ready to be disconnected, retry later */ schedule_delayed_work(&pool->release_dw, DEFER_TIME); } void page_pool_use_xdp_mem(struct page_pool *pool, void (*disconnect)(void *), const struct xdp_mem_info *mem) { refcount_inc(&pool->user_cnt); pool->disconnect = disconnect; pool->xdp_mem_id = mem->id; } void page_pool_disable_direct_recycling(struct page_pool *pool) { /* Disable direct recycling based on pool->cpuid. * Paired with READ_ONCE() in page_pool_napi_local(). */ WRITE_ONCE(pool->cpuid, -1); if (!pool->p.napi) return; /* To avoid races with recycling and additional barriers make sure * pool and NAPI are unlinked when NAPI is disabled. 
*/ WARN_ON(!test_bit(NAPI_STATE_SCHED, &pool->p.napi->state)); WARN_ON(READ_ONCE(pool->p.napi->list_owner) != -1); mutex_lock(&page_pools_lock); WRITE_ONCE(pool->p.napi, NULL); mutex_unlock(&page_pools_lock); } EXPORT_SYMBOL(page_pool_disable_direct_recycling); void page_pool_destroy(struct page_pool *pool) { if (!pool) return; if (!page_pool_put(pool)) return; page_pool_disable_direct_recycling(pool); page_pool_free_frag(pool); if (!page_pool_release(pool)) return; page_pool_detached(pool); pool->defer_start = jiffies; pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; INIT_DELAYED_WORK(&pool->release_dw, page_pool_release_retry); schedule_delayed_work(&pool->release_dw, DEFER_TIME); } EXPORT_SYMBOL(page_pool_destroy); /* Caller must provide appropriate safe context, e.g. NAPI. */ void page_pool_update_nid(struct page_pool *pool, int new_nid) { netmem_ref netmem; trace_page_pool_update_nid(pool, new_nid); pool->p.nid = new_nid; /* Flush pool alloc cache, as refill will check NUMA node */ while (pool->alloc.count) { netmem = pool->alloc.cache[--pool->alloc.count]; page_pool_return_page(pool, netmem); } } EXPORT_SYMBOL(page_pool_update_nid);
41 41 41 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-or-later /* rxrpc network namespace handling. * * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include "ar-internal.h" unsigned int rxrpc_net_id; static void rxrpc_service_conn_reap_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = container_of(timer, struct rxrpc_net, service_conn_reap_timer); if (rxnet->live) rxrpc_queue_work(&rxnet->service_conn_reaper); } static void rxrpc_peer_keepalive_timeout(struct timer_list *timer) { struct rxrpc_net *rxnet = container_of(timer, struct rxrpc_net, peer_keepalive_timer); if (rxnet->live) rxrpc_queue_work(&rxnet->peer_keepalive_work); } /* * Initialise a per-network namespace record. 
*/
static __net_init int rxrpc_init_net(struct net *net)
{
	const size_t priv_size = sizeof(struct seq_net_private);
	struct rxrpc_net *rxnet = rxrpc_net(net);
	int slot;

	rxnet->live = true;

	/* Random epoch, tagged as such. */
	get_random_bytes(&rxnet->epoch, sizeof(rxnet->epoch));
	rxnet->epoch |= RXRPC_RANDOM_EPOCH;

	/* Call tracking. */
	INIT_LIST_HEAD(&rxnet->calls);
	spin_lock_init(&rxnet->call_lock);
	atomic_set(&rxnet->nr_calls, 1);

	/* Connection tracking and the service connection reaper. */
	atomic_set(&rxnet->nr_conns, 1);
	INIT_LIST_HEAD(&rxnet->bundle_proc_list);
	INIT_LIST_HEAD(&rxnet->conn_proc_list);
	INIT_LIST_HEAD(&rxnet->service_conns);
	rwlock_init(&rxnet->conn_lock);
	INIT_WORK(&rxnet->service_conn_reaper,
		  rxrpc_service_connection_reaper);
	timer_setup(&rxnet->service_conn_reap_timer,
		    rxrpc_service_conn_reap_timeout, 0);

	atomic_set(&rxnet->nr_client_conns, 0);

	/* Local endpoints. */
	INIT_HLIST_HEAD(&rxnet->local_endpoints);
	mutex_init(&rxnet->local_mutex);

	/* Peer hash and keepalive machinery. */
	hash_init(rxnet->peer_hash);
	spin_lock_init(&rxnet->peer_hash_lock);
	for (slot = 0; slot < ARRAY_SIZE(rxnet->peer_keepalive); slot++) {
		INIT_LIST_HEAD(&rxnet->peer_keepalive[slot]);
	}
	INIT_LIST_HEAD(&rxnet->peer_keepalive_new);
	timer_setup(&rxnet->peer_keepalive_timer,
		    rxrpc_peer_keepalive_timeout, 0);
	INIT_WORK(&rxnet->peer_keepalive_work, rxrpc_peer_keepalive_worker);
	rxnet->peer_keepalive_base = ktime_get_seconds();

	/* The proc directory is the only step that can fail the init. */
	rxnet->proc_net = proc_net_mkdir(net, "rxrpc", net->proc_net);
	if (!rxnet->proc_net) {
		rxnet->live = false;
		return -ENOMEM;
	}

	proc_create_net("calls", 0444, rxnet->proc_net,
			&rxrpc_call_seq_ops, priv_size);
	proc_create_net("conns", 0444, rxnet->proc_net,
			&rxrpc_connection_seq_ops, priv_size);
	proc_create_net("bundles", 0444, rxnet->proc_net,
			&rxrpc_bundle_seq_ops, priv_size);
	proc_create_net("peers", 0444, rxnet->proc_net,
			&rxrpc_peer_seq_ops, priv_size);
	proc_create_net("locals", 0444, rxnet->proc_net,
			&rxrpc_local_seq_ops, priv_size);
	proc_create_net_single_write("stats", S_IFREG | 0644, rxnet->proc_net,
				     rxrpc_stats_show, rxrpc_stats_clear, NULL);

	return 0;
}

/*
 *
Clean up a per-network namespace record.
 *
 * The record is marked not live first; the timer callbacks in this file test
 * ->live before queueing work, so a timer that still fires afterwards queues
 * nothing.  The keepalive timer and work item are then stopped, all calls,
 * connections, peers and local endpoints are destroyed, and finally the
 * "rxrpc" proc directory is removed.
 */
static __net_exit void rxrpc_exit_net(struct net *net)
{
	struct rxrpc_net *rxnet = rxrpc_net(net);

	rxnet->live = false;
	del_timer_sync(&rxnet->peer_keepalive_timer);
	cancel_work_sync(&rxnet->peer_keepalive_work);
	/* Remove the timer again as the worker may have restarted it. */
	del_timer_sync(&rxnet->peer_keepalive_timer);
	rxrpc_destroy_all_calls(rxnet);
	rxrpc_destroy_all_connections(rxnet);
	rxrpc_destroy_all_peers(rxnet);
	rxrpc_destroy_all_locals(rxnet);
	proc_remove(rxnet->proc_net);
}

/*
 * Per-network-namespace hooks for rxrpc: the init/exit callbacks above, the
 * id slot, and the size of the per-namespace record (struct rxrpc_net).
 */
struct pernet_operations rxrpc_net_ops = {
	.init	= rxrpc_init_net,
	.exit	= rxrpc_exit_net,
	.id	= &rxrpc_net_id,
	.size	= sizeof(struct rxrpc_net),
};
420 420 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_TEXT_PATCHING_H #define _ASM_X86_TEXT_PATCHING_H #include <linux/types.h> #include <linux/stddef.h> #include <asm/ptrace.h> /* * Currently, the max observed size in the kernel code is * JUMP_LABEL_NOP_SIZE/RELATIVEJUMP_SIZE, which are 5. * Raise it if needed. */ #define POKE_MAX_OPCODE_SIZE 5 extern void text_poke_early(void *addr, const void *opcode, size_t len); extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len); /* * Clear and restore the kernel write-protection flag on the local CPU. * Allows the kernel to edit read-only pages. * Side-effect: any interrupt handler running between save and restore will have * the ability to write to read-only pages. * * Warning: * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and * no thread can be preempted in the instructions being modified (no iret to an * invalid instruction possible) or if the instructions are changed from a * consistent state to another consistent state atomically. * On the local CPU you need to be protected against NMI or MCE handlers seeing * an inconsistent instruction while you patch. 
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
/* Self-referential define; presumably lets other code detect it with #ifdef. */
#define text_poke_copy text_poke_copy
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);

extern void text_poke_queue(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_finish(void);

/*
 * First opcode byte and total encoded length, in bytes, of each instruction
 * form known to text_opcode_size().
 */
#define INT3_INSN_SIZE		1
#define INT3_INSN_OPCODE	0xCC

#define RET_INSN_SIZE		1
#define RET_INSN_OPCODE		0xC3

#define CALL_INSN_SIZE		5
#define CALL_INSN_OPCODE	0xE8

#define JMP32_INSN_SIZE		5
#define JMP32_INSN_OPCODE	0xE9

#define JMP8_INSN_SIZE		2
#define JMP8_INSN_OPCODE	0xEB

#define DISP32_SIZE		4

/*
 * Map an opcode byte to the length of the instruction it starts.
 *
 * Returns the matching *_INSN_SIZE for INT3, RET, CALL, JMP32 and JMP8, and 0
 * for any other opcode byte.
 */
static __always_inline int text_opcode_size(u8 opcode)
{
	int size = 0;

#define __CASE(insn)	\
	case insn##_INSN_OPCODE: size = insn##_INSN_SIZE; break

	switch(opcode) {
	__CASE(INT3);
	__CASE(RET);
	__CASE(CALL);
	__CASE(JMP32);
	__CASE(JMP8);
	}

#undef __CASE

	return size;
}

/*
 * One instruction being generated, viewed either as raw bytes or as an
 * opcode byte immediately followed (packed, no padding) by a 32-bit
 * displacement.
 */
union text_poke_insn {
	u8 text[POKE_MAX_OPCODE_SIZE];
	struct {
		u8 opcode;
		s32 disp;
	} __attribute__((packed));
};

/*
 * Encode an instruction into @buf.
 *
 * @buf:    destination, accessed as a union text_poke_insn
 * @opcode: opcode byte to store
 * @addr:   address the instruction will live at
 * @dest:   branch target; only used when @size > 1
 * @size:   length of the instruction in bytes
 *
 * BUGs if @size is smaller than text_opcode_size(@opcode).  For @size > 1 the
 * displacement is computed relative to the end of the instruction
 * (@addr + @size) and stored as a 32-bit value; for @size == 2 it is
 * additionally checked to fit in a signed byte.
 */
static __always_inline
void __text_gen_insn(void *buf, u8 opcode, const void *addr, const void *dest, int size)
{
	union text_poke_insn *insn = buf;

	BUG_ON(size < text_opcode_size(opcode));

	/*
	 * Hide the addresses to avoid the compiler folding in constants when
	 * referencing code, these can mess up annotations like
	 * ANNOTATE_NOENDBR.
	 */
	OPTIMIZER_HIDE_VAR(insn);
	OPTIMIZER_HIDE_VAR(addr);
	OPTIMIZER_HIDE_VAR(dest);

	insn->opcode = opcode;

	if (size > 1) {
		insn->disp = (long)dest - (long)(addr + size);
		if (size == 2) {
			/*
			 * Ensure that for JMP8 the displacement
			 * actually fits the signed byte.
			 *
			 * Both shifts give the same result (0 or -1) only
			 * when disp lies in [-128, 127].
			 */
			BUG_ON((insn->disp >> 31) != (insn->disp >> 7));
		}
	}
}

/*
 * Encode @opcode with its natural size (text_opcode_size()) for an instruction
 * at @addr targeting @dest.
 *
 * Returns a pointer to the bytes of a function-local static buffer, so the
 * result is overwritten by the next call that shares that buffer.
 */
static __always_inline
void *text_gen_insn(u8 opcode, const void *addr, const void *dest)
{
	static union text_poke_insn insn; /* per instance */
	__text_gen_insn(&insn, opcode, addr, dest, text_opcode_size(opcode));
	return &insn.text;
}

extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;

#ifndef CONFIG_UML_X86
/* Emulate a jump: continue execution at @ip. */
static __always_inline
void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
{
	regs->ip = ip;
}

/* Emulate a push: grow the saved stack by one word and store @val there. */
static __always_inline
void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
	/*
	 * The int3 handler in entry_64.S adds a gap between the
	 * stack where the break point happened, and the saving of
	 * pt_regs. We can extend the original stack because of
	 * this gap. See the idtentry macro's create_gap option.
	 *
	 * Similarly entry_32.S will have a gap on the stack for (any) hardware
	 * exception and pt_regs; see FIXUP_FRAME.
	 */
	regs->sp -= sizeof(unsigned long);
	*(unsigned long *)regs->sp = val;
}

/* Emulate a pop: read the word at the saved stack pointer, then shrink the stack. */
static __always_inline
unsigned long int3_emulate_pop(struct pt_regs *regs)
{
	unsigned long val = *(unsigned long *)regs->sp;
	regs->sp += sizeof(unsigned long);
	return val;
}

/*
 * Emulate a CALL to @func.
 *
 * The pushed return address is regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE.
 * NOTE(review): this assumes regs->ip points just past the int3 byte that
 * replaced the first byte of the CALL - confirm against the caller.
 */
static __always_inline
void int3_emulate_call(struct pt_regs *regs, unsigned long func)
{
	int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
	int3_emulate_jmp(regs, func);
}

/* Emulate a RET: pop the return address and continue there. */
static __always_inline
void int3_emulate_ret(struct pt_regs *regs)
{
	unsigned long ip = int3_emulate_pop(regs);
	int3_emulate_jmp(regs, ip);
}

/*
 * Emulate a conditional jump.
 *
 * @cc:   condition code; bit 0 inverts the test.  Values below 0xc are
 *        looked up in jcc_mask[] by @cc >> 1, 0xc/0xd test SF != OF, and
 *        0xe/0xf additionally accept ZF.  Presumably the 4-bit x86 Jcc
 *        condition encoding; values above 0xf are not expected.
 * @ip:   address to continue at when the branch is not taken
 * @disp: added to @ip when the branch is taken
 */
static __always_inline
void int3_emulate_jcc(struct pt_regs *regs, u8 cc, unsigned long ip, unsigned long disp)
{
	/* Flag(s) tested by each even/odd condition pair below 0xc. */
	static const unsigned long jcc_mask[6] = {
		[0] = X86_EFLAGS_OF,
		[1] = X86_EFLAGS_CF,
		[2] = X86_EFLAGS_ZF,
		[3] = X86_EFLAGS_CF | X86_EFLAGS_ZF,
		[4] = X86_EFLAGS_SF,
		[5] = X86_EFLAGS_PF,
	};

	bool invert = cc & 1;
	bool match;

	if (cc < 0xc) {
		match = regs->flags & jcc_mask[cc >> 1];
	} else {
		/* SF xor OF, each reduced to a single bit first. */
		match = ((regs->flags & X86_EFLAGS_SF) >> X86_EFLAGS_SF_BIT) ^
			((regs->flags & X86_EFLAGS_OF) >> X86_EFLAGS_OF_BIT);
		if (cc >= 0xe)
			match = match || (regs->flags & X86_EFLAGS_ZF);
	}

	/* Taken when the test result differs from the invert bit. */
	if ((match && !invert) || (!match && invert))
		ip += disp;

	int3_emulate_jmp(regs, ip);
}

#endif /* !CONFIG_UML_X86 */

#endif /* _ASM_X86_TEXT_PATCHING_H */
17 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Landlock LSM - Credential hooks * * Copyright © 2019-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2019-2020 ANSSI */ #ifndef _SECURITY_LANDLOCK_CRED_H #define _SECURITY_LANDLOCK_CRED_H #include <linux/cred.h> #include <linux/init.h> #include <linux/rcupdate.h> #include "ruleset.h" #include "setup.h" struct landlock_cred_security { struct landlock_ruleset *domain; }; static inline struct landlock_cred_security * landlock_cred(const struct cred *cred) { return cred->security + landlock_blob_sizes.lbs_cred; } static inline struct landlock_ruleset *landlock_get_current_domain(void) { return landlock_cred(current_cred())->domain; } /* * The call needs to come from an RCU read-side critical section. */ static inline const struct landlock_ruleset * landlock_get_task_domain(const struct task_struct *const task) { return landlock_cred(__task_cred(task))->domain; } static inline bool landlocked(const struct task_struct *const task) { bool has_dom; if (task == current) return !!landlock_get_current_domain(); rcu_read_lock(); has_dom = !!landlock_get_task_domain(task); rcu_read_unlock(); return has_dom; } __init void landlock_add_cred_hooks(void); #endif /* _SECURITY_LANDLOCK_CRED_H */
11 2847 2845 15932 121 11517 2850 2799 14924 119 14917 2931 14847 9073 14464 14454 5569 5569 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 /* SPDX-License-Identifier: GPL-2.0 */ /* * Variant of atomic_t specialized for reference counts. * * The interface matches the atomic_t interface (to aid in porting) but only * provides the few functions one should use for reference counting. * * Saturation semantics * ==================== * * refcount_t differs from atomic_t in that the counter saturates at * REFCOUNT_SATURATED and will not move once there. This avoids wrapping the * counter and causing 'spurious' use-after-free issues. 
In order to avoid the * cost associated with introducing cmpxchg() loops into all of the saturating * operations, we temporarily allow the counter to take on an unchecked value * and then explicitly set it to REFCOUNT_SATURATED on detecting that underflow * or overflow has occurred. Although this is racy when multiple threads * access the refcount concurrently, by placing REFCOUNT_SATURATED roughly * equidistant from 0 and INT_MAX we minimise the scope for error: * * INT_MAX REFCOUNT_SATURATED UINT_MAX * 0 (0x7fff_ffff) (0xc000_0000) (0xffff_ffff) * +--------------------------------+----------------+----------------+ * <---------- bad value! ----------> * * (in a signed view of the world, the "bad value" range corresponds to * a negative counter value). * * As an example, consider a refcount_inc() operation that causes the counter * to overflow: * * int old = atomic_fetch_add_relaxed(r); * // old is INT_MAX, refcount now INT_MIN (0x8000_0000) * if (old < 0) * atomic_set(r, REFCOUNT_SATURATED); * * If another thread also performs a refcount_inc() operation between the two * atomic operations, then the count will continue to edge closer to 0. If it * reaches a value of 1 before /any/ of the threads reset it to the saturated * value, then a concurrent refcount_dec_and_test() may erroneously free the * underlying object. * Linux limits the maximum number of tasks to PID_MAX_LIMIT, which is currently * 0x400000 (and can't easily be raised in the future beyond FUTEX_TID_MASK). 
* With the current PID limit, if no batched refcounting operations are used and * the attacker can't repeatedly trigger kernel oopses in the middle of refcount * operations, this makes it impossible for a saturated refcount to leave the * saturation range, even if it is possible for multiple uses of the same * refcount to nest in the context of a single task: * * (UINT_MAX+1-REFCOUNT_SATURATED) / PID_MAX_LIMIT = * 0x40000000 / 0x400000 = 0x100 = 256 * * If hundreds of references are added/removed with a single refcounting * operation, it may potentially be possible to leave the saturation range; but * given the precise timing details involved with the round-robin scheduling of * each thread manipulating the refcount and the need to hit the race multiple * times in succession, there doesn't appear to be a practical avenue of attack * even if using refcount_add() operations with larger increments. * * Memory ordering * =============== * * Memory ordering rules are slightly relaxed wrt regular atomic_t functions * and provide only what is strictly required for refcounts. * * The increments are fully relaxed; these will not provide ordering. The * rationale is that whatever is used to obtain the object we're increasing the * reference count on will provide the ordering. For locked data structures, * its the lock acquire, for RCU/lockless data structures its the dependent * load. * * Do note that inc_not_zero() provides a control dependency which will order * future stores against the inc, this ensures we'll never modify the object * if we did not in fact acquire a reference. * * The decrements will provide release order, such that all the prior loads and * stores will be issued before, it also provides a control dependency, which * will order us against the subsequent free(). * * The control dependency is against the load of the cmpxchg (ll/sc) that * succeeded. 
This means the stores aren't fully ordered, but this is fine
 * because the 1->0 transition indicates no concurrency.
 *
 * Note that the allocator is responsible for ordering things between free()
 * and alloc().
 *
 * The decrements dec_and_test() and sub_and_test() also provide acquire
 * ordering on success.
 *
 */

#ifndef _LINUX_REFCOUNT_H
#define _LINUX_REFCOUNT_H

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/compiler.h>
#include <linux/limits.h>
#include <linux/refcount_types.h>
#include <linux/spinlock_types.h>

struct mutex;

/* Static initializer for a refcount_t holding @n. */
#define REFCOUNT_INIT(n)	{ .refs = ATOMIC_INIT(n), }
#define REFCOUNT_MAX		INT_MAX
/* Value the counter is pinned to once saturation is detected. */
#define REFCOUNT_SATURATED	(INT_MIN / 2)

/*
 * Reason passed to refcount_warn_saturate(); each value identifies the inline
 * helper in this header that detected the problem.
 */
enum refcount_saturation_type {
	REFCOUNT_ADD_NOT_ZERO_OVF,	/* __refcount_add_not_zero(): negative old/new value */
	REFCOUNT_ADD_OVF,		/* __refcount_add(): negative old/new value */
	REFCOUNT_ADD_UAF,		/* __refcount_add(): old value was 0 */
	REFCOUNT_SUB_UAF,		/* __refcount_sub_and_test(): old <= 0 or result < 0 */
	REFCOUNT_DEC_LEAK,		/* __refcount_dec(): old value was <= 1 */
};

void refcount_warn_saturate(refcount_t *r, enum refcount_saturation_type t);

/**
 * refcount_set - set a refcount's value
 * @r: the refcount
 * @n: value to which the refcount will be set
 */
static inline void refcount_set(refcount_t *r, int n)
{
	atomic_set(&r->refs, n);
}

/**
 * refcount_read - get a refcount's value
 * @r: the refcount
 *
 * Return: the refcount's value
 */
static inline unsigned int refcount_read(const refcount_t *r)
{
	return atomic_read(&r->refs);
}

/*
 * Add @i to @r unless the counter is 0.
 *
 * @oldp, when non-NULL, receives the counter value last observed: the value
 * the add was applied to, or 0 when no add took place.
 *
 * Reports REFCOUNT_ADD_NOT_ZERO_OVF when that value, or that value plus @i,
 * is negative.
 *
 * Returns true when the observed value was non-zero, false when it was 0.
 */
static inline __must_check __signed_wrap
bool __refcount_add_not_zero(int i, refcount_t *r, int *oldp)
{
	int old = refcount_read(r);

	/*
	 * Retry the compare-and-exchange until it succeeds or the counter is
	 * seen to be 0; a failed attempt refreshes @old with the current value.
	 */
	do {
		if (!old)
			break;
	} while (!atomic_try_cmpxchg_relaxed(&r->refs, &old, old + i));

	if (oldp)
		*oldp = old;

	if (unlikely(old < 0 || old + i < 0))
		refcount_warn_saturate(r, REFCOUNT_ADD_NOT_ZERO_OVF);

	return old;
}

/**
 * refcount_add_not_zero - add a value to a refcount unless it is 0
 * @i: the value to add to the refcount
 * @r: the refcount
 *
 * Will saturate at REFCOUNT_SATURATED and WARN.
 *
 * Provides no memory ordering, it is assumed the caller has guaranteed the
 * object memory to be stable (RCU, etc.). It does provide a control dependency
 * and thereby orders future stores.
See the comment on top.
 *
 * Use of this function is not recommended for the normal reference counting
 * use case in which references are taken and released one at a time.  In these
 * cases, refcount_inc(), or one of its variants, should instead be used to
 * increment a reference count.
 *
 * Return: false if the passed refcount is 0, true otherwise
 */
static inline __must_check bool refcount_add_not_zero(int i, refcount_t *r)
{
	return __refcount_add_not_zero(i, r, NULL);
}

/*
 * Unconditionally add @i to @r with a relaxed fetch-and-add.
 *
 * @oldp, when non-NULL, receives the value the counter held before the add.
 *
 * Reports REFCOUNT_ADD_UAF when the previous value was 0, and
 * REFCOUNT_ADD_OVF when the previous value or the sum is negative.
 */
static inline __signed_wrap
void __refcount_add(int i, refcount_t *r, int *oldp)
{
	int old = atomic_fetch_add_relaxed(i, &r->refs);

	if (oldp)
		*oldp = old;

	if (unlikely(!old))
		refcount_warn_saturate(r, REFCOUNT_ADD_UAF);
	else if (unlikely(old < 0 || old + i < 0))
		refcount_warn_saturate(r, REFCOUNT_ADD_OVF);
}

/**
 * refcount_add - add a value to a refcount
 * @i: the value to add to the refcount
 * @r: the refcount
 *
 * Similar to atomic_add(), but will saturate at REFCOUNT_SATURATED and WARN.
 *
 * Provides no memory ordering, it is assumed the caller has guaranteed the
 * object memory to be stable (RCU, etc.). It does provide a control dependency
 * and thereby orders future stores. See the comment on top.
 *
 * Use of this function is not recommended for the normal reference counting
 * use case in which references are taken and released one at a time.  In these
 * cases, refcount_inc(), or one of its variants, should instead be used to
 * increment a reference count.
 */
static inline void refcount_add(int i, refcount_t *r)
{
	__refcount_add(i, r, NULL);
}

/* __refcount_add_not_zero() with an increment of 1. */
static inline __must_check bool __refcount_inc_not_zero(refcount_t *r, int *oldp)
{
	return __refcount_add_not_zero(1, r, oldp);
}

/**
 * refcount_inc_not_zero - increment a refcount unless it is 0
 * @r: the refcount to increment
 *
 * Similar to atomic_inc_not_zero(), but will saturate at REFCOUNT_SATURATED
 * and WARN.
 *
 * Provides no memory ordering, it is assumed the caller has guaranteed the
 * object memory to be stable (RCU, etc.).
It does provide a control dependency
 * and thereby orders future stores. See the comment on top.
 *
 * Return: true if the increment was successful, false otherwise
 */
static inline __must_check bool refcount_inc_not_zero(refcount_t *r)
{
	return __refcount_inc_not_zero(r, NULL);
}

/* __refcount_add() with an increment of 1. */
static inline void __refcount_inc(refcount_t *r, int *oldp)
{
	__refcount_add(1, r, oldp);
}

/**
 * refcount_inc - increment a refcount
 * @r: the refcount to increment
 *
 * Similar to atomic_inc(), but will saturate at REFCOUNT_SATURATED and WARN.
 *
 * Provides no memory ordering, it is assumed the caller already has a
 * reference on the object.
 *
 * Will WARN if the refcount is 0, as this represents a possible use-after-free
 * condition.
 */
static inline void refcount_inc(refcount_t *r)
{
	__refcount_inc(r, NULL);
}

/*
 * Subtract @i from @r with release ordering and report whether that dropped
 * the counter to 0.
 *
 * @oldp, when non-NULL, receives the value the counter held before the
 * subtraction.
 *
 * Returns true only when the previous value was positive and equal to @i; an
 * acquire barrier is issued on that path before returning.  Otherwise
 * REFCOUNT_SUB_UAF is reported when the previous value was <= 0 or the
 * result is negative, and false is returned.
 */
static inline __must_check __signed_wrap
bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp)
{
	int old = atomic_fetch_sub_release(i, &r->refs);

	if (oldp)
		*oldp = old;

	if (old > 0 && old == i) {
		smp_acquire__after_ctrl_dep();
		return true;
	}

	if (unlikely(old <= 0 || old - i < 0))
		refcount_warn_saturate(r, REFCOUNT_SUB_UAF);

	return false;
}

/**
 * refcount_sub_and_test - subtract from a refcount and test if it is 0
 * @i: amount to subtract from the refcount
 * @r: the refcount
 *
 * Similar to atomic_dec_and_test(), but it will WARN, return false and
 * ultimately leak on underflow and will fail to decrement when saturated
 * at REFCOUNT_SATURATED.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides an acquire ordering on success such that free()
 * must come after.
 *
 * Use of this function is not recommended for the normal reference counting
 * use case in which references are taken and released one at a time.  In these
 * cases, refcount_dec(), or one of its variants, should instead be used to
 * decrement a reference count.
*
 * Return: true if the resulting refcount is 0, false otherwise
 */
static inline __must_check bool refcount_sub_and_test(int i, refcount_t *r)
{
	return __refcount_sub_and_test(i, r, NULL);
}

/* __refcount_sub_and_test() with a decrement of 1. */
static inline __must_check bool __refcount_dec_and_test(refcount_t *r, int *oldp)
{
	return __refcount_sub_and_test(1, r, oldp);
}

/**
 * refcount_dec_and_test - decrement a refcount and test if it is 0
 * @r: the refcount
 *
 * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
 * decrement when saturated at REFCOUNT_SATURATED.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before, and provides an acquire ordering on success such that free()
 * must come after.
 *
 * Return: true if the resulting refcount is 0, false otherwise
 */
static inline __must_check bool refcount_dec_and_test(refcount_t *r)
{
	return __refcount_dec_and_test(r, NULL);
}

/*
 * Subtract 1 from @r with release ordering, without reporting the result.
 *
 * @oldp, when non-NULL, receives the value the counter held before the
 * decrement.  REFCOUNT_DEC_LEAK is reported when that value was <= 1, i.e.
 * when this decrement took the counter to 0 or below.
 */
static inline void __refcount_dec(refcount_t *r, int *oldp)
{
	int old = atomic_fetch_sub_release(1, &r->refs);

	if (oldp)
		*oldp = old;

	if (unlikely(old <= 1))
		refcount_warn_saturate(r, REFCOUNT_DEC_LEAK);
}

/**
 * refcount_dec - decrement a refcount
 * @r: the refcount
 *
 * Similar to atomic_dec(), it will WARN on underflow and fail to decrement
 * when saturated at REFCOUNT_SATURATED.
 *
 * Provides release memory ordering, such that prior loads and stores are done
 * before.
 */
static inline void refcount_dec(refcount_t *r)
{
	__refcount_dec(r, NULL);
}

/* Out-of-line variants; implemented outside this header. */
extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock);
extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock);
extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
						       spinlock_t *lock,
						       unsigned long *flags) __cond_acquires(lock);
#endif /* _LINUX_REFCOUNT_H */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VMALLOC_H #define _LINUX_VMALLOC_H #include <linux/alloc_tag.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/list.h> #include <linux/llist.h> #include <asm/page.h> /* pgprot_t */ #include <linux/rbtree.h> #include <linux/overflow.h> #include <asm/vmalloc.h> struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ struct iov_iter; /* in uio.h */ /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_DMA_COHERENT 
0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ #define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) #define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ #else #define VM_DEFER_KMEMLEAK 0 #endif #define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ /* * Maximum alignment for ioremap() regions. * Can be overridden by arch-specific value. 
*/
#ifndef IOREMAP_MAX_ORDER
#define IOREMAP_MAX_ORDER	(7 + PAGE_SHIFT)	/* 128 pages */
#endif

/*
 * Descriptor of one area.  @flags holds the VM_* bits defined in this header;
 * @size includes the guard page unless VM_NO_GUARD is set (see
 * get_vm_area_size()).
 */
struct vm_struct {
	struct vm_struct	*next;
	void			*addr;
	unsigned long		size;
	unsigned long		flags;
	struct page		**pages;
#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
	unsigned int		page_order;
#endif
	unsigned int		nr_pages;
	phys_addr_t		phys_addr;
	const void		*caller;
};

/* An address range [va_start, va_end) tracked in the "free" or "busy" tree. */
struct vmap_area {
	unsigned long va_start;
	unsigned long va_end;

	struct rb_node rb_node;         /* address sorted rbtree */
	struct list_head list;          /* address sorted list */

	/*
	 * The following two variables can be packed, because
	 * a vmap_area object can be either:
	 *    1) in "free" tree (root is free_vmap_area_root)
	 *    2) or "busy" tree (root is vmap_area_root)
	 */
	union {
		unsigned long subtree_max_size; /* in "free" tree */
		struct vm_struct *vm;           /* in "busy" tree */
	};
	unsigned long flags; /* mark type of vm_map_ram area */
};

/* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */
#ifndef arch_vmap_p4d_supported
/* Generic fallback: reports no support. */
static inline bool arch_vmap_p4d_supported(pgprot_t prot)
{
	return false;
}
#endif

#ifndef arch_vmap_pud_supported
/* Generic fallback: reports no support. */
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
	return false;
}
#endif

#ifndef arch_vmap_pmd_supported
/* Generic fallback: reports no support. */
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
	return false;
}
#endif

#ifndef arch_vmap_pte_range_map_size
/* Generic fallback: always PAGE_SIZE, whatever the arguments. */
static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
							 u64 pfn, unsigned int max_page_shift)
{
	return PAGE_SIZE;
}
#endif

#ifndef arch_vmap_pte_supported_shift
/* Generic fallback: always PAGE_SHIFT, whatever @size is. */
static inline int arch_vmap_pte_supported_shift(unsigned long size)
{
	return PAGE_SHIFT;
}
#endif

#ifndef arch_vmap_pgprot_tagged
/* Generic fallback: returns @prot unchanged. */
static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
{
	return prot;
}
#endif

/*
 *	Highlevel APIs for driver use
 */
extern void vm_unmap_ram(const void *mem, unsigned int count);
extern void *vm_map_ram(struct page **pages, unsigned int count, int node);
extern void vm_unmap_aliases(void);

/*
 * Each allocator below is declared as a *_noprof function and exposed under
 * its plain name by a macro that wraps the call in alloc_hooks().
 */
extern void *vmalloc_noprof(unsigned long size) __alloc_size(1);
#define vmalloc(...)		alloc_hooks(vmalloc_noprof(__VA_ARGS__))

extern void *vzalloc_noprof(unsigned long size) __alloc_size(1);
#define vzalloc(...)		alloc_hooks(vzalloc_noprof(__VA_ARGS__))

extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_user(...)	alloc_hooks(vmalloc_user_noprof(__VA_ARGS__))

extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
#define vmalloc_node(...)	alloc_hooks(vmalloc_node_noprof(__VA_ARGS__))

extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1);
#define vzalloc_node(...)	alloc_hooks(vzalloc_node_noprof(__VA_ARGS__))

extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_32(...)		alloc_hooks(vmalloc_32_noprof(__VA_ARGS__))

extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1);
#define vmalloc_32_user(...)	alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__))

extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
#define __vmalloc(...)		alloc_hooks(__vmalloc_noprof(__VA_ARGS__))

extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align,
			unsigned long start, unsigned long end, gfp_t gfp_mask,
			pgprot_t prot, unsigned long vm_flags, int node,
			const void *caller) __alloc_size(1);
#define __vmalloc_node_range(...)	alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__))

void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask,
		int node, const void *caller) __alloc_size(1);
#define __vmalloc_node(...)	alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__))

void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
#define vmalloc_huge(...)	alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__))

extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
#define __vmalloc_array(...)	alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__))

extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2);
#define vmalloc_array(...)	alloc_hooks(vmalloc_array_noprof(__VA_ARGS__))

extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
#define __vcalloc(...)		alloc_hooks(__vcalloc_noprof(__VA_ARGS__))

extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2);
#define vcalloc(...)		alloc_hooks(vcalloc_noprof(__VA_ARGS__))

void * __must_check vrealloc_noprof(const void *p, size_t size, gfp_t flags)
		__realloc_size(2);
#define vrealloc(...)		alloc_hooks(vrealloc_noprof(__VA_ARGS__))

extern void vfree(const void *addr);
extern void vfree_atomic(const void *addr);

extern void *vmap(struct page **pages, unsigned int count,
			unsigned long flags, pgprot_t prot);
void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot);
extern void vunmap(const void *addr);

extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
				       unsigned long uaddr, void *kaddr,
				       unsigned long pgoff, unsigned long size);

extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
							unsigned long pgoff);

int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot,
		     struct page **pages, unsigned int page_shift);

/*
 * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
 * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
 * needs to be called.
 */
#ifndef ARCH_PAGE_TABLE_SYNC_MASK
#define ARCH_PAGE_TABLE_SYNC_MASK 0
#endif

/*
 * There is no default implementation for arch_sync_kernel_mappings(). The
 * compiler is relied upon to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK
 * is 0.
 */
void arch_sync_kernel_mappings(unsigned long start, unsigned long end);

/*
 *	Lowlevel-APIs (not for driver use!)
*/ static inline size_t get_vm_area_size(const struct vm_struct *area) { if (!(area->flags & VM_NO_GUARD)) /* return actual size without guard page */ return area->size - PAGE_SIZE; else return area->size; } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); struct vmap_area *find_vmap_area(unsigned long addr); static inline bool is_vm_area_hugepages(const void *addr) { /* * This may not 100% tell if the area is mapped with > PAGE_SIZE * page table entries, if for some reason the architecture indicates * larger sizes are available but decides not to use them, nothing * prevents that. This only indicates the size of the physical page * allocated in the vmalloc layer. */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC return find_vm_area(addr)->page_order > 0; #else return false; #endif } /* for /proc/kcore */ long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. 
*/ __init void vm_area_add_early(struct vm_struct *vm); __init void vm_area_register_early(struct vm_struct *vm, size_t align); int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) unsigned long vmalloc_nr_pages(void); int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } #else /* !CONFIG_MMU */ #define VMALLOC_TOTAL 0UL static inline unsigned long vmalloc_nr_pages(void) { return 0; } static inline void set_vm_flush_reset_perms(void *addr) {} #endif /* CONFIG_MMU */ #if defined(CONFIG_MMU) && defined(CONFIG_SMP) struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); # else static inline struct vm_struct ** pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align) { return NULL; } static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) {} #endif #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else static inline bool vmalloc_dump_obj(void *object) { return false; } #endif #endif /* _LINUX_VMALLOC_H */
17 1 2 14 16 4 4 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 /* * Routines to compress and uncompress tcp packets (for transmission * over low speed serial lines). * * Copyright (c) 1989 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Van Jacobson (van@helios.ee.lbl.gov), Dec 31, 1989: * - Initial distribution. 
* * * modified for KA9Q Internet Software Package by * Katie Stevens (dkstevens@ucdavis.edu) * University of California, Davis * Computing Services * - 01-31-90 initial adaptation (from 1.19) * PPP.05 02-15-90 [ks] * PPP.08 05-02-90 [ks] use PPP protocol field to signal compression * PPP.15 09-90 [ks] improve mbuf handling * PPP.16 11-02 [karn] substantially rewritten to use NOS facilities * * - Feb 1991 Bill_Simpson@um.cc.umich.edu * variable number of conversation slots * allow zero or one slots * separate routines * status display * - Jul 1994 Dmitry Gorodchanin * Fixes for memory leaks. * - Oct 1994 Dmitry Gorodchanin * Modularization. * - Jan 1995 Bjorn Ekwall * Use ip_fast_csum from ip.h * - July 1995 Christos A. Polyzols * Spotted bug in tcp option checking * * * This module is a difficult issue. It's clearly inet code but it's also clearly * driver code belonging close to PPP and SLIP */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/kernel.h> #include <net/slhc_vj.h> #ifdef CONFIG_INET /* Entire module is for IP only */ #include <linux/mm.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/termios.h> #include <linux/in.h> #include <linux/fcntl.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/protocol.h> #include <net/icmp.h> #include <net/tcp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/timer.h> #include <linux/uaccess.h> #include <net/checksum.h> #include <linux/unaligned.h> static unsigned char *encode(unsigned char *cp, unsigned short n); static long decode(unsigned char **cpp); static unsigned char * put16(unsigned char *cp, unsigned short x); static unsigned short pull16(unsigned char **cpp); /* Allocate compression data structure * slots must be in range 0 to 255 (zero meaning no compression) * Returns pointer to structure or ERR_PTR() on error. 
*/ struct slcompress * slhc_init(int rslots, int tslots) { short i; struct cstate *ts; struct slcompress *comp; if (rslots < 0 || rslots > 255 || tslots < 0 || tslots > 255) return ERR_PTR(-EINVAL); comp = kzalloc(sizeof(struct slcompress), GFP_KERNEL); if (! comp) goto out_fail; if (rslots > 0) { size_t rsize = rslots * sizeof(struct cstate); comp->rstate = kzalloc(rsize, GFP_KERNEL); if (! comp->rstate) goto out_free; comp->rslot_limit = rslots - 1; } if (tslots > 0) { size_t tsize = tslots * sizeof(struct cstate); comp->tstate = kzalloc(tsize, GFP_KERNEL); if (! comp->tstate) goto out_free2; comp->tslot_limit = tslots - 1; } comp->xmit_oldest = 0; comp->xmit_current = 255; comp->recv_current = 255; /* * don't accept any packets with implicit index until we get * one with an explicit index. Otherwise the uncompress code * will try to use connection 255, which is almost certainly * out of range */ comp->flags |= SLF_TOSS; if ( tslots > 0 ) { ts = comp->tstate; for(i = comp->tslot_limit; i > 0; --i){ ts[i].cs_this = i; ts[i].next = &(ts[i - 1]); } ts[0].next = &(ts[comp->tslot_limit]); ts[0].cs_this = 0; } return comp; out_free2: kfree(comp->rstate); out_free: kfree(comp); out_fail: return ERR_PTR(-ENOMEM); } /* Free a compression data structure */ void slhc_free(struct slcompress *comp) { if ( IS_ERR_OR_NULL(comp) ) return; if ( comp->tstate != NULLSLSTATE ) kfree( comp->tstate ); if ( comp->rstate != NULLSLSTATE ) kfree( comp->rstate ); kfree( comp ); } /* Put a short in host order into a char array in network order */ static inline unsigned char * put16(unsigned char *cp, unsigned short x) { *cp++ = x >> 8; *cp++ = x; return cp; } /* Encode a number */ static unsigned char * encode(unsigned char *cp, unsigned short n) { if(n >= 256 || n == 0){ *cp++ = 0; cp = put16(cp,n); } else { *cp++ = n; } return cp; } /* Pull a 16-bit integer in host order from buffer in network byte order */ static unsigned short pull16(unsigned char **cpp) { short rval; rval = *(*cpp)++; 
rval <<= 8; rval |= *(*cpp)++; return rval; } /* Decode a number */ static long decode(unsigned char **cpp) { int x; x = *(*cpp)++; if(x == 0){ return pull16(cpp) & 0xffff; /* pull16 returns -1 on error */ } else { return x & 0xff; /* -1 if PULLCHAR returned error */ } } /* * icp and isize are the original packet. * ocp is a place to put a copy if necessary. * cpp is initially a pointer to icp. If the copy is used, * change it to ocp. */ int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, unsigned char *ocp, unsigned char **cpp, int compress_cid) { struct cstate *ocs = &(comp->tstate[comp->xmit_oldest]); struct cstate *lcs = ocs; struct cstate *cs = lcs->next; unsigned long deltaS, deltaA; short changes = 0; int nlen, hlen; unsigned char new_seq[16]; unsigned char *cp = new_seq; struct iphdr *ip; struct tcphdr *th, *oth; __sum16 csum; /* * Don't play with runt packets. */ if(isize<sizeof(struct iphdr)) return isize; ip = (struct iphdr *) icp; if (ip->version != 4 || ip->ihl < 5) return isize; /* Bail if this packet isn't TCP, or is an IP fragment */ if (ip->protocol != IPPROTO_TCP || (ntohs(ip->frag_off) & 0x3fff)) { /* Send as regular IP */ if(ip->protocol != IPPROTO_TCP) comp->sls_o_nontcp++; else comp->sls_o_tcp++; return isize; } nlen = ip->ihl * 4; if (isize < nlen + sizeof(*th)) return isize; th = (struct tcphdr *)(icp + nlen); if (th->doff < sizeof(struct tcphdr) / 4) return isize; hlen = nlen + th->doff * 4; /* Bail if the TCP packet isn't `compressible' (i.e., ACK isn't set or * some other control bit is set). Also uncompressible if * it's a runt. */ if(hlen > isize || th->syn || th->fin || th->rst || ! (th->ack)){ /* TCP connection stuff; send as regular IP */ comp->sls_o_tcp++; return isize; } /* * Packet is compressible -- we're going to send either a * COMPRESSED_TCP or UNCOMPRESSED_TCP packet. Either way, * we need to locate (or create) the connection state. 
* * States are kept in a circularly linked list with * xmit_oldest pointing to the end of the list. The * list is kept in lru order by moving a state to the * head of the list whenever it is referenced. Since * the list is short and, empirically, the connection * we want is almost always near the front, we locate * states via linear search. If we don't find a state * for the datagram, the oldest state is (re-)used. */ for ( ; ; ) { if( ip->saddr == cs->cs_ip.saddr && ip->daddr == cs->cs_ip.daddr && th->source == cs->cs_tcp.source && th->dest == cs->cs_tcp.dest) goto found; /* if current equal oldest, at end of list */ if ( cs == ocs ) break; lcs = cs; cs = cs->next; comp->sls_o_searches++; } /* * Didn't find it -- re-use oldest cstate. Send an * uncompressed packet that tells the other side what * connection number we're using for this conversation. * * Note that since the state list is circular, the oldest * state points to the newest and we only need to set * xmit_oldest to update the lru linkage. */ comp->sls_o_misses++; comp->xmit_oldest = lcs->cs_this; goto uncompressed; found: /* * Found it -- move to the front on the connection list. */ if(lcs == ocs) { /* found at most recently used */ } else if (cs == ocs) { /* found at least recently used */ comp->xmit_oldest = lcs->cs_this; } else { /* more than 2 elements */ lcs->next = cs->next; cs->next = ocs->next; ocs->next = cs; } /* * Make sure that only what we expect to change changed. * Check the following: * IP protocol version, header length & type of service. * The "Don't fragment" bit. * The time-to-live field. * The TCP header length. * IP options, if any. * TCP options, if any. * If any of these things are different between the previous & * current datagram, we send the current datagram `uncompressed'. 
*/ oth = &cs->cs_tcp; if(ip->version != cs->cs_ip.version || ip->ihl != cs->cs_ip.ihl || ip->tos != cs->cs_ip.tos || (ip->frag_off & htons(0x4000)) != (cs->cs_ip.frag_off & htons(0x4000)) || ip->ttl != cs->cs_ip.ttl || th->doff != cs->cs_tcp.doff || (ip->ihl > 5 && memcmp(ip+1,cs->cs_ipopt,((ip->ihl)-5)*4) != 0) || (th->doff > 5 && memcmp(th+1,cs->cs_tcpopt,((th->doff)-5)*4) != 0)){ goto uncompressed; } /* * Figure out which of the changing fields changed. The * receiver expects changes in the order: urgent, window, * ack, seq (the order minimizes the number of temporaries * needed in this section of code). */ if(th->urg){ deltaS = ntohs(th->urg_ptr); cp = encode(cp,deltaS); changes |= NEW_U; } else if(th->urg_ptr != oth->urg_ptr){ /* argh! URG not set but urp changed -- a sensible * implementation should never do this but RFC793 * doesn't prohibit the change so we have to deal * with it. */ goto uncompressed; } if((deltaS = ntohs(th->window) - ntohs(oth->window)) != 0){ cp = encode(cp,deltaS); changes |= NEW_W; } if((deltaA = ntohl(th->ack_seq) - ntohl(oth->ack_seq)) != 0L){ if(deltaA > 0x0000ffff) goto uncompressed; cp = encode(cp,deltaA); changes |= NEW_A; } if((deltaS = ntohl(th->seq) - ntohl(oth->seq)) != 0L){ if(deltaS > 0x0000ffff) goto uncompressed; cp = encode(cp,deltaS); changes |= NEW_S; } switch(changes){ case 0: /* Nothing changed. If this packet contains data and the * last one didn't, this is probably a data packet following * an ack (normal on an interactive connection) and we send * it compressed. Otherwise it's probably a retransmit, * retransmitted ack or window probe. Send it uncompressed * in case the other side missed the compressed version. */ if(ip->tot_len != cs->cs_ip.tot_len && ntohs(cs->cs_ip.tot_len) == hlen) break; goto uncompressed; case SPECIAL_I: case SPECIAL_D: /* actual changes match one of our special case encodings -- * send packet uncompressed. 
*/ goto uncompressed; case NEW_S|NEW_A: if(deltaS == deltaA && deltaS == ntohs(cs->cs_ip.tot_len) - hlen){ /* special case for echoed terminal traffic */ changes = SPECIAL_I; cp = new_seq; } break; case NEW_S: if(deltaS == ntohs(cs->cs_ip.tot_len) - hlen){ /* special case for data xfer */ changes = SPECIAL_D; cp = new_seq; } break; } deltaS = ntohs(ip->id) - ntohs(cs->cs_ip.id); if(deltaS != 1){ cp = encode(cp,deltaS); changes |= NEW_I; } if(th->psh) changes |= TCP_PUSH_BIT; /* Grab the cksum before we overwrite it below. Then update our * state with this packet's header. */ csum = th->check; memcpy(&cs->cs_ip,ip,20); memcpy(&cs->cs_tcp,th,20); /* We want to use the original packet as our compressed packet. * (cp - new_seq) is the number of bytes we need for compressed * sequence numbers. In addition we need one byte for the change * mask, one for the connection id and two for the tcp checksum. * So, (cp - new_seq) + 4 bytes of header are needed. */ deltaS = cp - new_seq; if(compress_cid == 0 || comp->xmit_current != cs->cs_this){ cp = ocp; *cpp = ocp; *cp++ = changes | NEW_C; *cp++ = cs->cs_this; comp->xmit_current = cs->cs_this; } else { cp = ocp; *cpp = ocp; *cp++ = changes; } *(__sum16 *)cp = csum; cp += 2; /* deltaS is now the size of the change section of the compressed header */ memcpy(cp,new_seq,deltaS); /* Write list of deltas */ memcpy(cp+deltaS,icp+hlen,isize-hlen); comp->sls_o_compressed++; ocp[0] |= SL_TYPE_COMPRESSED_TCP; return isize - hlen + deltaS + (cp - ocp); /* Update connection state cs & send uncompressed packet (i.e., * a regular ip/tcp packet but with the 'conversation id' we hope * to use on future compressed packets in the protocol field). 
*/ uncompressed: memcpy(&cs->cs_ip,ip,20); memcpy(&cs->cs_tcp,th,20); if (ip->ihl > 5) memcpy(cs->cs_ipopt, ip+1, ((ip->ihl) - 5) * 4); if (th->doff > 5) memcpy(cs->cs_tcpopt, th+1, ((th->doff) - 5) * 4); comp->xmit_current = cs->cs_this; comp->sls_o_uncompressed++; memcpy(ocp, icp, isize); *cpp = ocp; ocp[9] = cs->cs_this; ocp[0] |= SL_TYPE_UNCOMPRESSED_TCP; return isize; } int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) { int changes; long x; struct tcphdr *thp; struct iphdr *ip; struct cstate *cs; int len, hdrlen; unsigned char *cp = icp; /* We've got a compressed packet; read the change byte */ comp->sls_i_compressed++; if(isize < 3){ comp->sls_i_error++; return 0; } changes = *cp++; if(changes & NEW_C){ /* Make sure the state index is in range, then grab the state. * If we have a good state index, clear the 'discard' flag. */ x = *cp++; /* Read conn index */ if(x < 0 || x > comp->rslot_limit) goto bad; /* Check if the cstate is initialized */ if (!comp->rstate[x].initialized) goto bad; comp->flags &=~ SLF_TOSS; comp->recv_current = x; } else { /* this packet has an implicit state index. If we've * had a line error since the last time we got an * explicit state index, we have to toss the packet. */ if(comp->flags & SLF_TOSS){ comp->sls_i_tossed++; return 0; } } cs = &comp->rstate[comp->recv_current]; thp = &cs->cs_tcp; ip = &cs->cs_ip; thp->check = *(__sum16 *)cp; cp += 2; thp->psh = (changes & TCP_PUSH_BIT) ? 
1 : 0; /* * we can use the same number for the length of the saved header and * the current one, because the packet wouldn't have been sent * as compressed unless the options were the same as the previous one */ hdrlen = ip->ihl * 4 + thp->doff * 4; switch(changes & SPECIALS_MASK){ case SPECIAL_I: /* Echoed terminal traffic */ { short i; i = ntohs(ip->tot_len) - hdrlen; thp->ack_seq = htonl( ntohl(thp->ack_seq) + i); thp->seq = htonl( ntohl(thp->seq) + i); } break; case SPECIAL_D: /* Unidirectional data */ thp->seq = htonl( ntohl(thp->seq) + ntohs(ip->tot_len) - hdrlen); break; default: if(changes & NEW_U){ thp->urg = 1; if((x = decode(&cp)) == -1) { goto bad; } thp->urg_ptr = htons(x); } else thp->urg = 0; if(changes & NEW_W){ if((x = decode(&cp)) == -1) { goto bad; } thp->window = htons( ntohs(thp->window) + x); } if(changes & NEW_A){ if((x = decode(&cp)) == -1) { goto bad; } thp->ack_seq = htonl( ntohl(thp->ack_seq) + x); } if(changes & NEW_S){ if((x = decode(&cp)) == -1) { goto bad; } thp->seq = htonl( ntohl(thp->seq) + x); } break; } if(changes & NEW_I){ if((x = decode(&cp)) == -1) { goto bad; } ip->id = htons (ntohs (ip->id) + x); } else ip->id = htons (ntohs (ip->id) + 1); /* * At this point, cp points to the first byte of data in the * packet. Put the reconstructed TCP and IP headers back on the * packet. Recalculate IP checksum (but not TCP checksum). 
*/ len = isize - (cp - icp); if (len < 0) goto bad; len += hdrlen; ip->tot_len = htons(len); ip->check = 0; memmove(icp + hdrlen, cp, len - hdrlen); cp = icp; memcpy(cp, ip, 20); cp += 20; if (ip->ihl > 5) { memcpy(cp, cs->cs_ipopt, (ip->ihl - 5) * 4); cp += (ip->ihl - 5) * 4; } put_unaligned(ip_fast_csum(icp, ip->ihl), &((struct iphdr *)icp)->check); memcpy(cp, thp, 20); cp += 20; if (thp->doff > 5) { memcpy(cp, cs->cs_tcpopt, ((thp->doff) - 5) * 4); cp += ((thp->doff) - 5) * 4; } return len; bad: comp->sls_i_error++; return slhc_toss( comp ); } int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { const struct tcphdr *th; unsigned char index; struct iphdr *iph; struct cstate *cs; unsigned int ihl; /* The packet is shorter than a legal IP header. * Also make sure isize is positive. */ if (isize < (int)sizeof(struct iphdr)) { runt: comp->sls_i_runt++; return slhc_toss(comp); } iph = (struct iphdr *)icp; /* Peek at the IP header's IHL field to find its length */ ihl = iph->ihl; /* The IP header length field is too small, * or packet is shorter than the IP header followed * by minimal tcp header. 
*/ if (ihl < 5 || isize < ihl * 4 + sizeof(struct tcphdr)) goto runt; index = iph->protocol; iph->protocol = IPPROTO_TCP; if (ip_fast_csum(icp, ihl)) { /* Bad IP header checksum; discard */ comp->sls_i_badcheck++; return slhc_toss(comp); } if (index > comp->rslot_limit) { comp->sls_i_error++; return slhc_toss(comp); } th = (struct tcphdr *)(icp + ihl * 4); if (th->doff < sizeof(struct tcphdr) / 4) goto runt; if (isize < ihl * 4 + th->doff * 4) goto runt; /* Update local state */ cs = &comp->rstate[comp->recv_current = index]; comp->flags &=~ SLF_TOSS; memcpy(&cs->cs_ip, iph, sizeof(*iph)); memcpy(&cs->cs_tcp, th, sizeof(*th)); if (ihl > 5) memcpy(cs->cs_ipopt, &iph[1], (ihl - 5) * 4); if (th->doff > 5) memcpy(cs->cs_tcpopt, &th[1], (th->doff - 5) * 4); cs->cs_hsize = ihl*2 + th->doff*2; cs->initialized = true; /* Put headers back on packet * Neither header checksum is recalculated */ comp->sls_i_uncompressed++; return isize; } int slhc_toss(struct slcompress *comp) { if ( comp == NULLSLCOMPR ) return 0; comp->flags |= SLF_TOSS; return 0; } #else /* CONFIG_INET */ int slhc_toss(struct slcompress *comp) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_toss"); return -EINVAL; } int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_uncompress"); return -EINVAL; } int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize, unsigned char *ocp, unsigned char **cpp, int compress_cid) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_compress"); return -EINVAL; } int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_remember"); return -EINVAL; } void slhc_free(struct slcompress *comp) { printk(KERN_DEBUG "Called IP function on non IP-system: slhc_free"); } struct slcompress * slhc_init(int rslots, int tslots) { printk(KERN_DEBUG "Called IP function on non IP-system: 
slhc_init"); return NULL; } #endif /* CONFIG_INET */ /* VJ header compression */ EXPORT_SYMBOL(slhc_init); EXPORT_SYMBOL(slhc_free); EXPORT_SYMBOL(slhc_remember); EXPORT_SYMBOL(slhc_compress); EXPORT_SYMBOL(slhc_uncompress); EXPORT_SYMBOL(slhc_toss); MODULE_DESCRIPTION("Compression helpers for SLIP (serial line)"); MODULE_LICENSE("Dual BSD/GPL");
67 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 // SPDX-License-Identifier: GPL-2.0 #include <net/genetlink.h> #include <net/netns/generic.h> #include <uapi/linux/genetlink.h> #include "ila.h" static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, }; static const struct genl_ops ila_nl_ops[] = { { .cmd = ILA_CMD_ADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_add_mapping, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_del_mapping, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_FLUSH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_flush, .flags = GENL_ADMIN_PERM, }, { .cmd = ILA_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ila_xlat_nl_cmd_get_mapping, .start = ila_xlat_nl_dump_start, .dumpit = ila_xlat_nl_dump, .done = ila_xlat_nl_dump_done, }, }; unsigned int ila_net_id; struct genl_family ila_nl_family __ro_after_init = { .hdrsize = 0, .name = ILA_GENL_NAME, .version = ILA_GENL_VERSION, .maxattr = ILA_ATTR_MAX, .policy = ila_nl_policy, .netnsok = true, .parallel_ops = true, .module = THIS_MODULE, .ops = ila_nl_ops, .n_ops = ARRAY_SIZE(ila_nl_ops), .resv_start_op = ILA_CMD_FLUSH + 1, }; static __net_init int ila_init_net(struct net *net) { int err; err = 
ila_xlat_init_net(net); if (err) goto ila_xlat_init_fail; return 0; ila_xlat_init_fail: return err; } static __net_exit void ila_pre_exit_net(struct net *net) { ila_xlat_pre_exit_net(net); } static __net_exit void ila_exit_net(struct net *net) { ila_xlat_exit_net(net); } static struct pernet_operations ila_net_ops = { .init = ila_init_net, .pre_exit = ila_pre_exit_net, .exit = ila_exit_net, .id = &ila_net_id, .size = sizeof(struct ila_net), }; static int __init ila_init(void) { int ret; ret = register_pernet_device(&ila_net_ops); if (ret) goto register_device_fail; ret = genl_register_family(&ila_nl_family); if (ret) goto register_family_fail; ret = ila_lwt_init(); if (ret) goto fail_lwt; return 0; fail_lwt: genl_unregister_family(&ila_nl_family); register_family_fail: unregister_pernet_device(&ila_net_ops); register_device_fail: return ret; } static void __exit ila_fini(void) { ila_lwt_fini(); genl_unregister_family(&ila_nl_family); unregister_pernet_device(&ila_net_ops); } module_init(ila_init); module_exit(ila_fini); MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IPv6: Identifier Locator Addressing (ILA)");
618 621 621 618 620 619 618 620 620 621 619 619 3 2 3 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 
1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 
1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 
1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 // SPDX-License-Identifier: GPL-2.0-only /* * Packet matching code. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cache.h> #include <linux/capability.h> #include <linux/skbuff.h> #include <linux/kmod.h> #include <linux/vmalloc.h> #include <linux/netdevice.h> #include <linux/module.h> #include <net/ip.h> #include <net/compat.h> #include <linux/uaccess.h> #include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> #include <linux/cpumask.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <net/netfilter/nf_log.h> #include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); void *ipt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ipt, IPT); } EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); /* Returns whether matches rule or not. 
*/ /* Performance critical - called for every packet */ static inline bool ip_packet_match(const struct iphdr *ip, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, int isfrag) { unsigned long ret; if (NF_INVF(ipinfo, IPT_INV_SRCIP, (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) || NF_INVF(ipinfo, IPT_INV_DSTIP, (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr)) return false; ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0)) return false; ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0)) return false; /* Check specific protocol */ if (ipinfo->proto && NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto)) return false; /* If we have a fragment rule but the packet is not a fragment * then we return zero */ if (NF_INVF(ipinfo, IPT_INV_FRAG, (ipinfo->flags & IPT_F_FRAG) && !isfrag)) return false; return true; } static bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) return false; if (ip->invflags & ~IPT_INV_MASK) return false; return true; } static unsigned int ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } /* Performance critical */ static inline struct ipt_entry * get_entry(const void *base, unsigned int offset) { return (struct ipt_entry *)(base + offset); } /* All zeroes == unconditional rule. 
*/
/* Mildly perf critical (only if packet tracing is on) */
/*
 * True when the target of @e starts directly after the struct ipt_entry
 * header (target_offset equals the header size) and the IP selector
 * e->ip compares equal, byte for byte, to an all-zero struct ipt_ip.
 */
static inline bool unconditional(const struct ipt_entry *e)
{
	/* static storage, so this reference selector is zero-filled */
	static const struct ipt_ip uncond;

	return e->target_offset == sizeof(struct ipt_entry) &&
	       memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
}

/* for const-correctness */
/*
 * Wrapper around ipt_get_target() for callers holding a const entry:
 * the const is cast away for the call and the result is handed back
 * as a pointer to const.
 */
static inline const struct xt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	return ipt_get_target((struct ipt_entry *)e);
}

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* Chain names indexed by NF_INET_* hook number. */
static const char *const hooknames[] = {
	[NF_INET_PRE_ROUTING]		= "PREROUTING",
	[NF_INET_LOCAL_IN]		= "INPUT",
	[NF_INET_FORWARD]		= "FORWARD",
	[NF_INET_LOCAL_OUT]		= "OUTPUT",
	[NF_INET_POST_ROUTING]		= "POSTROUTING",
};

/* Indices into comments[] below. */
enum nf_ip_trace_comments {
	NF_IP_TRACE_COMMENT_RULE,
	NF_IP_TRACE_COMMENT_RETURN,
	NF_IP_TRACE_COMMENT_POLICY,
};

/* Text placed in the third field of a TRACE log line. */
static const char *const comments[] = {
	[NF_IP_TRACE_COMMENT_RULE]	= "rule",
	[NF_IP_TRACE_COMMENT_RETURN]	= "return",
	[NF_IP_TRACE_COMMENT_POLICY]	= "policy",
};

/* Log settings that trace_packet() passes to nf_log_trace(). */
static const struct nf_loginfo trace_loginfo = {
	.type = NF_LOG_TYPE_LOG,
	.u = {
		.log = {
			.level = 4,
			.logflags = NF_LOG_DEFAULT_MASK,
		},
	},
};

/* Mildly perf critical (only if packet tracing is on) */
/*
 * Examine entry @s while searching for the traced entry @e and update
 * the caller's chain name, rule number and comment accordingly:
 *  - @s carries the XT_ERROR_TARGET: its target data becomes *chainname
 *    and *rulenum restarts at 0;
 *  - otherwise, @s is @e: *rulenum is incremented, *comment may be
 *    replaced for a chain tail, and 1 is returned;
 *  - otherwise *rulenum is incremented.
 * Returns 1 only in the second case, 0 in the other two.
 */
static inline int
get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
		      const char *hookname, const char **chainname,
		      const char **comment, unsigned int *rulenum)
{
	const struct xt_standard_target *t = (void *)ipt_get_target_c(s);

	if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
		/* Head of user chain: ERROR target with chainname */
		*chainname = t->target.data;
		(*rulenum) = 0;
	} else if (s == e) {
		(*rulenum)++;

		if (unconditional(s) &&
		    strcmp(t->target.u.kernel.target->name,
			   XT_STANDARD_TARGET) == 0 &&
		    t->verdict < 0) {
			/* Tail of chains: STANDARD target (return/policy) */
			/* pointer comparison: still in the built-in chain? */
			*comment = *chainname == hookname ?
comments[NF_IP_TRACE_COMMENT_POLICY] :
				comments[NF_IP_TRACE_COMMENT_RETURN];
		}
		return 1;
	} else
		(*rulenum)++;

	return 0;
}

/*
 * Log one "TRACE: <table>:<chain>:<comment>:<rulenum> " line for the
 * entry @e.  Starting at the first entry of @hook, every entry is fed
 * to get_chainname_rulenum() until that returns non-zero; the chain
 * name, comment and rule number collected on the way are then passed
 * to nf_log_trace().
 */
static void trace_packet(struct net *net,
			 const struct sk_buff *skb,
			 unsigned int hook,
			 const struct net_device *in,
			 const struct net_device *out,
			 const char *tablename,
			 const struct xt_table_info *private,
			 const struct ipt_entry *e)
{
	const struct ipt_entry *root;
	const char *hookname, *chainname, *comment;
	const struct ipt_entry *iter;
	unsigned int rulenum = 0;

	root = get_entry(private->entries, private->hook_entry[hook]);

	/* defaults: chain named after the hook, comment "rule" */
	hookname = chainname = hooknames[hook];
	comment = comments[NF_IP_TRACE_COMMENT_RULE];

	xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
		if (get_chainname_rulenum(iter, e, hookname,
		    &chainname, &comment, &rulenum) != 0)
			break;

	nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
		     "TRACE: %s:%s:%s:%u ",
		     tablename, chainname, comment, rulenum);
}
#endif

/* Entry that starts entry->next_offset bytes after @entry. */
static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{
	return (void *)entry + entry->next_offset;
}

/* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * Run @skb through the table passed as @priv, starting at the entry
 * recorded for state->hook.  An entry applies when ip_packet_match()
 * and all of its extension matches accept the packet; its target then
 * either yields the verdict, jumps/returns inside the table, or lets
 * evaluation continue with the following entry.
 */
unsigned int
ipt_do_table(void *priv,
	     struct sk_buff *skb,
	     const struct nf_hook_state *state)
{
	const struct xt_table *table = priv;
	unsigned int hook = state->hook;
	/* stand-in name used when state->in or state->out is NULL */
	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
	const struct iphdr *ip;
	/* Initializing verdict to NF_DROP keeps gcc happy. */
	unsigned int verdict = NF_DROP;
	const char *indev, *outdev;
	const void *table_base;
	struct ipt_entry *e, **jumpstack;
	unsigned int stackidx, cpu;
	const struct xt_table_info *private;
	struct xt_action_param acpar;
	unsigned int addend;

	/* Initialization */
	stackidx = 0;
	ip = ip_hdr(skb);
	indev = state->in ? state->in->name : nulldevname;
	outdev = state->out ? state->out->name : nulldevname;
	/* We handle fragments by dealing with the first fragment as
	 * if it was a normal packet.
All other fragments are treated
	 * normally, except that they will NEVER match rules that ask
	 * things we don't know, ie. tcp syn flag or ports). If the
	 * rule is also a fragment-specific rule, non-fragments won't
	 * match it. */
	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
	acpar.thoff   = ip_hdrlen(skb);
	acpar.hotdrop = false;
	acpar.state   = state;

	WARN_ON(!(table->valid_hooks & (1 << hook)));
	/* bottom halves stay off and the recseq write section stays open
	 * until the matching xt_write_recseq_end()/local_bh_enable() */
	local_bh_disable();
	addend = xt_write_recseq_begin();
	private = READ_ONCE(table->private); /* Address dependency. */
	cpu        = smp_processor_id();
	table_base = private->entries;
	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];

	/* Switch to alternate jumpstack if we're being invoked via TEE.
	 * TEE issues XT_CONTINUE verdict on original skb so we must not
	 * clobber the jumpstack.
	 *
	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
	 * but it is no problem since absolute verdict is issued by these.
	 */
	if (static_key_false(&xt_tee_enabled))
		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);

	e = get_entry(table_base, private->hook_entry[hook]);

	do {
		const struct xt_entry_target *t;
		const struct xt_entry_match *ematch;
		struct xt_counters *counter;

		WARN_ON(!e);
		if (!ip_packet_match(ip, indev, outdev,
		    &e->ip, acpar.fragoff)) {
 no_match:
			/* also reached by goto from the match loop below */
			e = ipt_next_entry(e);
			continue;
		}

		/* every extension match of the entry has to accept */
		xt_ematch_foreach(ematch, e) {
			acpar.match     = ematch->u.kernel.match;
			acpar.matchinfo = ematch->data;
			if (!acpar.match->match(skb, &acpar))
				goto no_match;
		}

		/* entry applies: add skb->len bytes / 1 packet on this CPU */
		counter = xt_get_this_cpu_counter(&e->counters);
		ADD_COUNTER(*counter, skb->len, 1);

		t = ipt_get_target_c(e);
		WARN_ON(!t->u.kernel.target);

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
		/* The packet is traced: log it */
		if (unlikely(skb->nf_trace))
			trace_packet(state->net, skb, hook, state->in,
				     state->out, table->name, private, e);
#endif
		/* Standard target? */
		if (!t->u.kernel.target->target) {
			int v;

			v = ((struct xt_standard_target *)t)->verdict;
			if (v < 0) {
				/* Pop from stack?
*/
				if (v != XT_RETURN) {
					/* final verdict is stored as -verdict - 1 */
					verdict = (unsigned int)(-v) - 1;
					break;
				}
				if (stackidx == 0) {
					/* nothing to return to: go to the
					 * hook's underflow entry */
					e = get_entry(table_base,
					    private->underflow[hook]);
				} else {
					/* resume after the entry that jumped */
					e = jumpstack[--stackidx];
					e = ipt_next_entry(e);
				}
				continue;
			}
			/* v >= 0: byte offset of the entry to continue at.
			 * The current entry is pushed as return point unless
			 * the destination is simply the next entry or the
			 * rule has IPT_F_GOTO set. */
			if (table_base + v != ipt_next_entry(e) &&
			    !(e->ip.flags & IPT_F_GOTO)) {
				/* no free slot on the jump stack: drop */
				if (unlikely(stackidx >= private->stacksize)) {
					verdict = NF_DROP;
					break;
				}
				jumpstack[stackidx++] = e;
			}

			e = get_entry(table_base, v);
			continue;
		}

		acpar.target   = t->u.kernel.target;
		acpar.targinfo = t->data;

		verdict = t->u.kernel.target->target(skb, &acpar);
		if (verdict == XT_CONTINUE) {
			/* Target might have changed stuff. */
			ip = ip_hdr(skb);
			e = ipt_next_entry(e);
		} else {
			/* Verdict */
			break;
		}
	} while (!acpar.hotdrop);

	xt_write_recseq_end(addend);
	local_bh_enable();

	/* hotdrop overrides whatever verdict was computed */
	if (acpar.hotdrop)
		return NF_DROP;
	else return verdict;
}

/* Figures out from what hook each rule can be called: returns 0 if
   there are loops. Puts hook bitmask in comefrom. */
/*
 * @offsets and newinfo->number are handed to xt_find_jump_offset() to
 * validate jump destinations.  0 is also returned when a position
 * would run past newinfo->size or a jump destination is rejected.
 */
static int
mark_source_chains(const struct xt_table_info *newinfo,
		   unsigned int valid_hooks, void *entry0,
		   unsigned int *offsets)
{
	unsigned int hook;

	/* No recursion; use packet counter to save back ptrs (reset
	   to 0 as we leave), and comefrom to save source hook bitmask */
	for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
		unsigned int pos = newinfo->hook_entry[hook];
		struct ipt_entry *e = entry0 + pos;

		if (!(valid_hooks & (1 << hook)))
			continue;

		/* Set initial back pointer. */
		e->counters.pcnt = pos;

		for (;;) {
			const struct xt_standard_target *t
				= (void *)ipt_get_target_c(e);
			int visited = e->comefrom & (1 << hook);

			/* bit NF_INET_NUMHOOKS is set below on entry and
			 * cleared again while backtracking; seeing it here
			 * makes the function report failure */
			if (e->comefrom & (1 << NF_INET_NUMHOOKS))
				return 0;

			e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));

			/* Unconditional return/END. */
			if ((unconditional(e) &&
			     (strcmp(t->target.u.user.name,
				     XT_STANDARD_TARGET) == 0) &&
			     t->verdict < 0) || visited) {
				unsigned int oldpos, size;

				/* Return: backtrack through the last
				   big jump.
*/
				do {
					e->comefrom ^= (1<<NF_INET_NUMHOOKS);
					oldpos = pos;
					/* follow and clear the back pointer */
					pos = e->counters.pcnt;
					e->counters.pcnt = 0;

					/* We're at the start. */
					if (pos == oldpos)
						goto next;

					e = entry0 + pos;
				} while (oldpos == pos + e->next_offset);

				/* Move along one */
				size = e->next_offset;
				e = entry0 + pos + size;
				if (pos + size >= newinfo->size)
					return 0;
				e->counters.pcnt = pos;
				pos += size;
			} else {
				int newpos = t->verdict;

				if (strcmp(t->target.u.user.name,
					   XT_STANDARD_TARGET) == 0 &&
				    newpos >= 0) {
					/* This a jump; chase it. */
					if (!xt_find_jump_offset(offsets, newpos,
								 newinfo->number))
						return 0;
				} else {
					/* ... this is a fallthru */
					newpos = pos + e->next_offset;
					if (newpos >= newinfo->size)
						return 0;
				}
				e = entry0 + newpos;
				/* back pointer to where we came from */
				e->counters.pcnt = pos;
				pos = newpos;
			}
		}
next:		;
	}
	return 1;
}

/*
 * Release one match of an entry: call the match extension's destroy
 * hook when it has one, then drop the reference on its module.
 */
static void cleanup_match(struct xt_entry_match *m, struct net *net)
{
	struct xt_mtdtor_param par;

	par.net       = net;
	par.match     = m->u.kernel.match;
	par.matchinfo = m->data;
	par.family    = NFPROTO_IPV4;
	if (par.match->destroy != NULL)
		par.match->destroy(&par);
	module_put(par.match->me);
}

/*
 * Complete @par with the kernel match and data of @m and validate it
 * through xt_check_match().  The size passed is the match size without
 * the struct xt_entry_match header; protocol and the IPT_INV_PROTO bit
 * come from the rule's ipt_ip found in par->entryinfo.
 * Returns the result of xt_check_match().
 */
static int
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
	const struct ipt_ip *ip = par->entryinfo;

	par->match     = m->u.kernel.match;
	par->matchinfo = m->data;

	return xt_check_match(par, m->u.match_size - sizeof(*m),
			      ip->proto, ip->invflags & IPT_INV_PROTO);
}

/*
 * Resolve the match named by m->u.user (name and revision), store it in
 * m->u.kernel.match and run check_match() on it.  If the check fails
 * the module reference is dropped again.  Returns 0 on success, the
 * PTR_ERR() of the lookup or the check_match() error otherwise.
 */
static int
find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
	struct xt_match *match;
	int ret;

	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
				      m->u.user.revision);
	if (IS_ERR(match))
		return PTR_ERR(match);
	m->u.kernel.match = match;

	ret = check_match(m, par);
	if (ret)
		goto err;

	return 0;
err:
	module_put(m->u.kernel.match->me);
	return ret;
}

/*
 * Validate the (already resolved) target of @e for table @name through
 * xt_check_target().  The hook mask is taken from e->comefrom.
 * Returns the result of xt_check_target().
 */
static int check_target(struct ipt_entry *e, struct net *net, const char *name)
{
	struct xt_entry_target *t = ipt_get_target(e);
	struct xt_tgchk_param par = {
		.net       = net,
		.table     = name,
		.entryinfo = e,
		.target    = t->u.kernel.target,
		.targinfo  = t->data,
		.hook_mask = e->comefrom,
.family    = NFPROTO_IPV4,
	};

	return xt_check_target(&par, t->u.target_size - sizeof(*t),
			       e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
}

/*
 * Prepare one entry for use: allocate its per-cpu counter, resolve and
 * check every match, then resolve and check the target.
 * On failure everything done so far is undone: the target's module
 * reference (only if the target check itself failed), the @j matches
 * that already passed, and the per-cpu counter.
 * Returns 0 on success or a negative errno.
 * @size is not referenced in the body.
 */
static int
find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
		 unsigned int size,
		 struct xt_percpu_counter_alloc_state *alloc_state)
{
	struct xt_entry_target *t;
	struct xt_target *target;
	int ret;
	unsigned int j;
	struct xt_mtchk_param mtpar;
	struct xt_entry_match *ematch;

	if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
		return -ENOMEM;

	/* j counts the matches that passed find_check_match() */
	j = 0;
	memset(&mtpar, 0, sizeof(mtpar));
	mtpar.net	= net;
	mtpar.table     = name;
	mtpar.entryinfo = &e->ip;
	mtpar.hook_mask = e->comefrom;
	mtpar.family    = NFPROTO_IPV4;
	xt_ematch_foreach(ematch, e) {
		ret = find_check_match(ematch, &mtpar);
		if (ret != 0)
			goto cleanup_matches;
		++j;
	}

	t = ipt_get_target(e);
	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
					t->u.user.revision);
	if (IS_ERR(target)) {
		ret = PTR_ERR(target);
		goto cleanup_matches;
	}
	t->u.kernel.target = target;

	ret = check_target(e, net, name);
	if (ret)
		goto err;

	return 0;
 err:
	module_put(t->u.kernel.target->me);
 cleanup_matches:
	/* only the first j matches were set up */
	xt_ematch_foreach(ematch, e) {
		if (j-- == 0)
			break;
		cleanup_match(ematch, net);
	}

	xt_percpu_counter_free(&e->counters);

	return ret;
}

/*
 * True when @e is acceptable as an underflow entry: it must be
 * unconditional, use the standard target, and its stored verdict must
 * decode (-verdict - 1) to NF_DROP or NF_ACCEPT.
 */
static bool check_underflow(const struct ipt_entry *e)
{
	const struct xt_entry_target *t;
	unsigned int verdict;

	if (!unconditional(e))
		return false;
	t = ipt_get_target_c(e);
	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
		return false;
	verdict = ((struct xt_standard_target *)t)->verdict;
	verdict = -verdict - 1;
	return verdict == NF_DROP || verdict == NF_ACCEPT;
}

/*
 * Validate one entry of a table blob spanning [@base, @limit):
 * alignment, header and next_offset within the blob, minimum size,
 * ip_checkentry() and xt_check_entry_offsets().  If the entry sits at
 * a hook entry or underflow offset announced in @hook_entries /
 * @underflows (valid hooks only), that offset is recorded in @newinfo;
 * an underflow entry must additionally pass check_underflow().
 * Finally the entry's counters and comefrom are zeroed.
 * Returns 0 or a negative errno.
 */
static int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct xt_table_info *newinfo,
			   const unsigned char *base,
			   const unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int valid_hooks)
{
	unsigned int h;
	int err;

	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
	    (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
(unsigned char *)e + e->next_offset > limit)
		return -EINVAL;

	/* room for at least the header plus a bare target */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
		return -EINVAL;

	if (!ip_checkentry(&e->ip))
		return -EINVAL;

	err = xt_check_entry_offsets(e, e->elems, e->target_offset,
				     e->next_offset);
	if (err)
		return err;

	/* Check hooks & underflows */
	for (h = 0; h < NF_INET_NUMHOOKS; h++) {
		if (!(valid_hooks & (1 << h)))
			continue;
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h]) {
			if (!check_underflow(e))
				return -EINVAL;

			newinfo->underflow[h] = underflows[h];
		}
	}

	/* Clear counters and comefrom */
	e->counters = ((struct xt_counters) { 0, 0 });
	e->comefrom = 0;
	return 0;
}

/*
 * Tear down one fully set-up entry: clean up all of its matches, call
 * the target's destroy hook when present, drop the target module
 * reference and free the entry's per-cpu counter.
 */
static void
cleanup_entry(struct ipt_entry *e, struct net *net)
{
	struct xt_tgdtor_param par;
	struct xt_entry_target *t;
	struct xt_entry_match *ematch;

	/* Cleanup all matches */
	xt_ematch_foreach(ematch, e)
		cleanup_match(ematch, net);
	t = ipt_get_target(e);

	par.net      = net;
	par.target   = t->u.kernel.target;
	par.targinfo = t->data;
	par.family   = NFPROTO_IPV4;
	if (par.target->destroy != NULL)
		par.target->destroy(&par);
	module_put(par.target->me);
	xt_percpu_counter_free(&e->counters);
}

/* Checks and translates the user-supplied table segment (held in
   newinfo) */
/*
 * @entry0 points at the entries blob, @repl carries the sizes, hook
 * offsets and name supplied with the request.
 * Returns 0 on success or a negative errno; -ELOOP when
 * mark_source_chains() reports failure.
 */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
		const struct ipt_replace *repl)
{
	struct xt_percpu_counter_alloc_state alloc_state = { 0 };
	struct ipt_entry *iter;
	unsigned int *offsets;
	unsigned int i;
	int ret = 0;

	newinfo->size = repl->size;
	newinfo->number = repl->num_entries;

	/* Init all hooks to impossible value. */
	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		newinfo->hook_entry[i] = 0xFFFFFFFF;
		newinfo->underflow[i] = 0xFFFFFFFF;
	}

	offsets = xt_alloc_entry_offsets(newinfo->number);
	if (!offsets)
		return -ENOMEM;
	i = 0;
	/* Walk through entries, checking offsets.
*/
	xt_entry_foreach(iter, entry0, newinfo->size) {
		ret = check_entry_size_and_hooks(iter, newinfo, entry0,
						 entry0 + repl->size,
						 repl->hook_entry,
						 repl->underflow,
						 repl->valid_hooks);
		if (ret != 0)
			goto out_free;
		/* offsets[] has room for num_entries only; a surplus of
		 * entries is caught by the count check after the loop */
		if (i < repl->num_entries)
			offsets[i] = (void *)iter - entry0;
		++i;
		/* one more stack slot per XT_ERROR_TARGET entry */
		if (strcmp(ipt_get_target(iter)->u.user.name,
		    XT_ERROR_TARGET) == 0)
			++newinfo->stacksize;
	}

	ret = -EINVAL;
	if (i != repl->num_entries)
		goto out_free;

	ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
	if (ret)
		goto out_free;

	if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
		ret = -ELOOP;
		goto out_free;
	}
	kvfree(offsets);

	/* Finally, each sanity check must pass */
	i = 0;
	xt_entry_foreach(iter, entry0, newinfo->size) {
		ret = find_check_entry(iter, net, repl->name, repl->size,
				       &alloc_state);
		if (ret != 0)
			break;
		++i;
	}

	if (ret != 0) {
		/* undo only the i entries that were set up successfully */
		xt_entry_foreach(iter, entry0, newinfo->size) {
			if (i-- == 0)
				break;
			cleanup_entry(iter, net);
		}
		return ret;
	}

	return ret;
 out_free:
	kvfree(offsets);
	return ret;
}

/*
 * Add the per-cpu byte/packet counters of every entry of @t, for every
 * possible CPU, into counters[] (one slot per entry, in table order).
 * Each per-cpu pair is read inside a read_seqcount_begin()/retry()
 * loop on that CPU's xt_recseq.  Nothing is zeroed here; values are
 * added to whatever counters[] already holds.
 */
static void
get_counters(const struct xt_table_info *t,
	     struct xt_counters counters[])
{
	struct ipt_entry *iter;
	unsigned int cpu;
	unsigned int i;

	for_each_possible_cpu(cpu) {
		seqcount_t *s = &per_cpu(xt_recseq, cpu);

		i = 0;
		xt_entry_foreach(iter, t->entries, t->size) {
			struct xt_counters *tmp;
			u64 bcnt, pcnt;
			unsigned int start;

			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
			/* re-read until the pair was not updated meanwhile */
			do {
				start = read_seqcount_begin(s);
				bcnt = tmp->bcnt;
				pcnt = tmp->pcnt;
			} while (read_seqcount_retry(s, start));

			ADD_COUNTER(counters[i], bcnt, pcnt);
			++i; /* macro does multi eval of i */
			cond_resched();
		}
	}
}

/*
 * Same summation as get_counters(), but the per-cpu values are read
 * directly, without the seqcount retry loop, and cond_resched() runs
 * once per CPU instead of once per entry.
 */
static void get_old_counters(const struct xt_table_info *t,
			     struct xt_counters counters[])
{
	struct ipt_entry *iter;
	unsigned int cpu, i;

	for_each_possible_cpu(cpu) {
		i = 0;
		xt_entry_foreach(iter, t->entries, t->size) {
			const struct xt_counters *tmp;

			tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
			ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
			++i; /* macro does multi eval of i */
		}
cond_resched();
	}
}

/*
 * Allocate a zeroed array with one xt_counters slot per entry of
 * @table and fill it through get_counters().
 * Returns the array (released with vfree() by copy_entries_to_user())
 * or ERR_PTR(-ENOMEM).
 */
static struct xt_counters *alloc_counters(const struct xt_table *table)
{
	unsigned int countersize;
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;

	/* We need atomic snapshot of counters: rest doesn't change
	   (other than comefrom, which userspace doesn't care
	   about). */
	countersize = sizeof(struct xt_counters) * private->number;
	counters = vzalloc(countersize);

	if (counters == NULL)
		return ERR_PTR(-ENOMEM);

	get_counters(private, counters);

	return counters;
}

/*
 * Copy the entries of @table, @total_size bytes in all, to @userptr.
 * For each entry the header is copied as is, the counters field is then
 * overwritten with the summed snapshot, and every match and the target
 * are written through xt_match_to_user() / xt_target_to_user().
 * Returns 0, -EFAULT on a failed user copy, or the error from
 * alloc_counters().
 */
static int
copy_entries_to_user(unsigned int total_size,
		     const struct xt_table *table,
		     void __user *userptr)
{
	unsigned int off, num;
	const struct ipt_entry *e;
	struct xt_counters *counters;
	const struct xt_table_info *private = table->private;
	int ret = 0;
	const void *loc_cpu_entry;

	counters = alloc_counters(table);
	if (IS_ERR(counters))
		return PTR_ERR(counters);

	loc_cpu_entry = private->entries;

	/* FIXME: use iterator macros --RR */
	/* ... then go back and fix counters and names */
	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
		unsigned int i;
		const struct xt_entry_match *m;
		const struct xt_entry_target *t;

		e = loc_cpu_entry + off;
		if (copy_to_user(userptr + off, e, sizeof(*e))) {
			ret = -EFAULT;
			goto free_counters;
		}
		/* replace the counters just copied with the snapshot */
		if (copy_to_user(userptr + off
				 + offsetof(struct ipt_entry, counters),
				 &counters[num],
				 sizeof(counters[num])) != 0) {
			ret = -EFAULT;
			goto free_counters;
		}

		/* matches occupy [sizeof(struct ipt_entry), target_offset) */
		for (i = sizeof(struct ipt_entry);
		     i < e->target_offset;
		     i += m->u.match_size) {
			m = (void *)e + i;

			if (xt_match_to_user(m, userptr + off + i)) {
				ret = -EFAULT;
				goto free_counters;
			}
		}

		t = ipt_get_target_c(e);
		if (xt_target_to_user(t, userptr + off + e->target_offset)) {
			ret = -EFAULT;
			goto free_counters;
		}
	}

 free_counters:
	vfree(counters);
	return ret;
}

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/*
 * Read a compat_int_t verdict from @src; a positive value is increased
 * by xt_compat_calc_jump(AF_INET, v).  The result is stored as a
 * native int at @dst.
 */
static void compat_standard_from_user(void *dst, const void *src)
{
	int v = *(compat_int_t *)src;

	if (v > 0)
		v += xt_compat_calc_jump(AF_INET, v);
	memcpy(dst, &v, sizeof(v));
}
/*
 * Counterpart of compat_standard_from_user(): read a native int verdict
 * from @src, decrease a positive value by xt_compat_calc_jump(AF_INET,
 * cv) and copy the compat_int_t result to user memory at @dst.
 * Returns 0 or -EFAULT.
 */
static int compat_standard_to_user(void __user *dst, const void *src)
{
	compat_int_t cv = *(int *)src;

	if (cv > 0)
		cv -= xt_compat_calc_jump(AF_INET, cv);
	return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
}

/*
 * Work out by how many bytes entry @e differs between the native and
 * the compat layout (header, every match, the target), subtract that
 * from newinfo->size, register it with xt_compat_add_offset() under the
 * entry's offset from @base, and lower every hook entry / underflow
 * offset in @newinfo that lies behind @e.
 * Returns 0 or the error from xt_compat_add_offset().
 */
static int compat_calc_entry(const struct ipt_entry *e,
			     const struct xt_table_info *info,
			     const void *base, struct xt_table_info *newinfo)
{
	const struct xt_entry_match *ematch;
	const struct xt_entry_target *t;
	unsigned int entry_offset;
	int off, i, ret;

	off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
	entry_offset = (void *)e - base;
	xt_ematch_foreach(ematch, e)
		off += xt_compat_match_offset(ematch->u.kernel.match);
	t = ipt_get_target_c(e);
	off += xt_compat_target_offset(t->u.kernel.target);
	newinfo->size -= off;
	ret = xt_compat_add_offset(AF_INET, entry_offset, off);
	if (ret)
		return ret;

	for (i = 0; i < NF_INET_NUMHOOKS; i++) {
		/* a zero offset is left alone */
		if (info->hook_entry[i] &&
		    (e < (struct ipt_entry *)(base + info->hook_entry[i])))
			newinfo->hook_entry[i] -= off;
		if (info->underflow[i] &&
		    (e < (struct ipt_entry *)(base + info->underflow[i])))
			newinfo->underflow[i] -= off;
	}
	return 0;
}

/*
 * Fill @newinfo with the compat view of @info: the fields in front of
 * ->entries are copied, initial_entries is cleared, and size and hook
 * offsets are adjusted entry by entry through compat_calc_entry().
 * Returns 0, -EINVAL for a NULL argument, or the first error from
 * xt_compat_init_offsets() / compat_calc_entry().
 */
static int compat_table_info(const struct xt_table_info *info,
			     struct xt_table_info *newinfo)
{
	struct ipt_entry *iter;
	const void *loc_cpu_entry;
	int ret;

	if (!newinfo || !info)
		return -EINVAL;

	/* we dont care about newinfo->entries */
	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
	newinfo->initial_entries = 0;
	loc_cpu_entry = info->entries;
	ret = xt_compat_init_offsets(AF_INET, info->number);
	if (ret)
		return ret;
	xt_entry_foreach(iter, loc_cpu_entry, info->size) {
		ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
		if (ret != 0)
			return ret;
	}
	return 0;
}
#endif

/*
 * Report hook offsets, entry count and size of the table whose name is
 * read from @user as a struct ipt_getinfo written back to @user.
 * *@len must equal sizeof(struct ipt_getinfo).
 * Returns 0 or a negative errno.
 */
static int get_info(struct net *net, void __user *user, const int *len)
{
	char name[XT_TABLE_MAXNAMELEN];
	struct xt_table *t;
	int ret;

	if (*len != sizeof(struct ipt_getinfo))
		return -EINVAL;

	if (copy_from_user(name, user, sizeof(name)) != 0)
		return -EFAULT;

	/* user data: force termination */
	name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
	if (in_compat_syscall())
		xt_compat_lock(AF_INET);
#endif
	t = xt_request_find_table_lock(net, AF_INET, name);
	if (!IS_ERR(t)) {
		struct ipt_getinfo info;
		const struct xt_table_info *private = t->private;
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
		struct xt_table_info tmp;

		if (in_compat_syscall()) {
			/* NOTE(review): this ret is overwritten below without
			 * being checked, so a compat_table_info() failure is
			 * not reported - confirm this is intended. */
			ret = compat_table_info(private, &tmp);
			xt_compat_flush_offsets(AF_INET);
			private = &tmp;
		}
#endif
		/* zero first so no uninitialised bytes reach user space */
		memset(&info, 0, sizeof(info));
		info.valid_hooks = t->valid_hooks;
		memcpy(info.hook_entry, private->hook_entry,
		       sizeof(info.hook_entry));
		memcpy(info.underflow, private->underflow,
		       sizeof(info.underflow));
		info.num_entries = private->number;
		info.size = private->size;
		strscpy(info.name, name);

		if (copy_to_user(user, &info, *len) != 0)
			ret = -EFAULT;
		else
			ret = 0;

		xt_table_unlock(t);
		module_put(t->me);
	} else
		ret = PTR_ERR(t);
#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
	if (in_compat_syscall())
		xt_compat_unlock(AF_INET);
#endif
	return ret;
}

/*
 * Copy the entries of the table named in the request at @uptr to
 * uptr->entrytable.  *@len must be exactly the request header plus
 * get.size, and get.size must equal the table's current size,
 * otherwise -EINVAL respectively -EAGAIN is returned.
 * Returns 0 or a negative errno.
 */
static int
get_entries(struct net *net, struct ipt_get_entries __user *uptr,
	    const int *len)
{
	int ret;
	struct ipt_get_entries get;
	struct xt_table *t;

	if (*len < sizeof(get))
		return -EINVAL;
	if (copy_from_user(&get, uptr, sizeof(get)) != 0)
		return -EFAULT;
	if (*len != sizeof(struct ipt_get_entries) + get.size)
		return -EINVAL;
	get.name[sizeof(get.name) - 1] = '\0';

	t = xt_find_table_lock(net, AF_INET, get.name);
	if (!IS_ERR(t)) {
		const struct xt_table_info *private = t->private;
		if (get.size == private->size)
			ret = copy_entries_to_user(private->size,
						   t, uptr->entrytable);
		else
			ret = -EAGAIN;

		module_put(t->me);
		xt_table_unlock(t);
	} else
		ret = PTR_ERR(t);

	return ret;
}

/*
 * Install @newinfo as the rule set of table @name and hand the old
 * counters back to user space at @counters_ptr.
 * @valid_hooks must equal the table's own hook mask.
 * Returns 0 once the new table is in place (a failed counter copy is
 * only logged), a negative errno otherwise.
 */
static int
__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
	     struct xt_table_info *newinfo, unsigned int num_counters,
	     void __user *counters_ptr)
{
	int ret;
	struct xt_table *t;
	struct xt_table_info *oldinfo;
	struct xt_counters *counters;
	struct ipt_entry *iter;

	counters = xt_counters_alloc(num_counters);
	if (!counters) {
ret = -ENOMEM;
		goto out;
	}

	t = xt_request_find_table_lock(net, AF_INET, name);
	if (IS_ERR(t)) {
		ret = PTR_ERR(t);
		goto free_newinfo_counters_untrans;
	}

	/* You lied! */
	if (valid_hooks != t->valid_hooks) {
		ret = -EINVAL;
		goto put_module;
	}

	/* on failure xt_replace_table() returns NULL and sets ret */
	oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
	if (!oldinfo)
		goto put_module;

	/* Update module usage count based on number of rules */
	if ((oldinfo->number > oldinfo->initial_entries) ||
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);
	if ((oldinfo->number > oldinfo->initial_entries) &&
	    (newinfo->number <= oldinfo->initial_entries))
		module_put(t->me);

	xt_table_unlock(t);

	/* the old rule set is read and torn down after the unlock */
	get_old_counters(oldinfo, counters);

	/* Decrease module usage counts and free resource */
	xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
		cleanup_entry(iter, net);

	xt_free_table_info(oldinfo);
	if (copy_to_user(counters_ptr, counters,
			 sizeof(struct xt_counters) * num_counters) != 0) {
		/* Silent error, can't fail, new table is already in place */
		net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
	}
	vfree(counters);
	return 0;

 put_module:
	module_put(t->me);
	xt_table_unlock(t);
 free_newinfo_counters_untrans:
	vfree(counters);
 out:
	return ret;
}

/*
 * Handle a table replace request: copy the struct ipt_replace header
 * and the entries blob from @arg, validate and translate the blob with
 * translate_table(), then swap it in through __do_replace().
 * Returns 0 or a negative errno; on failure the new table info is
 * freed here.
 */
static int
do_replace(struct net *net, sockptr_t arg, unsigned int len)
{
	int ret;
	struct ipt_replace tmp;
	struct xt_table_info *newinfo;
	void *loc_cpu_entry;
	struct ipt_entry *iter;

	if (len < sizeof(tmp))
		return -EINVAL;
	if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0)
		return -EFAULT;

	/* overflow check */
	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
		return -ENOMEM;
	if (tmp.num_counters == 0)
		return -EINVAL;
	/* the buffer must hold the header plus tmp.size bytes of entries;
	 * computed in 64 bit so the sum cannot wrap */
	if ((u64)len < (u64)tmp.size + sizeof(tmp))
		return -EINVAL;

	tmp.name[sizeof(tmp.name)-1] = 0;

	newinfo = xt_alloc_table_info(tmp.size);
	if (!newinfo)
		return -ENOMEM;

	loc_cpu_entry = newinfo->entries;
	/* entries follow the header in the user buffer */
	if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp),
			tmp.size) != 0) {
		ret = -EFAULT;
		goto free_newinfo;
	}

	ret =
translate_table(net, newinfo, loc_cpu_entry, &tmp);
	if (ret != 0)
		goto free_newinfo;

	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
			   tmp.num_counters, tmp.counters);
	if (ret)
		goto free_newinfo_untrans;
	return 0;

 free_newinfo_untrans:
	/* entries went through translate_table(): undo each of them */
	xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
		cleanup_entry(iter, net);
 free_newinfo:
	xt_free_table_info(newinfo);
	return ret;
}

/*
 * Add user-supplied byte/packet values to the counters of every entry
 * of the table named in the request.  The request is parsed by
 * xt_copy_counters(); its num_counters must equal the table's entry
 * count.  The values are added to the current CPU's counters with
 * bottom halves disabled, inside an xt_write_recseq_begin()/end() pair.
 * Returns 0 or a negative errno.
 */
static int
do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
{
	unsigned int i;
	struct xt_counters_info tmp;
	struct xt_counters *paddc;
	struct xt_table *t;
	const struct xt_table_info *private;
	int ret = 0;
	struct ipt_entry *iter;
	unsigned int addend;

	paddc = xt_copy_counters(arg, len, &tmp);
	if (IS_ERR(paddc))
		return PTR_ERR(paddc);

	t = xt_find_table_lock(net, AF_INET, tmp.name);
	if (IS_ERR(t)) {
		ret = PTR_ERR(t);
		goto free;
	}

	local_bh_disable();
	private = t->private;
	if (private->number != tmp.num_counters) {
		ret = -EINVAL;
		goto unlock_up_free;
	}

	i = 0;
	addend = xt_write_recseq_begin();
	xt_entry_foreach(iter, private->entries, private->size) {
		/* shadows the request header 'tmp' declared above */
		struct xt_counters *tmp;

		tmp = xt_get_this_cpu_counter(&iter->counters);
		ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
		++i;
	}
	xt_write_recseq_end(addend);
 unlock_up_free:
	local_bh_enable();
	xt_table_unlock(t);
	module_put(t->me);
 free:
	vfree(paddc);

	return ret;
}

#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
/*
 * Replace request as laid out for compat callers: fixed-width u32
 * fields, a compat_uptr_t for the counters pointer, and a trailing
 * array of compat entries.
 */
struct compat_ipt_replace {
	char			name[XT_TABLE_MAXNAMELEN];
	u32			valid_hooks;
	u32			num_entries;
	u32			size;
	u32			hook_entry[NF_INET_NUMHOOKS];
	u32			underflow[NF_INET_NUMHOOKS];
	u32			num_counters;
	compat_uptr_t		counters;	/* struct xt_counters * */
	struct compat_ipt_entry	entries[];
};

/*
 * Write entry @e to user memory at *@dstptr in compat layout, using
 * counters[@i] for its counters.  *@dstptr is advanced past what was
 * written and *@size is reduced by the bytes saved against the native
 * layout; target_offset and next_offset are rewritten accordingly.
 * Returns 0, -EFAULT, or the error of a match/target conversion.
 */
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
			  unsigned int *size, struct xt_counters *counters,
			  unsigned int i)
{
	struct xt_entry_target *t;
	struct compat_ipt_entry __user *ce;
	u_int16_t target_offset, next_offset;
	compat_uint_t origsize;
	const struct xt_entry_match *ematch;
	int ret = 0;

	origsize = *size;
	ce = *dstptr;
	if
(copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
	    copy_to_user(&ce->counters, &counters[i],
	    sizeof(counters[i])) != 0)
		return -EFAULT;

	*dstptr += sizeof(struct compat_ipt_entry);
	*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);

	xt_ematch_foreach(ematch, e) {
		ret = xt_compat_match_to_user(ematch, dstptr, size);
		if (ret != 0)
			return ret;
	}
	/* origsize - *size is the shrinkage accumulated so far */
	target_offset = e->target_offset - (origsize - *size);
	t = ipt_get_target(e);
	ret = xt_compat_target_to_user(t, dstptr, size);
	if (ret)
		return ret;
	next_offset = e->next_offset - (origsize - *size);
	if (put_user(target_offset, &ce->target_offset) != 0 ||
	    put_user(next_offset, &ce->next_offset) != 0)
		return -EFAULT;
	return 0;
}

/*
 * Resolve the match named by m->u.user, store it in m->u.kernel.match
 * and add its xt_compat_match_offset() to *@size.
 * Returns 0 or the PTR_ERR() of the lookup.
 * @ip is not referenced in the body.
 */
static int
compat_find_calc_match(struct xt_entry_match *m,
		       const struct ipt_ip *ip,
		       int *size)
{
	struct xt_match *match;

	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
				      m->u.user.revision);
	if (IS_ERR(match))
		return PTR_ERR(match);

	m->u.kernel.match = match;
	*size += xt_compat_match_offset(match);
	return 0;
}

/*
 * Drop the module references held for every match and for the target
 * of compat entry @e.
 */
static void compat_release_entry(struct compat_ipt_entry *e)
{
	struct xt_entry_target *t;
	struct xt_entry_match *ematch;

	/* Cleanup all matches */
	xt_ematch_foreach(ematch, e)
		module_put(ematch->u.kernel.match->me);
	t = compat_ipt_get_target(e);
	module_put(t->u.kernel.target->me);
}

/*
 * Validate one compat entry of a blob spanning [@base, @limit) and
 * resolve its matches and target.  The number of bytes the entry grows
 * by in native layout is added to *@size and registered with
 * xt_compat_add_offset() under the entry's offset from @base.
 * On failure the module references taken so far are dropped.
 * Returns 0 or a negative errno.
 * @newinfo is not referenced in the body.
 */
static int
check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
				  struct xt_table_info *newinfo,
				  unsigned int *size,
				  const unsigned char *base,
				  const unsigned char *limit)
{
	struct xt_entry_match *ematch;
	struct xt_entry_target *t;
	struct xt_target *target;
	unsigned int entry_offset;
	unsigned int j;
	int ret, off;

	if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
	    (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
	    (unsigned char *)e + e->next_offset > limit)
		return -EINVAL;

	if (e->next_offset < sizeof(struct compat_ipt_entry) +
			     sizeof(struct compat_xt_entry_target))
		return -EINVAL;

	if (!ip_checkentry(&e->ip))
		return -EINVAL;

	ret =
xt_compat_check_entry_offsets(e, e->elems, e->target_offset, e->next_offset); if (ret) return ret; off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { ret = compat_find_calc_match(ematch, &e->ip, &off); if (ret != 0) goto release_matches; ++j; } t = compat_ipt_get_target(e); target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, t->u.user.revision); if (IS_ERR(target)) { ret = PTR_ERR(target); goto release_matches; } t->u.kernel.target = target; off += xt_compat_target_offset(target); *size += off; ret = xt_compat_add_offset(AF_INET, entry_offset, off); if (ret) goto out; return 0; out: module_put(t->u.kernel.target->me); release_matches: xt_ematch_foreach(ematch, e) { if (j-- == 0) break; module_put(ematch->u.kernel.match->me); } return ret; } static void compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, unsigned int *size, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; struct ipt_entry *de; unsigned int origsize; int h; struct xt_entry_match *ematch; origsize = *size; de = *dstptr; memcpy(de, e, sizeof(struct ipt_entry)); memcpy(&de->counters, &e->counters, sizeof(e->counters)); *dstptr += sizeof(struct ipt_entry); *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); xt_ematch_foreach(ematch, e) xt_compat_match_from_user(ematch, dstptr, size); de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_INET_NUMHOOKS; h++) { if ((unsigned char *)de - base < newinfo->hook_entry[h]) newinfo->hook_entry[h] -= origsize - *size; if ((unsigned char *)de - base < newinfo->underflow[h]) newinfo->underflow[h] -= origsize - *size; } } static int translate_compat_table(struct net *net, struct xt_table_info **pinfo, void **pentry0, const struct compat_ipt_replace 
*compatr) { unsigned int i, j; struct xt_table_info *newinfo, *info; void *pos, *entry0, *entry1; struct compat_ipt_entry *iter0; struct ipt_replace repl; unsigned int size; int ret; info = *pinfo; entry0 = *pentry0; size = compatr->size; info->number = compatr->num_entries; j = 0; xt_compat_lock(AF_INET); ret = xt_compat_init_offsets(AF_INET, compatr->num_entries); if (ret) goto out_unlock; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter0, entry0, compatr->size) { ret = check_compat_entry_size_and_hooks(iter0, info, &size, entry0, entry0 + compatr->size); if (ret != 0) goto out_unlock; ++j; } ret = -EINVAL; if (j != compatr->num_entries) goto out_unlock; ret = -ENOMEM; newinfo = xt_alloc_table_info(size); if (!newinfo) goto out_unlock; memset(newinfo->entries, 0, size); newinfo->number = compatr->num_entries; for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = compatr->hook_entry[i]; newinfo->underflow[i] = compatr->underflow[i]; } entry1 = newinfo->entries; pos = entry1; size = compatr->size; xt_entry_foreach(iter0, entry0, compatr->size) compat_copy_entry_from_user(iter0, &pos, &size, newinfo, entry1); /* all module references in entry0 are now gone. * entry1/newinfo contains a 64bit ruleset that looks exactly as * generated by 64bit userspace. * * Call standard translate_table() to validate all hook_entrys, * underflows, check for loops, etc. 
*/ xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); memcpy(&repl, compatr, sizeof(*compatr)); for (i = 0; i < NF_INET_NUMHOOKS; i++) { repl.hook_entry[i] = newinfo->hook_entry[i]; repl.underflow[i] = newinfo->underflow[i]; } repl.num_counters = 0; repl.counters = NULL; repl.size = newinfo->size; ret = translate_table(net, newinfo, entry1, &repl); if (ret) goto free_newinfo; *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); return 0; free_newinfo: xt_free_table_info(newinfo); return ret; out_unlock: xt_compat_flush_offsets(AF_INET); xt_compat_unlock(AF_INET); xt_entry_foreach(iter0, entry0, compatr->size) { if (j-- == 0) break; compat_release_entry(iter0); } return ret; } static int compat_do_replace(struct net *net, sockptr_t arg, unsigned int len) { int ret; struct compat_ipt_replace tmp; struct xt_table_info *newinfo; void *loc_cpu_entry; struct ipt_entry *iter; if (len < sizeof(tmp)) return -EINVAL; if (copy_from_sockptr(&tmp, arg, sizeof(tmp)) != 0) return -EFAULT; /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; if (tmp.num_counters == 0) return -EINVAL; if ((u64)len < (u64)tmp.size + sizeof(tmp)) return -EINVAL; tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; if (copy_from_sockptr_offset(loc_cpu_entry, arg, sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; goto free_newinfo; } ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp); if (ret != 0) goto free_newinfo; ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, tmp.num_counters, compat_ptr(tmp.counters)); if (ret) goto free_newinfo_untrans; return 0; free_newinfo_untrans: xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); free_newinfo: xt_free_table_info(newinfo); return ret; } struct compat_ipt_get_entries { char name[XT_TABLE_MAXNAMELEN]; compat_uint_t size; struct compat_ipt_entry entrytable[]; }; 
static int compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; unsigned int i = 0; struct ipt_entry *iter; counters = alloc_counters(table); if (IS_ERR(counters)) return PTR_ERR(counters); pos = userptr; size = total_size; xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) break; } vfree(counters); return ret; } static int compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, int *len) { int ret; struct compat_ipt_get_entries get; struct xt_table *t; if (*len < sizeof(get)) return -EINVAL; if (copy_from_user(&get, uptr, sizeof(get)) != 0) return -EFAULT; if (*len != sizeof(struct compat_ipt_get_entries) + get.size) return -EINVAL; get.name[sizeof(get.name) - 1] = '\0'; xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) ret = compat_copy_entries_to_user(private->size, t, uptr->entrytable); else if (!ret) ret = -EAGAIN; xt_compat_flush_offsets(AF_INET); module_put(t->me); xt_table_unlock(t); } else ret = PTR_ERR(t); xt_compat_unlock(AF_INET); return ret; } #endif static int do_ipt_set_ctl(struct sock *sk, int cmd, sockptr_t arg, unsigned int len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IPT_SO_SET_REPLACE: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_do_replace(sock_net(sk), arg, len); else #endif ret = do_replace(sock_net(sk), arg, len); break; case IPT_SO_SET_ADD_COUNTERS: ret = do_add_counters(sock_net(sk), arg, len); break; default: ret = -EINVAL; } return ret; } static int do_ipt_get_ctl(struct 
sock *sk, int cmd, void __user *user, int *len) { int ret; if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IPT_SO_GET_INFO: ret = get_info(sock_net(sk), user, len); break; case IPT_SO_GET_ENTRIES: #ifdef CONFIG_NETFILTER_XTABLES_COMPAT if (in_compat_syscall()) ret = compat_get_entries(sock_net(sk), user, len); else #endif ret = get_entries(sock_net(sk), user, len); break; case IPT_SO_GET_REVISION_MATCH: case IPT_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; int target; if (*len != sizeof(rev)) { ret = -EINVAL; break; } if (copy_from_user(&rev, user, sizeof(rev)) != 0) { ret = -EFAULT; break; } rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; else target = 0; try_then_request_module(xt_find_revision(AF_INET, rev.name, rev.revision, target, &ret), "ipt_%s", rev.name); break; } default: ret = -EINVAL; } return ret; } static void __ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; struct module *table_owner = table->me; struct ipt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) module_put(table_owner); xt_free_table_info(private); } int ipt_register_table(struct net *net, const struct xt_table *table, const struct ipt_replace *repl, const struct nf_hook_ops *template_ops) { struct nf_hook_ops *ops; unsigned int num_ops; int ret, i; struct xt_table_info *newinfo; struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) return -ENOMEM; loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); if (ret != 0) { xt_free_table_info(newinfo); 
return ret; } new_table = xt_register_table(net, table, &bootstrap, newinfo); if (IS_ERR(new_table)) { struct ipt_entry *iter; xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) cleanup_entry(iter, net); xt_free_table_info(newinfo); return PTR_ERR(new_table); } /* No template? No need to do anything. This is used by 'nat' table, it registers * with the nat core instead of the netfilter core. */ if (!template_ops) return 0; num_ops = hweight32(table->valid_hooks); if (num_ops == 0) { ret = -EINVAL; goto out_free; } ops = kmemdup_array(template_ops, num_ops, sizeof(*ops), GFP_KERNEL); if (!ops) { ret = -ENOMEM; goto out_free; } for (i = 0; i < num_ops; i++) ops[i].priv = new_table; new_table->ops = ops; ret = nf_register_net_hooks(net, ops, num_ops); if (ret != 0) goto out_free; return ret; out_free: __ipt_unregister_table(net, new_table); return ret; } void ipt_unregister_table_pre_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); if (table) nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks)); } void ipt_unregister_table_exit(struct net *net, const char *name) { struct xt_table *table = xt_find_table(net, NFPROTO_IPV4, name); if (table) __ipt_unregister_table(net, table); } static struct xt_target ipt_builtin_tg[] __read_mostly = { { .name = XT_STANDARD_TARGET, .targetsize = sizeof(int), .family = NFPROTO_IPV4, #ifdef CONFIG_NETFILTER_XTABLES_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, .compat_to_user = compat_standard_to_user, #endif }, { .name = XT_ERROR_TARGET, .target = ipt_error, .targetsize = XT_FUNCTION_MAXNAMELEN, .family = NFPROTO_IPV4, }, }; static struct nf_sockopt_ops ipt_sockopts = { .pf = PF_INET, .set_optmin = IPT_BASE_CTL, .set_optmax = IPT_SO_SET_MAX+1, .set = do_ipt_set_ctl, .get_optmin = IPT_BASE_CTL, .get_optmax = IPT_SO_GET_MAX+1, .get = do_ipt_get_ctl, .owner = THIS_MODULE, }; static int __net_init 
ip_tables_net_init(struct net *net) { return xt_proto_init(net, NFPROTO_IPV4); } static void __net_exit ip_tables_net_exit(struct net *net) { xt_proto_fini(net, NFPROTO_IPV4); } static struct pernet_operations ip_tables_net_ops = { .init = ip_tables_net_init, .exit = ip_tables_net_exit, }; static int __init ip_tables_init(void) { int ret; ret = register_pernet_subsys(&ip_tables_net_ops); if (ret < 0) goto err1; /* No one else will be downing sem now, so we won't sleep */ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); if (ret < 0) goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); if (ret < 0) goto err4; return 0; err4: xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); err2: unregister_pernet_subsys(&ip_tables_net_ops); err1: return ret; } static void __exit ip_tables_fini(void) { nf_unregister_sockopt(&ipt_sockopts); xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); unregister_pernet_subsys(&ip_tables_net_ops); } EXPORT_SYMBOL(ipt_register_table); EXPORT_SYMBOL(ipt_unregister_table_pre_exit); EXPORT_SYMBOL(ipt_unregister_table_exit); EXPORT_SYMBOL(ipt_do_table); module_init(ip_tables_init); module_exit(ip_tables_fini);
286 285 287 286 287 285 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 /* * kmod - the kernel module loader * * Copyright (C) 2023 Luis Chamberlain <mcgrof@kernel.org> */ #include <linux/module.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/binfmts.h> #include <linux/syscalls.h> #include <linux/unistd.h> #include <linux/kmod.h> #include <linux/slab.h> #include <linux/completion.h> #include <linux/cred.h> #include <linux/file.h> #include <linux/workqueue.h> #include <linux/security.h> #include <linux/mount.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/resource.h> #include <linux/notifier.h> #include <linux/suspend.h> #include <linux/rwsem.h> #include <linux/ptrace.h> #include <linux/async.h> #include <linux/uaccess.h> #include <trace/events/module.h> #include "internal.h" /* * Assuming: * * threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, * (u64) THREAD_SIZE * 8UL); * * If you need less than 50 threads would mean we're dealing with systems * smaller than 3200 pages. This assumes you are capable of having ~13M memory, * and this would only be an upper limit, after which the OOM killer would take * effect. Systems like these are very unlikely if modules are enabled. 
*/ #define MAX_KMOD_CONCURRENT 50 static DEFINE_SEMAPHORE(kmod_concurrent_max, MAX_KMOD_CONCURRENT); /* * This is a restriction on having *all* MAX_KMOD_CONCURRENT threads * running at the same time without returning. When this happens we * believe you've somehow ended up with a recursive module dependency * creating a loop. * * We have no option but to fail. * * Userspace should proactively try to detect and prevent these. */ #define MAX_KMOD_ALL_BUSY_TIMEOUT 5 /* modprobe_path is set via /proc/sys. */ char modprobe_path[KMOD_PATH_LEN] = CONFIG_MODPROBE_PATH; static void free_modprobe_argv(struct subprocess_info *info) { kfree(info->argv[3]); /* check call_modprobe() */ kfree(info->argv); } static int call_modprobe(char *orig_module_name, int wait) { struct subprocess_info *info; static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; char *module_name; int ret; char **argv = kmalloc(sizeof(char *[5]), GFP_KERNEL); if (!argv) goto out; module_name = kstrdup(orig_module_name, GFP_KERNEL); if (!module_name) goto free_argv; argv[0] = modprobe_path; argv[1] = "-q"; argv[2] = "--"; argv[3] = module_name; /* check free_modprobe_argv() */ argv[4] = NULL; info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL, NULL, free_modprobe_argv, NULL); if (!info) goto free_module_name; ret = call_usermodehelper_exec(info, wait | UMH_KILLABLE); kmod_dup_request_announce(orig_module_name, ret); return ret; free_module_name: kfree(module_name); free_argv: kfree(argv); out: kmod_dup_request_announce(orig_module_name, -ENOMEM); return -ENOMEM; } /** * __request_module - try to load a kernel module * @wait: wait (or not) for the operation to complete * @fmt: printf style format string for the name of the module * @...: arguments as specified in the format string * * Load a module using the user mode module loader. The function returns * zero on success or a negative errno code or positive exit code from * "modprobe" on failure. 
Note that a successful module load does not mean * the module did not then unload and exit on an error of its own. Callers * must check that the service they requested is now available not blindly * invoke it. * * If module auto-loading support is disabled then this function * simply returns -ENOENT. */ int __request_module(bool wait, const char *fmt, ...) { va_list args; char module_name[MODULE_NAME_LEN]; int ret, dup_ret; /* * We don't allow synchronous module loading from async. Module * init may invoke async_synchronize_full() which will end up * waiting for this task which already is waiting for the module * loading to complete, leading to a deadlock. */ WARN_ON_ONCE(wait && current_is_async()); if (!modprobe_path[0]) return -ENOENT; va_start(args, fmt); ret = vsnprintf(module_name, MODULE_NAME_LEN, fmt, args); va_end(args); if (ret >= MODULE_NAME_LEN) return -ENAMETOOLONG; ret = security_kernel_module_request(module_name); if (ret) return ret; ret = down_timeout(&kmod_concurrent_max, MAX_KMOD_ALL_BUSY_TIMEOUT * HZ); if (ret) { pr_warn_ratelimited("request_module: modprobe %s cannot be processed, kmod busy with %d threads for more than %d seconds now", module_name, MAX_KMOD_CONCURRENT, MAX_KMOD_ALL_BUSY_TIMEOUT); return ret; } trace_module_request(module_name, wait, _RET_IP_); if (kmod_dup_request_exists_wait(module_name, wait, &dup_ret)) { ret = dup_ret; goto out; } ret = call_modprobe(module_name, wait ? UMH_WAIT_PROC : UMH_WAIT_EXEC); out: up(&kmod_concurrent_max); return ret; } EXPORT_SYMBOL(__request_module);
9 2 7 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 #include <linux/errno.h> #include <linux/ip.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/socket.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ip.h> #include <net/ip6_fib.h> #include <net/lwtunnel.h> #include <net/protocol.h> #include <uapi/linux/ila.h> #include "ila.h" void ila_init_saved_csum(struct ila_params *p) { if (!p->locator_match.v64) return; p->csum_diff = compute_csum_diff8( (__be32 *)&p->locator, (__be32 *)&p->locator_match); } static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p) { if (p->locator_match.v64) return p->csum_diff; else return compute_csum_diff8((__be32 *)&p->locator, (__be32 *)&iaddr->loc); } static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) { return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p); } static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr, struct ila_params *p) { __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff, fval; diff = get_csum_diff_iaddr(iaddr, p); fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); diff = csum_add(diff, fval); *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); /* Flip the csum-neutral bit. Either we are doing a SIR->ILA * translation with ILA_CSUM_NEUTRAL_MAP as the csum_method * and the C-bit is not set, or we are doing an ILA-SIR * tranlsation and the C-bit is set. 
*/ iaddr->ident.csum_neutral ^= 1; } static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr, struct ila_params *p) { __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff; diff = get_csum_diff_iaddr(iaddr, p); *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); } static void ila_csum_adjust_transport(struct sk_buff *skb, struct ila_params *p) { size_t nhoff = sizeof(struct ipv6hdr); struct ipv6hdr *ip6h = ipv6_hdr(skb); __wsum diff; switch (ip6h->nexthdr) { case NEXTHDR_TCP: if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) { struct tcphdr *th = (struct tcphdr *) (skb_network_header(skb) + nhoff); diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&th->check, skb, diff, true); } break; case NEXTHDR_UDP: if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) { struct udphdr *uh = (struct udphdr *) (skb_network_header(skb) + nhoff); if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&uh->check, skb, diff, true); if (!uh->check) uh->check = CSUM_MANGLED_0; } } break; case NEXTHDR_ICMP: if (likely(pskb_may_pull(skb, nhoff + sizeof(struct icmp6hdr)))) { struct icmp6hdr *ih = (struct icmp6hdr *) (skb_network_header(skb) + nhoff); diff = get_csum_diff(ip6h, p); inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, diff, true); } break; } } void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, bool sir2ila) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); switch (p->csum_mode) { case ILA_CSUM_ADJUST_TRANSPORT: ila_csum_adjust_transport(skb, p); break; case ILA_CSUM_NEUTRAL_MAP: if (sir2ila) { if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) { /* Checksum flag should never be * set in a formatted SIR address. */ break; } } else if (!ila_csum_neutral_set(iaddr->ident)) { /* ILA to SIR translation and C-bit isn't * set so we're good. 
*/ break; } ila_csum_do_neutral_fmt(iaddr, p); break; case ILA_CSUM_NEUTRAL_MAP_AUTO: ila_csum_do_neutral_nofmt(iaddr, p); break; case ILA_CSUM_NO_ACTION: break; } /* Now change destination address */ iaddr->loc = p->locator; }
5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0-only /* * linux/lib/crc-ccitt.c */ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-ccitt.h> /* * This mysterious table is just the CRC of each possible byte. It can be * computed using the standard bit-at-a-time methods. The polynomial can * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12. * Add the implicit x^16, and you have the standard CRC-CCITT. */ u16 const crc_ccitt_table[256] = { 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 
0xc03c, 0xd1b5, 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 }; EXPORT_SYMBOL(crc_ccitt_table); /** * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer */ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) { while (len--) crc = crc_ccitt_byte(crc, *buffer++); return crc; } EXPORT_SYMBOL(crc_ccitt); MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL");
35 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com> */ #ifndef __IPVLAN_H #define __IPVLAN_H #include <linux/kernel.h> #include <linux/types.h> #include <linux/module.h> #include <linux/init.h> #include <linux/rculist.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/if_link.h> #include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/inetdevice.h> #include <linux/netfilter.h> #include <net/ip.h> #include <net/ip6_route.h> #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/route.h> #include <net/addrconf.h> #include <net/l3mdev.h> #define IPVLAN_DRV "ipvlan" #define IPV_DRV_VER "0.1" #define IPVLAN_HASH_SIZE (1 << BITS_PER_BYTE) #define IPVLAN_HASH_MASK (IPVLAN_HASH_SIZE - 1) #define IPVLAN_MAC_FILTER_BITS 8 #define IPVLAN_MAC_FILTER_SIZE (1 << IPVLAN_MAC_FILTER_BITS) #define IPVLAN_MAC_FILTER_MASK (IPVLAN_MAC_FILTER_SIZE - 1) #define IPVLAN_QBACKLOG_LIMIT 1000 typedef enum { IPVL_IPV6 = 0, IPVL_ICMPV6, IPVL_IPV4, IPVL_ARP, } ipvl_hdr_type; struct ipvl_pcpu_stats { u64_stats_t rx_pkts; u64_stats_t rx_bytes; u64_stats_t rx_mcast; u64_stats_t tx_pkts; u64_stats_t tx_bytes; 
struct u64_stats_sync syncp; u32 rx_errs; u32 tx_drps; }; struct ipvl_port; struct ipvl_dev { struct net_device *dev; struct list_head pnode; struct ipvl_port *port; struct net_device *phy_dev; struct list_head addrs; struct ipvl_pcpu_stats __percpu *pcpu_stats; DECLARE_BITMAP(mac_filters, IPVLAN_MAC_FILTER_SIZE); netdev_features_t sfeatures; u32 msg_enable; spinlock_t addrs_lock; }; struct ipvl_addr { struct ipvl_dev *master; /* Back pointer to master */ union { struct in6_addr ip6; /* IPv6 address on logical interface */ struct in_addr ip4; /* IPv4 address on logical interface */ } ipu; #define ip6addr ipu.ip6 #define ip4addr ipu.ip4 struct hlist_node hlnode; /* Hash-table linkage */ struct list_head anode; /* logical-interface linkage */ ipvl_hdr_type atype; struct rcu_head rcu; }; struct ipvl_port { struct net_device *dev; possible_net_t pnet; struct hlist_head hlhead[IPVLAN_HASH_SIZE]; struct list_head ipvlans; u16 mode; u16 flags; u16 dev_id_start; struct work_struct wq; struct sk_buff_head backlog; int count; struct ida ida; netdevice_tracker dev_tracker; }; struct ipvl_skb_cb { bool tx_pkt; }; #define IPVL_SKB_CB(_skb) ((struct ipvl_skb_cb *)&((_skb)->cb[0])) static inline struct ipvl_port *ipvlan_port_get_rcu(const struct net_device *d) { return rcu_dereference(d->rx_handler_data); } static inline struct ipvl_port *ipvlan_port_get_rcu_bh(const struct net_device *d) { return rcu_dereference_bh(d->rx_handler_data); } static inline struct ipvl_port *ipvlan_port_get_rtnl(const struct net_device *d) { return rtnl_dereference(d->rx_handler_data); } static inline bool ipvlan_is_private(const struct ipvl_port *port) { return !!(port->flags & IPVLAN_F_PRIVATE); } static inline void ipvlan_mark_private(struct ipvl_port *port) { port->flags |= IPVLAN_F_PRIVATE; } static inline void ipvlan_clear_private(struct ipvl_port *port) { port->flags &= ~IPVLAN_F_PRIVATE; } static inline bool ipvlan_is_vepa(const struct ipvl_port *port) { return !!(port->flags & IPVLAN_F_VEPA); 
} static inline void ipvlan_mark_vepa(struct ipvl_port *port) { port->flags |= IPVLAN_F_VEPA; } static inline void ipvlan_clear_vepa(struct ipvl_port *port) { port->flags &= ~IPVLAN_F_VEPA; } void ipvlan_init_secret(void); unsigned int ipvlan_mac_hash(const unsigned char *addr); rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb); void ipvlan_process_multicast(struct work_struct *work); int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev); void ipvlan_ht_addr_add(struct ipvl_dev *ipvlan, struct ipvl_addr *addr); struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); struct ipvl_addr *ipvlan_addr_lookup(struct ipvl_port *port, void *lyr3h, int addr_type, bool use_dest); void *ipvlan_get_L3_hdr(struct ipvl_port *port, struct sk_buff *skb, int *type); void ipvlan_count_rx(const struct ipvl_dev *ipvlan, unsigned int len, bool success, bool mcast); int ipvlan_link_new(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack); void ipvlan_link_delete(struct net_device *dev, struct list_head *head); void ipvlan_link_setup(struct net_device *dev); int ipvlan_link_register(struct rtnl_link_ops *ops); #ifdef CONFIG_IPVLAN_L3S int ipvlan_l3s_register(struct ipvl_port *port); void ipvlan_l3s_unregister(struct ipvl_port *port); void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet); int ipvlan_l3s_init(void); void ipvlan_l3s_cleanup(void); #else static inline int ipvlan_l3s_register(struct ipvl_port *port) { return -ENOTSUPP; } static inline void ipvlan_l3s_unregister(struct ipvl_port *port) { } static inline void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet) { } static inline int ipvlan_l3s_init(void) { return 0; } static inline void ipvlan_l3s_cleanup(void) { } #endif /* CONFIG_IPVLAN_L3S */ 
static inline bool netif_is_ipvlan_port(const struct net_device *dev) { return rcu_access_pointer(dev->rx_handler) == ipvlan_handle_frame; } #endif /* __IPVLAN_H */
12782 12788 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 // SPDX-License-Identifier: GPL-2.0-only /* * x86 APERF/MPERF KHz calculation for * /sys/.../cpufreq/scaling_cur_freq * * Copyright (C) 2017 Intel Corp. * Author: Len Brown <len.brown@intel.com> */ #include <linux/cpufreq.h> #include <linux/delay.h> #include <linux/ktime.h> #include <linux/math64.h> #include <linux/percpu.h> #include <linux/rcupdate.h> #include <linux/sched/isolation.h> #include <linux/sched/topology.h> #include <linux/smp.h> #include <linux/syscore_ops.h> #include <asm/cpu.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "cpu.h" struct aperfmperf { seqcount_t seq; unsigned long last_update; u64 acnt; u64 mcnt; u64 aperf; u64 mperf; }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct aperfmperf, cpu_samples) = { .seq = SEQCNT_ZERO(cpu_samples.seq) }; static void init_counter_refs(void) { u64 aperf, mperf; rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); this_cpu_write(cpu_samples.aperf, aperf); this_cpu_write(cpu_samples.mperf, mperf); } #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) /* * APERF/MPERF frequency ratio computation. * * The scheduler wants to do frequency invariant accounting and needs a <1 * ratio to account for the 'current' frequency, corresponding to * freq_curr / freq_max. * * Since the frequency freq_curr on x86 is controlled by micro-controller and * our P-state setting is little more than a request/hint, we need to observe * the effective frequency 'BusyMHz', i.e. the average frequency over a time * interval after discarding idle time. This is given by: * * BusyMHz = delta_APERF / delta_MPERF * freq_base * * where freq_base is the max non-turbo P-state. * * The freq_max term has to be set to a somewhat arbitrary value, because we * can't know which turbo states will be available at a given point in time: * it all depends on the thermal headroom of the entire package. 
We set it to * the turbo level with 4 cores active. * * Benchmarks show that's a good compromise between the 1C turbo ratio * (freq_curr/freq_max would rarely reach 1) and something close to freq_base, * which would ignore the entire turbo range (a conspicuous part, making * freq_curr/freq_max always maxed out). * * An exception to the heuristic above is the Atom uarch, where we choose the * highest turbo level for freq_max since Atom's are generally oriented towards * power efficiency. * * Setting freq_max to anything less than the 1C turbo ratio makes the ratio * freq_curr / freq_max to eventually grow >1, in which case we clip it to 1. */ DEFINE_STATIC_KEY_FALSE(arch_scale_freq_key); static u64 arch_turbo_freq_ratio = SCHED_CAPACITY_SCALE; static u64 arch_max_freq_ratio = SCHED_CAPACITY_SCALE; void arch_set_max_freq_ratio(bool turbo_disabled) { arch_max_freq_ratio = turbo_disabled ? SCHED_CAPACITY_SCALE : arch_turbo_freq_ratio; } EXPORT_SYMBOL_GPL(arch_set_max_freq_ratio); static bool __init turbo_disabled(void) { u64 misc_en; int err; err = rdmsrl_safe(MSR_IA32_MISC_ENABLE, &misc_en); if (err) return false; return (misc_en & MSR_IA32_MISC_ENABLE_TURBO_DISABLE); } static bool __init slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { int err; err = rdmsrl_safe(MSR_ATOM_CORE_RATIOS, base_freq); if (err) return false; err = rdmsrl_safe(MSR_ATOM_CORE_TURBO_RATIOS, turbo_freq); if (err) return false; *base_freq = (*base_freq >> 16) & 0x3F; /* max P state */ *turbo_freq = *turbo_freq & 0x3F; /* 1C turbo */ return true; } #define X86_MATCH(vfm) \ X86_MATCH_VFM_FEATURE(vfm, X86_FEATURE_APERFMPERF, NULL) static const struct x86_cpu_id has_knl_turbo_ratio_limits[] __initconst = { X86_MATCH(INTEL_XEON_PHI_KNL), X86_MATCH(INTEL_XEON_PHI_KNM), {} }; static const struct x86_cpu_id has_skx_turbo_ratio_limits[] __initconst = { X86_MATCH(INTEL_SKYLAKE_X), {} }; static const struct x86_cpu_id has_glm_turbo_ratio_limits[] __initconst = { X86_MATCH(INTEL_ATOM_GOLDMONT), 
X86_MATCH(INTEL_ATOM_GOLDMONT_D), X86_MATCH(INTEL_ATOM_GOLDMONT_PLUS), {} }; static bool __init knl_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int num_delta_fratio) { int fratio, delta_fratio, found; int err, i; u64 msr; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); if (err) return false; fratio = (msr >> 8) & 0xFF; i = 16; found = 0; do { if (found >= num_delta_fratio) { *turbo_freq = fratio; return true; } delta_fratio = (msr >> (i + 5)) & 0x7; if (delta_fratio) { found += 1; fratio -= delta_fratio; } i += 8; } while (i < 64); return true; } static bool __init skx_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq, int size) { u64 ratios, counts; u32 group_size; int err, i; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &ratios); if (err) return false; err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT1, &counts); if (err) return false; for (i = 0; i < 64; i += 8) { group_size = (counts >> i) & 0xFF; if (group_size >= size) { *turbo_freq = (ratios >> i) & 0xFF; return true; } } return false; } static bool __init core_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) { u64 msr; int err; err = rdmsrl_safe(MSR_PLATFORM_INFO, base_freq); if (err) return false; err = rdmsrl_safe(MSR_TURBO_RATIO_LIMIT, &msr); if (err) return false; *base_freq = (*base_freq >> 8) & 0xFF; /* max P state */ *turbo_freq = (msr >> 24) & 0xFF; /* 4C turbo */ /* The CPU may have less than 4 cores */ if (!*turbo_freq) *turbo_freq = msr & 0xFF; /* 1C turbo */ return true; } static bool __init intel_set_max_freq_ratio(void) { u64 base_freq, turbo_freq; u64 turbo_ratio; if (slv_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; if (x86_match_cpu(has_glm_turbo_ratio_limits) && skx_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out; if 
(x86_match_cpu(has_knl_turbo_ratio_limits) && knl_set_max_freq_ratio(&base_freq, &turbo_freq, 1)) goto out; if (x86_match_cpu(has_skx_turbo_ratio_limits) && skx_set_max_freq_ratio(&base_freq, &turbo_freq, 4)) goto out; if (core_set_max_freq_ratio(&base_freq, &turbo_freq)) goto out; return false; out: /* * Some hypervisors advertise X86_FEATURE_APERFMPERF * but then fill all MSR's with zeroes. * Some CPUs have turbo boost but don't declare any turbo ratio * in MSR_TURBO_RATIO_LIMIT. */ if (!base_freq || !turbo_freq) { pr_debug("Couldn't determine cpu base or turbo frequency, necessary for scale-invariant accounting.\n"); return false; } turbo_ratio = div_u64(turbo_freq * SCHED_CAPACITY_SCALE, base_freq); if (!turbo_ratio) { pr_debug("Non-zero turbo and base frequencies led to a 0 ratio.\n"); return false; } arch_turbo_freq_ratio = turbo_ratio; arch_set_max_freq_ratio(turbo_disabled()); return true; } #ifdef CONFIG_PM_SLEEP static struct syscore_ops freq_invariance_syscore_ops = { .resume = init_counter_refs, }; static void register_freq_invariance_syscore_ops(void) { register_syscore_ops(&freq_invariance_syscore_ops); } #else static inline void register_freq_invariance_syscore_ops(void) {} #endif static void freq_invariance_enable(void) { if (static_branch_unlikely(&arch_scale_freq_key)) { WARN_ON_ONCE(1); return; } static_branch_enable_cpuslocked(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { arch_turbo_freq_ratio = ratio; arch_set_max_freq_ratio(turbo_disabled); freq_invariance_enable(); } static void __init bp_init_freq_invariance(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; if (intel_set_max_freq_ratio()) { guard(cpus_read_lock)(); freq_invariance_enable(); } } static void disable_freq_invariance_workfn(struct work_struct *work) { int cpu; 
static_branch_disable(&arch_scale_freq_key); /* * Set arch_freq_scale to a default value on all cpus * This negates the effect of scaling */ for_each_possible_cpu(cpu) per_cpu(arch_freq_scale, cpu) = SCHED_CAPACITY_SCALE; } static DECLARE_WORK(disable_freq_invariance_work, disable_freq_invariance_workfn); DEFINE_PER_CPU(unsigned long, arch_freq_scale) = SCHED_CAPACITY_SCALE; EXPORT_PER_CPU_SYMBOL_GPL(arch_freq_scale); static DEFINE_STATIC_KEY_FALSE(arch_hybrid_cap_scale_key); struct arch_hybrid_cpu_scale { unsigned long capacity; unsigned long freq_ratio; }; static struct arch_hybrid_cpu_scale __percpu *arch_cpu_scale; /** * arch_enable_hybrid_capacity_scale() - Enable hybrid CPU capacity scaling * * Allocate memory for per-CPU data used by hybrid CPU capacity scaling, * initialize it and set the static key controlling its code paths. * * Must be called before arch_set_cpu_capacity(). */ bool arch_enable_hybrid_capacity_scale(void) { int cpu; if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) { WARN_ONCE(1, "Hybrid CPU capacity scaling already enabled"); return true; } arch_cpu_scale = alloc_percpu(struct arch_hybrid_cpu_scale); if (!arch_cpu_scale) return false; for_each_possible_cpu(cpu) { per_cpu_ptr(arch_cpu_scale, cpu)->capacity = SCHED_CAPACITY_SCALE; per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio = arch_max_freq_ratio; } static_branch_enable(&arch_hybrid_cap_scale_key); pr_info("Hybrid CPU capacity scaling enabled\n"); return true; } /** * arch_set_cpu_capacity() - Set scale-invariance parameters for a CPU * @cpu: Target CPU. * @cap: Capacity of @cpu at its maximum frequency, relative to @max_cap. * @max_cap: System-wide maximum CPU capacity. * @cap_freq: Frequency of @cpu corresponding to @cap. * @base_freq: Frequency of @cpu at which MPERF counts. * * The units in which @cap and @max_cap are expressed do not matter, so long * as they are consistent, because the former is effectively divided by the * latter. Analogously for @cap_freq and @base_freq. 
* * After calling this function for all CPUs, call arch_rebuild_sched_domains() * to let the scheduler know that capacity-aware scheduling can be used going * forward. */ void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap, unsigned long cap_freq, unsigned long base_freq) { if (static_branch_likely(&arch_hybrid_cap_scale_key)) { WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity, div_u64(cap << SCHED_CAPACITY_SHIFT, max_cap)); WRITE_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->freq_ratio, div_u64(cap_freq << SCHED_CAPACITY_SHIFT, base_freq)); } else { WARN_ONCE(1, "Hybrid CPU capacity scaling not enabled"); } } unsigned long arch_scale_cpu_capacity(int cpu) { if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) return READ_ONCE(per_cpu_ptr(arch_cpu_scale, cpu)->capacity); return SCHED_CAPACITY_SCALE; } EXPORT_SYMBOL_GPL(arch_scale_cpu_capacity); static void scale_freq_tick(u64 acnt, u64 mcnt) { u64 freq_scale, freq_ratio; if (!arch_scale_freq_invariant()) return; if (check_shl_overflow(acnt, 2*SCHED_CAPACITY_SHIFT, &acnt)) goto error; if (static_branch_unlikely(&arch_hybrid_cap_scale_key)) freq_ratio = READ_ONCE(this_cpu_ptr(arch_cpu_scale)->freq_ratio); else freq_ratio = arch_max_freq_ratio; if (check_mul_overflow(mcnt, freq_ratio, &mcnt) || !mcnt) goto error; freq_scale = div64_u64(acnt, mcnt); if (!freq_scale) goto error; if (freq_scale > SCHED_CAPACITY_SCALE) freq_scale = SCHED_CAPACITY_SCALE; this_cpu_write(arch_freq_scale, freq_scale); return; error: pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); schedule_work(&disable_freq_invariance_work); } #else static inline void bp_init_freq_invariance(void) { } static inline void scale_freq_tick(u64 acnt, u64 mcnt) { } #endif /* CONFIG_X86_64 && CONFIG_SMP */ void arch_scale_freq_tick(void) { struct aperfmperf *s = this_cpu_ptr(&cpu_samples); u64 acnt, mcnt, aperf, mperf; if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return; rdmsrl(MSR_IA32_APERF, aperf); 
rdmsrl(MSR_IA32_MPERF, mperf); acnt = aperf - s->aperf; mcnt = mperf - s->mperf; s->aperf = aperf; s->mperf = mperf; raw_write_seqcount_begin(&s->seq); s->last_update = jiffies; s->acnt = acnt; s->mcnt = mcnt; raw_write_seqcount_end(&s->seq); scale_freq_tick(acnt, mcnt); } /* * Discard samples older than the define maximum sample age of 20ms. There * is no point in sending IPIs in such a case. If the scheduler tick was * not running then the CPU is either idle or isolated. */ #define MAX_SAMPLE_AGE ((unsigned long)HZ / 50) unsigned int arch_freq_get_on_cpu(int cpu) { struct aperfmperf *s = per_cpu_ptr(&cpu_samples, cpu); unsigned int seq, freq; unsigned long last; u64 acnt, mcnt; if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) goto fallback; do { seq = raw_read_seqcount_begin(&s->seq); last = s->last_update; acnt = s->acnt; mcnt = s->mcnt; } while (read_seqcount_retry(&s->seq, seq)); /* * Bail on invalid count and when the last update was too long ago, * which covers idle and NOHZ full CPUs. */ if (!mcnt || (jiffies - last) > MAX_SAMPLE_AGE) goto fallback; return div64_u64((cpu_khz * acnt), mcnt); fallback: freq = cpufreq_quick_get(cpu); return freq ? freq : cpu_khz; } static int __init bp_init_aperfmperf(void) { if (!cpu_feature_enabled(X86_FEATURE_APERFMPERF)) return 0; init_counter_refs(); bp_init_freq_invariance(); return 0; } early_initcall(bp_init_aperfmperf); void ap_init_aperfmperf(void) { if (cpu_feature_enabled(X86_FEATURE_APERFMPERF)) init_counter_refs(); }
38 38 38 38 38 38 28 10 50 50 28 38 38 25 25 25 25 25 28 8 8 3 9 1 8 10 8 4 4 4 1 1 1 1 1 2 1 2 11 1 4 3 4 4 3 3 1 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 // 
SPDX-License-Identifier: GPL-2.0 /* * Some IBSS support code for cfg80211. * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2020-2024 Intel Corporation */ #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/slab.h> #include <linux/export.h> #include <net/cfg80211.h> #include "wext-compat.h" #include "nl80211.h" #include "rdev-ops.h" void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_bss *bss; #ifdef CONFIG_CFG80211_WEXT union iwreq_data wrqu; #endif if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return; if (!wdev->u.ibss.ssid_len) return; bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0, IEEE80211_BSS_TYPE_IBSS, IEEE80211_PRIVACY_ANY); if (WARN_ON(!bss)) return; if (wdev->u.ibss.current_bss) { cfg80211_unhold_bss(wdev->u.ibss.current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub); } cfg80211_hold_bss(bss_from_pub(bss)); wdev->u.ibss.current_bss = bss_from_pub(bss); cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); #endif } void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, struct ieee80211_channel *channel, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_event *ev; unsigned long flags; trace_cfg80211_ibss_joined(dev, bssid, channel); if (WARN_ON(!channel)) return; ev = kzalloc(sizeof(*ev), gfp); if (!ev) return; ev->type = EVENT_IBSS_JOINED; memcpy(ev->ij.bssid, bssid, ETH_ALEN); ev->ij.channel = channel; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); spin_unlock_irqrestore(&wdev->event_lock, 
flags); queue_work(cfg80211_wq, &rdev->event_work); } EXPORT_SYMBOL(cfg80211_ibss_joined); int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ibss_params *params, struct cfg80211_cached_keys *connkeys) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; lockdep_assert_held(&rdev->wiphy.mtx); if (wdev->links[0].cac_started) return -EBUSY; if (wdev->u.ibss.ssid_len) return -EALREADY; if (!params->basic_rates) { /* * If no rates were explicitly configured, * use the mandatory rate set for 11b or * 11a for maximum compatibility. */ struct ieee80211_supported_band *sband; enum nl80211_band band; u32 flag; int j; band = params->chandef.chan->band; if (band == NL80211_BAND_5GHZ || band == NL80211_BAND_6GHZ) flag = IEEE80211_RATE_MANDATORY_A; else flag = IEEE80211_RATE_MANDATORY_B; sband = rdev->wiphy.bands[band]; for (j = 0; j < sband->n_bitrates; j++) { if (sband->bitrates[j].flags & flag) params->basic_rates |= BIT(j); } } if (WARN_ON(connkeys && connkeys->def < 0)) return -EINVAL; if (WARN_ON(wdev->connect_keys)) kfree_sensitive(wdev->connect_keys); wdev->connect_keys = connkeys; wdev->u.ibss.chandef = params->chandef; if (connkeys) { params->wep_keys = connkeys->params; params->wep_tx_key = connkeys->def; } #ifdef CONFIG_CFG80211_WEXT wdev->wext.ibss.chandef = params->chandef; #endif err = rdev_join_ibss(rdev, dev, params); if (err) { wdev->connect_keys = NULL; return err; } memcpy(wdev->u.ibss.ssid, params->ssid, params->ssid_len); wdev->u.ibss.ssid_len = params->ssid_len; return 0; } void cfg80211_clear_ibss(struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); int i; lockdep_assert_wiphy(wdev->wiphy); kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; rdev_set_qos_map(rdev, dev, NULL); /* * Delete all the keys ... pairwise keys can't really * exist any more anyway, but default keys might. 
*/ if (rdev->ops->del_key) for (i = 0; i < 6; i++) rdev_del_key(rdev, dev, -1, i, false, NULL); if (wdev->u.ibss.current_bss) { cfg80211_unhold_bss(wdev->u.ibss.current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub); } wdev->u.ibss.current_bss = NULL; wdev->u.ibss.ssid_len = 0; memset(&wdev->u.ibss.chandef, 0, sizeof(wdev->u.ibss.chandef)); #ifdef CONFIG_CFG80211_WEXT if (!nowext) wdev->wext.ibss.ssid_len = 0; #endif cfg80211_sched_dfs_chan_update(rdev); } int cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, struct net_device *dev, bool nowext) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; lockdep_assert_wiphy(wdev->wiphy); if (!wdev->u.ibss.ssid_len) return -ENOLINK; err = rdev_leave_ibss(rdev, dev); if (err) return err; wdev->conn_owner_nlportid = 0; cfg80211_clear_ibss(dev, nowext); return 0; } #ifdef CONFIG_CFG80211_WEXT int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct cfg80211_cached_keys *ck = NULL; enum nl80211_band band; int i, err; lockdep_assert_wiphy(wdev->wiphy); if (!wdev->wext.ibss.beacon_interval) wdev->wext.ibss.beacon_interval = 100; /* try to find an IBSS channel if none requested ... 
*/ if (!wdev->wext.ibss.chandef.chan) { struct ieee80211_channel *new_chan = NULL; for (band = 0; band < NUM_NL80211_BANDS; band++) { struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; sband = rdev->wiphy.bands[band]; if (!sband) continue; for (i = 0; i < sband->n_channels; i++) { chan = &sband->channels[i]; if (chan->flags & IEEE80211_CHAN_NO_IR) continue; if (chan->flags & IEEE80211_CHAN_DISABLED) continue; new_chan = chan; break; } if (new_chan) break; } if (!new_chan) return -EINVAL; cfg80211_chandef_create(&wdev->wext.ibss.chandef, new_chan, NL80211_CHAN_NO_HT); } /* don't join -- SSID is not there */ if (!wdev->wext.ibss.ssid_len) return 0; if (!netif_running(wdev->netdev)) return 0; if (wdev->wext.keys) wdev->wext.keys->def = wdev->wext.default_key; wdev->wext.ibss.privacy = wdev->wext.default_key != -1; if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; for (i = 0; i < 4; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, &wdev->wext.ibss, ck); if (err) kfree(ck); return err; } int cfg80211_ibss_wext_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *wextfreq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ieee80211_channel *chan = NULL; int err, freq; /* call only for ibss! 
*/ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; freq = cfg80211_wext_freq(wextfreq); if (freq < 0) return freq; if (freq) { chan = ieee80211_get_channel(wdev->wiphy, freq); if (!chan) return -EINVAL; if (chan->flags & IEEE80211_CHAN_NO_IR || chan->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; } if (wdev->wext.ibss.chandef.chan == chan) return 0; err = 0; if (wdev->u.ibss.ssid_len) err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; if (chan) { cfg80211_chandef_create(&wdev->wext.ibss.chandef, chan, NL80211_CHAN_NO_HT); wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ wdev->wext.ibss.channel_fixed = false; } return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_channel *chan = NULL; /* call only for ibss! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; if (wdev->u.ibss.current_bss) chan = wdev->u.ibss.current_bss->pub.channel; else if (wdev->wext.ibss.chandef.chan) chan = wdev->wext.ibss.chandef.chan; if (chan) { freq->m = chan->center_freq; freq->e = 6; return 0; } /* no channel if not joining */ return -EINVAL; } int cfg80211_ibss_wext_siwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); size_t len = data->length; int err; /* call only for ibss! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; err = 0; if (wdev->u.ibss.ssid_len) err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; /* iwconfig uses nul termination in SSID.. 
*/ if (len > 0 && ssid[len - 1] == '\0') len--; memcpy(wdev->u.ibss.ssid, ssid, len); wdev->wext.ibss.ssid = wdev->u.ibss.ssid; wdev->wext.ibss.ssid_len = len; return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwessid(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *ssid) { struct wireless_dev *wdev = dev->ieee80211_ptr; /* call only for ibss! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; data->flags = 0; if (wdev->u.ibss.ssid_len) { data->flags = 1; data->length = wdev->u.ibss.ssid_len; memcpy(ssid, wdev->u.ibss.ssid, data->length); } else if (wdev->wext.ibss.ssid && wdev->wext.ibss.ssid_len) { data->flags = 1; data->length = wdev->wext.ibss.ssid_len; memcpy(ssid, wdev->wext.ibss.ssid, data->length); } return 0; } int cfg80211_ibss_wext_siwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); u8 *bssid = ap_addr->sa_data; int err; /* call only for ibss! 
*/ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; if (ap_addr->sa_family != ARPHRD_ETHER) return -EINVAL; /* automatic mode */ if (is_zero_ether_addr(bssid) || is_broadcast_ether_addr(bssid)) bssid = NULL; if (bssid && !is_valid_ether_addr(bssid)) return -EINVAL; /* both automatic */ if (!bssid && !wdev->wext.ibss.bssid) return 0; /* fixed already - and no change */ if (wdev->wext.ibss.bssid && bssid && ether_addr_equal(bssid, wdev->wext.ibss.bssid)) return 0; err = 0; if (wdev->u.ibss.ssid_len) err = cfg80211_leave_ibss(rdev, dev, true); if (err) return err; if (bssid) { memcpy(wdev->wext.bssid, bssid, ETH_ALEN); wdev->wext.ibss.bssid = wdev->wext.bssid; } else wdev->wext.ibss.bssid = NULL; return cfg80211_ibss_wext_join(rdev, wdev); } int cfg80211_ibss_wext_giwap(struct net_device *dev, struct iw_request_info *info, struct sockaddr *ap_addr, char *extra) { struct wireless_dev *wdev = dev->ieee80211_ptr; /* call only for ibss! */ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return -EINVAL; ap_addr->sa_family = ARPHRD_ETHER; if (wdev->u.ibss.current_bss) memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid, ETH_ALEN); else if (wdev->wext.ibss.bssid) memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); else eth_zero_addr(ap_addr->sa_data); return 0; } #endif
58 10 111 111 111 72 8 67 1 3 4 8 3 6 3 3 8 30 16 33 26 3 7 6 46 46 46 46 45 2 44 44 10 34 7 7 7 5 3 4 4 4 3 3 3 1 1 1 2 1 5 71 71 67 68 67 51 16 14 14 14 21 21 14 33 26 6 3 8 2 9 9 1 25 10 19 14 14 14 14 14 14 14 14 14 14 14 14 31 23 5 382 381 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 
966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 
1372 1373 1374 1375 1376 1377 1378 // SPDX-License-Identifier: GPL-2.0-or-later /* * Extension Header handling for IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Andi Kleen <ak@muc.de> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> */ /* Changes: * yoshfuji : ensure not to overrun while parsing * tlv options. * Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs(). * YOSHIFUJI Hideaki @USAGI Register inbound extension header * handlers as inet6_protocol{}. */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/in6.h> #include <linux/icmpv6.h> #include <linux/slab.h> #include <linux/export.h> #include <net/dst.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/calipso.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/xfrm.h> #endif #include <linux/seg6.h> #include <net/seg6.h> #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif #include <net/rpl.h> #include <linux/ioam6.h> #include <linux/ioam6_genl.h> #include <net/ioam6.h> #include <net/dst_metadata.h> #include <linux/uaccess.h> /********************* Generic functions *********************/ /* An unknown option is detected, decide what to do */ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, bool disallow_unknowns) { if (disallow_unknowns) { /* If unknown TLVs are disallowed by configuration * then always silently drop packet. Note this also * means no ICMP parameter problem is sent which * could be a good property to mitigate a reflection DOS * attack. 
*/ goto drop; } switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return true; case 1: /* drop packet */ break; case 3: /* Send ICMP if not a multicast address and drop packet */ /* Actually, it is redundant check. icmp_send will recheck in any case. */ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) break; fallthrough; case 2: /* send ICMP PARM PROB regardless and drop packet */ icmpv6_param_prob_reason(skb, ICMPV6_UNK_OPTION, optoff, SKB_DROP_REASON_UNHANDLED_PROTO); return false; } drop: kfree_skb_reason(skb, SKB_DROP_REASON_UNHANDLED_PROTO); return false; } static bool ipv6_hop_ra(struct sk_buff *skb, int optoff); static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff); static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff); static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff); #if IS_ENABLED(CONFIG_IPV6_MIP6) static bool ipv6_dest_hao(struct sk_buff *skb, int optoff); #endif /* Parse tlv encoded option header (hop-by-hop or destination) */ static bool ip6_parse_tlv(bool hopbyhop, struct sk_buff *skb, int max_count) { int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); bool disallow_unknowns = false; int tlv_count = 0; int padlen = 0; if (unlikely(max_count < 0)) { disallow_unknowns = true; max_count = -max_count; } off += 2; len -= 2; while (len > 0) { int optlen, i; if (nh[off] == IPV6_TLV_PAD1) { padlen++; if (padlen > 7) goto bad; off++; len--; continue; } if (len < 2) goto bad; optlen = nh[off + 1] + 2; if (optlen > len) goto bad; if (nh[off] == IPV6_TLV_PADN) { /* RFC 2460 states that the purpose of PadN is * to align the containing header to multiples * of 8. 7 is therefore the highest valid value. * See also RFC 4942, Section 2.1.9.5. */ padlen += optlen; if (padlen > 7) goto bad; /* RFC 4942 recommends receiving hosts to * actively check PadN payload to contain * only zeroes. 
*/ for (i = 2; i < optlen; i++) { if (nh[off + i] != 0) goto bad; } } else { tlv_count++; if (tlv_count > max_count) goto bad; if (hopbyhop) { switch (nh[off]) { case IPV6_TLV_ROUTERALERT: if (!ipv6_hop_ra(skb, off)) return false; break; case IPV6_TLV_IOAM: if (!ipv6_hop_ioam(skb, off)) return false; nh = skb_network_header(skb); break; case IPV6_TLV_JUMBO: if (!ipv6_hop_jumbo(skb, off)) return false; break; case IPV6_TLV_CALIPSO: if (!ipv6_hop_calipso(skb, off)) return false; break; default: if (!ip6_tlvopt_unknown(skb, off, disallow_unknowns)) return false; break; } } else { switch (nh[off]) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_TLV_HAO: if (!ipv6_dest_hao(skb, off)) return false; break; #endif default: if (!ip6_tlvopt_unknown(skb, off, disallow_unknowns)) return false; break; } } padlen = 0; } off += optlen; len -= optlen; } if (len == 0) return true; bad: kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } /***************************** Destination options header. 
*****************************/ #if IS_ENABLED(CONFIG_IPV6_MIP6) static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) { struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); SKB_DR(reason); int ret; if (opt->dsthao) { net_dbg_ratelimited("hao duplicated\n"); goto discard; } opt->dsthao = opt->dst1; opt->dst1 = 0; hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); if (hao->length != 16) { net_dbg_ratelimited("hao invalid option length = %d\n", hao->length); SKB_DR_SET(reason, IP_INHDR); goto discard; } if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { net_dbg_ratelimited("hao is not an unicast addr: %pI6\n", &hao->addr); SKB_DR_SET(reason, INVALID_PROTO); goto discard; } ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); if (unlikely(ret < 0)) { SKB_DR_SET(reason, XFRM_POLICY); goto discard; } if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) goto discard; /* update all variable using below by copied skbuff */ hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + optoff); ipv6h = ipv6_hdr(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; swap(ipv6h->saddr, hao->addr); if (skb->tstamp == 0) __net_timestamp(skb); return true; discard: kfree_skb_reason(skb, reason); return false; } #endif static int ipv6_destopt_rcv(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); struct inet6_skb_parm *opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) __u16 dstbuf; #endif struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); int extlen; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { __IP6_INC_STATS(dev_net(dst->dev), idev, IPSTATS_MIB_INHDRERRORS); fail_and_free: kfree_skb(skb); return -1; } extlen = (skb_transport_header(skb)[1] + 1) << 
3; if (extlen > net->ipv6.sysctl.max_dst_opts_len) goto fail_and_free; opt->lastopt = opt->dst1 = skb_network_header_len(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) dstbuf = opt->dst1; #endif if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) opt->nhoff = dstbuf; #else opt->nhoff = opt->dst1; #endif return 1; } __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); return -1; } static void seg6_update_csum(struct sk_buff *skb) { struct ipv6_sr_hdr *hdr; struct in6_addr *addr; __be32 from, to; /* srh is at transport offset and seg_left is already decremented * but daddr is not yet updated with next segment */ hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); addr = hdr->segments + hdr->segments_left; hdr->segments_left++; from = *(__be32 *)hdr; hdr->segments_left--; to = *(__be32 *)hdr; /* update skb csum with diff resulting from seg_left decrement */ update_csum_diff4(skb, from, to); /* compute csum diff between current and next segment and update */ update_csum_diff16(skb, (__be32 *)(&ipv6_hdr(skb)->daddr), (__be32 *)addr); } static int ipv6_srh_rcv(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(skb->dev); struct ipv6_sr_hdr *hdr; struct inet6_dev *idev; struct in6_addr *addr; int accept_seg6; hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); idev = __in6_dev_get(skb->dev); accept_seg6 = min(READ_ONCE(net->ipv6.devconf_all->seg6_enabled), READ_ONCE(idev->cnf.seg6_enabled)); if (!accept_seg6) { kfree_skb(skb); return -1; } #ifdef CONFIG_IPV6_SEG6_HMAC if (!seg6_hmac_validate_skb(skb)) { kfree_skb(skb); return -1; } #endif looped_back: if (hdr->segments_left == 0) { if (hdr->nexthdr == NEXTHDR_IPV6 || hdr->nexthdr == NEXTHDR_IPV4) { int offset = (hdr->hdrlen + 1) << 3; skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull(skb, offset); skb_postpull_rcsum(skb, skb_transport_header(skb), 
offset); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->encapsulation = 0; if (hdr->nexthdr == NEXTHDR_IPV4) skb->protocol = htons(ETH_P_IP); __skb_tunnel_rx(skb, skb->dev, net); netif_rx(skb); return -1; } opt->srcrt = skb_network_header_len(skb); opt->lastopt = opt->srcrt; skb->transport_header += (hdr->hdrlen + 1) << 3; opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); return 1; } if (hdr->segments_left >= (hdr->hdrlen >> 1)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); return -1; } if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb); } hdr->segments_left--; addr = hdr->segments + hdr->segments_left; skb_push(skb, sizeof(struct ipv6hdr)); if (skb->ip_summed == CHECKSUM_COMPLETE) seg6_update_csum(skb); ipv6_hdr(skb)->daddr = *addr; ip6_route_input(skb); if (skb_dst(skb)->error) { dst_input(skb); return -1; } if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); return -1; } ipv6_hdr(skb)->hop_limit--; skb_pull(skb, sizeof(struct ipv6hdr)); goto looped_back; } dst_input(skb); return -1; } static int ipv6_rpl_srh_rcv(struct sk_buff *skb) { struct ipv6_rpl_sr_hdr *hdr, *ohdr, *chdr; struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(skb->dev); struct inet6_dev *idev; struct ipv6hdr *oldhdr; unsigned char *buf; int accept_rpl_seg; int i, err; u64 n = 0; u32 r; idev = __in6_dev_get(skb->dev); accept_rpl_seg = net->ipv6.devconf_all->rpl_seg_enabled; if (accept_rpl_seg > idev->cnf.rpl_seg_enabled) accept_rpl_seg = idev->cnf.rpl_seg_enabled; if (!accept_rpl_seg) { kfree_skb(skb); return -1; } 
looped_back: hdr = (struct ipv6_rpl_sr_hdr *)skb_transport_header(skb); if (hdr->segments_left == 0) { if (hdr->nexthdr == NEXTHDR_IPV6) { int offset = (hdr->hdrlen + 1) << 3; skb_postpull_rcsum(skb, skb_network_header(skb), skb_network_header_len(skb)); skb_pull(skb, offset); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->encapsulation = 0; __skb_tunnel_rx(skb, skb->dev, net); netif_rx(skb); return -1; } opt->srcrt = skb_network_header_len(skb); opt->lastopt = opt->srcrt; skb->transport_header += (hdr->hdrlen + 1) << 3; opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); return 1; } n = (hdr->hdrlen << 3) - hdr->pad - (16 - hdr->cmpre); r = do_div(n, (16 - hdr->cmpri)); /* checks if calculation was without remainder and n fits into * unsigned char which is segments_left field. Should not be * higher than that. */ if (r || (n + 1) > 255) { kfree_skb(skb); return -1; } if (hdr->segments_left > n + 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); return -1; } hdr->segments_left--; i = n - hdr->segments_left; buf = kcalloc(struct_size(hdr, segments.addr, n + 2), 2, GFP_ATOMIC); if (unlikely(!buf)) { kfree_skb(skb); return -1; } ohdr = (struct ipv6_rpl_sr_hdr *)buf; ipv6_rpl_srh_decompress(ohdr, hdr, &ipv6_hdr(skb)->daddr, n); chdr = (struct ipv6_rpl_sr_hdr *)(buf + ((ohdr->hdrlen + 1) << 3)); if (ipv6_addr_is_multicast(&ohdr->rpl_segaddr[i])) { kfree_skb(skb); kfree(buf); return -1; } err = ipv6_chk_rpl_srh_loop(net, ohdr->rpl_segaddr, n + 1); if (err) { icmpv6_send(skb, ICMPV6_PARAMPROB, 0, 0); kfree_skb(skb); kfree(buf); return -1; } swap(ipv6_hdr(skb)->daddr, ohdr->rpl_segaddr[i]); ipv6_rpl_srh_compress(chdr, ohdr, &ipv6_hdr(skb)->daddr, n); oldhdr = ipv6_hdr(skb); skb_pull(skb, ((hdr->hdrlen + 1) << 3)); skb_postpull_rcsum(skb, oldhdr, sizeof(struct ipv6hdr) + ((hdr->hdrlen + 1) 
<< 3)); if (unlikely(!hdr->segments_left)) { if (pskb_expand_head(skb, sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3), 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); kfree(buf); return -1; } oldhdr = ipv6_hdr(skb); } skb_push(skb, ((chdr->hdrlen + 1) << 3) + sizeof(struct ipv6hdr)); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); memmove(ipv6_hdr(skb), oldhdr, sizeof(struct ipv6hdr)); memcpy(skb_transport_header(skb), chdr, (chdr->hdrlen + 1) << 3); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_postpush_rcsum(skb, ipv6_hdr(skb), sizeof(struct ipv6hdr) + ((chdr->hdrlen + 1) << 3)); kfree(buf); ip6_route_input(skb); if (skb_dst(skb)->error) { dst_input(skb); return -1; } if (skb_dst(skb)->dev->flags & IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); return -1; } ipv6_hdr(skb)->hop_limit--; skb_pull(skb, sizeof(struct ipv6hdr)); goto looped_back; } dst_input(skb); return -1; } /******************************** Routing header. 
********************************/ /* called with rcu_read_lock() */ static int ipv6_rthdr_rcv(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; struct net *net = dev_net(skb->dev); int accept_source_route; accept_source_route = READ_ONCE(net->ipv6.devconf_all->accept_source_route); if (idev) accept_source_route = min(accept_source_route, READ_ONCE(idev->cnf.accept_source_route)); if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; } hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } switch (hdr->type) { case IPV6_SRCRT_TYPE_4: /* segment routing */ return ipv6_srh_rcv(skb); case IPV6_SRCRT_TYPE_3: /* rpl segment routing */ return ipv6_rpl_srh_rcv(skb); default: break; } looped_back: if (hdr->segments_left == 0) { switch (hdr->type) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: /* Silently discard type 2 header unless it was * processed by own */ if (!addr) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } break; #endif default: break; } opt->lastopt = opt->srcrt = skb_network_header_len(skb); skb->transport_header += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; opt->dst1 = 0; opt->nhoff = (&hdr->nexthdr) - skb_network_header(skb); return 1; } switch (hdr->type) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (accept_source_route < 0) goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); 
kfree_skb(skb); return -1; } break; #endif default: goto unknown_rh; } /* * This is the routing header forwarding algorithm from * RFC 2460, page 16. */ n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - skb_network_header(skb))); return -1; } /* We are about to mangle packet header. Be careful! Do not damage packets queued somewhere. */ if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); } if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; i = n - --hdr->segments_left; rthdr = (struct rt0_hdr *) hdr; addr = rthdr->addr; addr += i - 1; switch (hdr->type) { #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPV6_SRCRT_TYPE_2: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(dev_net(skb_dst(skb)->dev), addr)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } break; #endif default: break; } if (ipv6_addr_is_multicast(addr)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } swap(*addr, ipv6_hdr(skb)->daddr); ip6_route_input(skb); if (skb_dst(skb)->error) { skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; } if (skb_dst(skb)->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); kfree_skb(skb); return -1; } ipv6_hdr(skb)->hop_limit--; goto looped_back; } skb_push(skb, -skb_network_offset(skb)); dst_input(skb); return -1; unknown_rh: __IP6_INC_STATS(net, 
idev, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; } static const struct inet6_protocol rthdr_protocol = { .handler = ipv6_rthdr_rcv, .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol destopt_protocol = { .handler = ipv6_destopt_rcv, .flags = INET6_PROTO_NOPOLICY, }; static const struct inet6_protocol nodata_protocol = { .handler = dst_discard, .flags = INET6_PROTO_NOPOLICY, }; int __init ipv6_exthdrs_init(void) { int ret; ret = inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING); if (ret) goto out; ret = inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS); if (ret) goto out_rthdr; ret = inet6_add_protocol(&nodata_protocol, IPPROTO_NONE); if (ret) goto out_destopt; out: return ret; out_destopt: inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); out_rthdr: inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); goto out; }; void ipv6_exthdrs_exit(void) { inet6_del_protocol(&nodata_protocol, IPPROTO_NONE); inet6_del_protocol(&destopt_protocol, IPPROTO_DSTOPTS); inet6_del_protocol(&rthdr_protocol, IPPROTO_ROUTING); } /********************************** Hop-by-hop options. 
**********************************/ /* Router Alert as of RFC 2711 */ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { IP6CB(skb)->flags |= IP6SKB_ROUTERALERT; memcpy(&IP6CB(skb)->ra, nh + optoff + 2, sizeof(IP6CB(skb)->ra)); return true; } net_dbg_ratelimited("ipv6_hop_ra: wrong RA length %d\n", nh[optoff + 1]); kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } /* IOAM */ static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff) { struct ioam6_trace_hdr *trace; struct ioam6_namespace *ns; struct ioam6_hdr *hdr; /* Bad alignment (must be 4n-aligned) */ if (optoff & 3) goto drop; /* Ignore if IOAM is not enabled on ingress */ if (!READ_ONCE(__in6_dev_get(skb->dev)->cnf.ioam6_enabled)) goto ignore; /* Truncated Option header */ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff); if (hdr->opt_len < 2) goto drop; switch (hdr->type) { case IOAM6_TYPE_PREALLOC: /* Truncated Pre-allocated Trace header */ if (hdr->opt_len < 2 + sizeof(*trace)) goto drop; /* Malformed Pre-allocated Trace header */ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr)); if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4) goto drop; /* Ignore if the IOAM namespace is unknown */ ns = ioam6_namespace(dev_net(skb->dev), trace->namespace_id); if (!ns) goto ignore; if (!skb_valid_dst(skb)) ip6_route_input(skb); /* About to mangle packet header */ if (skb_ensure_writable(skb, optoff + 2 + hdr->opt_len)) goto drop; /* Trace pointer may have changed */ trace = (struct ioam6_trace_hdr *)(skb_network_header(skb) + optoff + sizeof(*hdr)); ioam6_fill_trace_data(skb, ns, trace, true); ioam6_event(IOAM6_EVENT_TRACE, dev_net(skb->dev), GFP_ATOMIC, (void *)trace, hdr->opt_len - 2); break; default: break; } ignore: return true; drop: kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } /* Jumbo payload */ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const 
unsigned char *nh = skb_network_header(skb); SKB_DR(reason); u32 pkt_len; if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { net_dbg_ratelimited("ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); SKB_DR_SET(reason, IP_INHDR); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff + 2, SKB_DROP_REASON_IP_INHDR); return false; } if (ipv6_hdr(skb)->payload_len) { icmpv6_param_prob_reason(skb, ICMPV6_HDR_FIELD, optoff, SKB_DROP_REASON_IP_INHDR); return false; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { SKB_DR_SET(reason, PKT_TOO_SMALL); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM; return true; drop: kfree_skb_reason(skb, reason); return false; } /* CALIPSO RFC 5570 */ static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] < 8) goto drop; if (nh[optoff + 6] * 4 + 8 > nh[optoff + 1]) goto drop; if (!calipso_validate(skb, nh + optoff)) goto drop; return true; drop: kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return false; } int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(skb->dev); int extlen; /* * skb_network_header(skb) is equal to skb->data, and * skb_network_header_len(skb) is always equal to * sizeof(struct ipv6hdr) by definition of * hop-by-hop options. 
*/ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + ((skb_transport_header(skb)[1] + 1) << 3)))) { fail_and_free: kfree_skb(skb); return -1; } extlen = (skb_transport_header(skb)[1] + 1) << 3; if (extlen > net->ipv6.sysctl.max_hbh_opts_len) goto fail_and_free; opt->flags |= IP6SKB_HOPBYHOP; if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) { skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); return 1; } return -1; } /* * Creating outbound headers. * * "build" functions work when skb is filled from head to tail (datagram) * "push" functions work when headers are added from tail to head (tcp) * * In both cases we assume, that caller reserved enough room * for headers. */ static void ipv6_push_rthdr0(struct sk_buff *skb, u8 *proto, struct ipv6_rt_hdr *opt, struct in6_addr **addr_p, struct in6_addr *saddr) { struct rt0_hdr *phdr, *ihdr; int hops; ihdr = (struct rt0_hdr *) opt; phdr = skb_push(skb, (ihdr->rt_hdr.hdrlen + 1) << 3); memcpy(phdr, ihdr, sizeof(struct rt0_hdr)); hops = ihdr->rt_hdr.hdrlen >> 1; if (hops > 1) memcpy(phdr->addr, ihdr->addr + 1, (hops - 1) * sizeof(struct in6_addr)); phdr->addr[hops - 1] = **addr_p; *addr_p = ihdr->addr; phdr->rt_hdr.nexthdr = *proto; *proto = NEXTHDR_ROUTING; } static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, struct ipv6_rt_hdr *opt, struct in6_addr **addr_p, struct in6_addr *saddr) { struct ipv6_sr_hdr *sr_phdr, *sr_ihdr; int plen, hops; sr_ihdr = (struct ipv6_sr_hdr *)opt; plen = (sr_ihdr->hdrlen + 1) << 3; sr_phdr = skb_push(skb, plen); memcpy(sr_phdr, sr_ihdr, sizeof(struct ipv6_sr_hdr)); hops = sr_ihdr->first_segment + 1; memcpy(sr_phdr->segments + 1, sr_ihdr->segments + 1, (hops - 1) * sizeof(struct in6_addr)); sr_phdr->segments[0] = **addr_p; *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; if (sr_ihdr->hdrlen > hops * 2) { int tlvs_offset, tlvs_length; tlvs_offset = (1 + hops * 2) << 3; tlvs_length = 
(sr_ihdr->hdrlen - hops * 2) << 3; memcpy((char *)sr_phdr + tlvs_offset, (char *)sr_ihdr + tlvs_offset, tlvs_length); } #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(sr_phdr)) { struct net *net = NULL; if (skb->dev) net = dev_net(skb->dev); else if (skb->sk) net = sock_net(skb->sk); WARN_ON(!net); if (net) seg6_push_hmac(net, saddr, sr_phdr); } #endif sr_phdr->nexthdr = *proto; *proto = NEXTHDR_ROUTING; } static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto, struct ipv6_rt_hdr *opt, struct in6_addr **addr_p, struct in6_addr *saddr) { switch (opt->type) { case IPV6_SRCRT_TYPE_0: case IPV6_SRCRT_STRICT: case IPV6_SRCRT_TYPE_2: ipv6_push_rthdr0(skb, proto, opt, addr_p, saddr); break; case IPV6_SRCRT_TYPE_4: ipv6_push_rthdr4(skb, proto, opt, addr_p, saddr); break; default: break; } } static void ipv6_push_exthdr(struct sk_buff *skb, u8 *proto, u8 type, struct ipv6_opt_hdr *opt) { struct ipv6_opt_hdr *h = skb_push(skb, ipv6_optlen(opt)); memcpy(h, opt, ipv6_optlen(opt)); h->nexthdr = *proto; *proto = type; } void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr, struct in6_addr *saddr) { if (opt->srcrt) { ipv6_push_rthdr(skb, proto, opt->srcrt, daddr, saddr); /* * IPV6_RTHDRDSTOPTS is ignored * unless IPV6_RTHDR is set (RFC3542). 
*/ if (opt->dst0opt) ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); } if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } void ipv6_push_frag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto) { if (opt->dst1opt) ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst1opt); } EXPORT_SYMBOL(ipv6_push_frag_opts); struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) { struct ipv6_txoptions *opt2; opt2 = sock_kmalloc(sk, opt->tot_len, GFP_ATOMIC); if (opt2) { long dif = (char *)opt2 - (char *)opt; memcpy(opt2, opt, opt->tot_len); if (opt2->hopopt) *((char **)&opt2->hopopt) += dif; if (opt2->dst0opt) *((char **)&opt2->dst0opt) += dif; if (opt2->dst1opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; refcount_set(&opt2->refcnt, 1); } return opt2; } EXPORT_SYMBOL_GPL(ipv6_dup_options); static void ipv6_renew_option(int renewtype, struct ipv6_opt_hdr **dest, struct ipv6_opt_hdr *old, struct ipv6_opt_hdr *new, int newtype, char **p) { struct ipv6_opt_hdr *src; src = (renewtype == newtype ? new : old); if (!src) return; memcpy(*p, src, ipv6_optlen(src)); *dest = (struct ipv6_opt_hdr *)*p; *p += CMSG_ALIGN(ipv6_optlen(*dest)); } /** * ipv6_renew_options - replace a specific ext hdr with a new one. * * @sk: sock from which to allocate memory * @opt: original options * @newtype: option type to replace in @opt * @newopt: new option of type @newtype to replace (user-mem) * * Returns a new set of options which is a copy of @opt with the * option type @newtype replaced with @newopt. * * @opt may be NULL, in which case a new set of options is returned * containing just @newopt. * * @newopt may be NULL, in which case the specified option type is * not copied into the new set of options. * * The new set of options is allocated from the socket option memory * buffer of @sk. 
*/ struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, int newtype, struct ipv6_opt_hdr *newopt) { int tot_len = 0; char *p; struct ipv6_txoptions *opt2; if (opt) { if (newtype != IPV6_HOPOPTS && opt->hopopt) tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); if (newtype != IPV6_RTHDR && opt->srcrt) tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); if (newtype != IPV6_DSTOPTS && opt->dst1opt) tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); } if (newopt) tot_len += CMSG_ALIGN(ipv6_optlen(newopt)); if (!tot_len) return NULL; tot_len += sizeof(*opt2); opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC); if (!opt2) return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); refcount_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt, (opt ? opt->hopopt : NULL), newopt, newtype, &p); ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt, (opt ? opt->dst0opt : NULL), newopt, newtype, &p); ipv6_renew_option(IPV6_RTHDR, (struct ipv6_opt_hdr **)&opt2->srcrt, (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL), newopt, newtype, &p); ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt, (opt ? opt->dst1opt : NULL), newopt, newtype, &p); opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0); opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); return opt2; } struct ipv6_txoptions *__ipv6_fixup_options(struct ipv6_txoptions *opt_space, struct ipv6_txoptions *opt) { /* * ignore the dest before srcrt unless srcrt is being included. 
* --yoshfuji */ if (opt->dst0opt && !opt->srcrt) { if (opt_space != opt) { memcpy(opt_space, opt, sizeof(*opt_space)); opt = opt_space; } opt->opt_nflen -= ipv6_optlen(opt->dst0opt); opt->dst0opt = NULL; } return opt; } EXPORT_SYMBOL_GPL(__ipv6_fixup_options); /** * fl6_update_dst - update flowi destination address with info given * by srcrt option, if any. * * @fl6: flowi6 for which daddr is to be updated * @opt: struct ipv6_txoptions in which to look for srcrt opt * @orig: copy of original daddr address if modified * * Returns NULL if no txoptions or no srcrt, otherwise returns orig * and initial value of fl6->daddr set in orig */ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, const struct ipv6_txoptions *opt, struct in6_addr *orig) { if (!opt || !opt->srcrt) return NULL; *orig = fl6->daddr; switch (opt->srcrt->type) { case IPV6_SRCRT_TYPE_0: case IPV6_SRCRT_STRICT: case IPV6_SRCRT_TYPE_2: fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr; break; case IPV6_SRCRT_TYPE_4: { struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; fl6->daddr = srh->segments[srh->segments_left]; break; } default: return NULL; } return orig; } EXPORT_SYMBOL_GPL(fl6_update_dst);
57 57 57 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* include/asm-generic/tlb.h * * Generic TLB shootdown code * * Copyright 2001 Red Hat, Inc. * Based on code from mm/memory.c Copyright Linus Torvalds and others. * * Copyright 2011 Red Hat, Inc., Peter Zijlstra */ #ifndef _ASM_GENERIC__TLB_H #define _ASM_GENERIC__TLB_H #include <linux/mmu_notifier.h> #include <linux/swap.h> #include <linux/hugetlb_inline.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> /* * Blindly accessing user memory from NMI context can be dangerous * if we're in the middle of switching the current user task or switching * the loaded mm. */ #ifndef nmi_uaccess_okay # define nmi_uaccess_okay() true #endif #ifdef CONFIG_MMU /* * Generic MMU-gather implementation. * * The mmu_gather data structure is used by the mm code to implement the * correct and efficient ordering of freeing pages and TLB invalidations. 
* * This correct ordering is: * * 1) unhook page * 2) TLB invalidate page * 3) free page * * That is, we must never free a page before we have ensured there are no live * translations left to it. Otherwise it might be possible to observe (or * worse, change) the page content after it has been reused. * * The mmu_gather API consists of: * * - tlb_gather_mmu() / tlb_gather_mmu_fullmm() / tlb_finish_mmu() * * start and finish a mmu_gather * * Finish in particular will issue a (final) TLB invalidate and free * all (remaining) queued pages. * * - tlb_start_vma() / tlb_end_vma(); marks the start / end of a VMA * * Defaults to flushing at tlb_end_vma() to reset the range; helps when * there are large holes between the VMAs. * * - tlb_remove_table() * * tlb_remove_table() is the basic primitive to free page-table directories * (__p*_free_tlb()). In its most primitive form it is an alias for * tlb_remove_page() below, for when page directories are pages and have no * additional constraints. * * See also MMU_GATHER_TABLE_FREE and MMU_GATHER_RCU_TABLE_FREE. * * - tlb_remove_page() / __tlb_remove_page() * - tlb_remove_page_size() / __tlb_remove_page_size() * - __tlb_remove_folio_pages() * * __tlb_remove_page_size() is the basic primitive that queues a page for * freeing. __tlb_remove_page() assumes PAGE_SIZE. Both will return a * boolean indicating if the queue is (now) full and a call to * tlb_flush_mmu() is required. * * tlb_remove_page() and tlb_remove_page_size() imply the call to * tlb_flush_mmu() when required and have no return value. * * __tlb_remove_folio_pages() is similar to __tlb_remove_page(), however, * instead of removing a single page, remove the given number of consecutive * pages that are all part of the same (large) folio: just like calling * __tlb_remove_page() on each page individually. * * - tlb_change_page_size() * * call before __tlb_remove_page*() to set the current page-size; implies a * possible tlb_flush_mmu() call.
* * - tlb_flush_mmu() / tlb_flush_mmu_tlbonly() * * tlb_flush_mmu_tlbonly() - does the TLB invalidate (and resets * related state, like the range) * * tlb_flush_mmu() - in addition to the above TLB invalidate, also frees * whatever pages are still batched. * * - mmu_gather::fullmm * * A flag set by tlb_gather_mmu_fullmm() to indicate we're going to free * the entire mm; this allows a number of optimizations. * * - We can ignore tlb_{start,end}_vma(); because we don't * care about ranges. Everything will be shot down. * * - (RISC) architectures that use ASIDs can cycle to a new ASID * and delay the invalidation until ASID space runs out. * * - mmu_gather::need_flush_all * * A flag that can be set by the arch code if it wants to force * flush the entire TLB irrespective of the range. For instance * x86-PAE needs this when changing top-level entries. * * And allows the architecture to provide and implement tlb_flush(): * * tlb_flush() may, in addition to the above mentioned mmu_gather fields, make * use of: * * - mmu_gather::start / mmu_gather::end * * which provides the range that needs to be flushed to cover the pages to * be freed. * * - mmu_gather::freed_tables * * set when we freed page table pages * * - tlb_get_unmap_shift() / tlb_get_unmap_size() * * returns the smallest TLB entry size unmapped in this range. * * If an architecture does not provide tlb_flush() a default implementation * based on flush_tlb_range() will be used, unless MMU_GATHER_NO_RANGE is * specified, in which case we'll default to flush_tlb_mm(). * * Additionally there are a few opt-in features: * * MMU_GATHER_PAGE_SIZE * * This ensures we call tlb_flush() every time tlb_change_page_size() actually * changes the size and provides mmu_gather::page_size to tlb_flush(). * * This might be useful if your architecture has size specific TLB * invalidation instructions. 
* * MMU_GATHER_TABLE_FREE * * This provides tlb_remove_table(), to be used instead of tlb_remove_page() * for page directories (__p*_free_tlb()). * * Useful if your architecture has non-page page directories. * * When used, an architecture is expected to provide __tlb_remove_table() or * use the generic __tlb_remove_table(), which does the actual freeing of these * pages. * * MMU_GATHER_RCU_TABLE_FREE * * Like MMU_GATHER_TABLE_FREE, and adds semi-RCU semantics to the free (see * comment below). * * Useful if your architecture doesn't use IPIs for remote TLB invalidates * and therefore doesn't naturally serialize with software page-table walkers. * * MMU_GATHER_NO_FLUSH_CACHE * * Indicates the architecture has flush_cache_range() but it needs *NOT* be called * before unmapping a VMA. * * NOTE: strictly speaking we shouldn't have this knob and instead rely on * flush_cache_range() being a NOP, except Sparc64 seems to be * different here. * * MMU_GATHER_MERGE_VMAS * * Indicates the architecture wants to merge ranges over VMAs; typical when * multiple range invalidates are more expensive than a full invalidate. * * MMU_GATHER_NO_RANGE * * Use this if your architecture lacks an efficient flush_tlb_range(). This * option implies MMU_GATHER_MERGE_VMAS above. * * MMU_GATHER_NO_GATHER * * If the option is set the mmu_gather will not track individual pages for * delayed page free anymore. A platform that enables the option needs to * provide its own implementation of the __tlb_remove_page_size() function to * free pages. * * This is useful if your architecture already flushes TLB entries in the * various ptep_get_and_clear() functions.
*/

#ifdef CONFIG_MMU_GATHER_TABLE_FREE

/*
 * One batch of page-table pointers handed to tlb_remove_table().
 *
 * @tables is a flexible array member; MAX_TABLE_BATCH is the number of
 * entries for which header plus entries still fit in PAGE_SIZE bytes.
 * NOTE(review): @nr is presumably the number of @tables slots in use --
 * confirm against the code that fills the batch.
 */
struct mmu_table_batch {
#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
	struct rcu_head		rcu;
#endif
	unsigned int		nr;
	void			*tables[];
};

#define MAX_TABLE_BATCH		\
	((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))

#ifndef __HAVE_ARCH_TLB_REMOVE_TABLE
/*
 * Generic table free: @table is interpreted as a struct ptdesc and passed
 * to pagetable_dtor_free().  Architectures defining
 * __HAVE_ARCH_TLB_REMOVE_TABLE supply their own version instead.
 */
static inline void __tlb_remove_table(void *table)
{
	struct ptdesc *ptdesc = (struct ptdesc *)table;

	pagetable_dtor_free(ptdesc);
}
#endif

extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

/* Forward declaration; the definition follows struct mmu_gather. */
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page);

/*
 * Without MMU_GATHER_TABLE_FREE the architecture is assumed to have page based
 * page directories and we can use the normal page batching to free them.
 *
 * @table is treated as a struct page: its ptdesc is run through
 * pagetable_dtor() and the page is then queued with tlb_remove_page().
 */
static inline void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
	struct page *page = (struct page *)table;

	pagetable_dtor(page_ptdesc(page));
	tlb_remove_page(tlb, page);
}
#endif /* CONFIG_MMU_GATHER_TABLE_FREE */

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
/*
 * This allows an architecture that does not use the linux page-tables for
 * hardware to skip the TLBI when freeing page tables.
 *
 * The default, used when the architecture does not define it, is (true).
 */
#ifndef tlb_needs_table_invalidate
#define tlb_needs_table_invalidate() (true)
#endif

void tlb_remove_table_sync_one(void);

#else

/* Overriding tlb_needs_table_invalidate() only makes sense with RCU table free. */
#ifdef tlb_needs_table_invalidate
#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
#endif

/* No-op stub when MMU_GATHER_RCU_TABLE_FREE is not configured. */
static inline void tlb_remove_table_sync_one(void) { }

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

#ifndef CONFIG_MMU_GATHER_NO_GATHER
/*
 * If we can't allocate a page to make a big batch of page pointers
 * to work on, then just handle a few from the on-stack structure.
*/
#define MMU_GATHER_BUNDLE	8

/*
 * One batch of pages queued for freeing.  Batches are chained through
 * @next; @encoded_pages is a flexible array member and MAX_GATHER_BATCH is
 * the number of entries for which header plus entries fit in PAGE_SIZE.
 * NOTE(review): @nr / @max are presumably the used and available slot
 * counts of @encoded_pages -- confirm against the batching code.
 */
struct mmu_gather_batch {
	struct mmu_gather_batch	*next;
	unsigned int		nr;
	unsigned int		max;
	struct encoded_page	*encoded_pages[];
};

#define MAX_GATHER_BATCH	\
	((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))

/*
 * Limit the maximum number of mmu_gather batches to reduce a risk of soft
 * lockups for non-preemptible kernels on huge machines when a lot of memory
 * is zapped during unmapping.
 * 10K pages freed at once should be safe even without a preemption point.
 */
#define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)

extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
		bool delay_rmap, int page_size);
bool __tlb_remove_folio_pages(struct mmu_gather *tlb, struct page *page,
		unsigned int nr_pages, bool delay_rmap);

#ifdef CONFIG_SMP
/*
 * This both sets 'delayed_rmap', and returns true. It would be an inline
 * function, except we define it before the 'struct mmu_gather'.
 */
#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
#endif

#endif /* !CONFIG_MMU_GATHER_NO_GATHER */

/*
 * We have a no-op version of the rmap removal that doesn't
 * delay anything. That is used on S390, which flushes remote
 * TLBs synchronously, and on UP, which doesn't have any
 * remote TLBs to flush and is not preemptible due to this
 * all happening under the page table lock.
 *
 * It is selected whenever tlb_delay_rmap was not defined above.
 */
#ifndef tlb_delay_rmap
#define tlb_delay_rmap(tlb) (false)
static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
#endif

/*
 * struct mmu_gather is an opaque type used by the mm code for passing around
 * any data needed by arch specific code for tlb_remove_page.
*/ struct mmu_gather { struct mm_struct *mm; #ifdef CONFIG_MMU_GATHER_TABLE_FREE struct mmu_table_batch *batch; #endif unsigned long start; unsigned long end; /* * we are in the middle of an operation to clear * a full mm and can make some optimizations */ unsigned int fullmm : 1; /* * we have performed an operation which * requires a complete flush of the tlb */ unsigned int need_flush_all : 1; /* * we have removed page directories */ unsigned int freed_tables : 1; /* * Do we have pending delayed rmap removals? */ unsigned int delayed_rmap : 1; /* * at which levels have we cleared entries? */ unsigned int cleared_ptes : 1; unsigned int cleared_pmds : 1; unsigned int cleared_puds : 1; unsigned int cleared_p4ds : 1; /* * tracks VM_EXEC | VM_HUGETLB in tlb_start_vma */ unsigned int vma_exec : 1; unsigned int vma_huge : 1; unsigned int vma_pfn : 1; unsigned int batch_count; #ifndef CONFIG_MMU_GATHER_NO_GATHER struct mmu_gather_batch *active; struct mmu_gather_batch local; struct page *__pages[MMU_GATHER_BUNDLE]; #ifdef CONFIG_MMU_GATHER_PAGE_SIZE unsigned int page_size; #endif #endif }; void tlb_flush_mmu(struct mmu_gather *tlb); static inline void __tlb_adjust_range(struct mmu_gather *tlb, unsigned long address, unsigned int range_size) { tlb->start = min(tlb->start, address); tlb->end = max(tlb->end, address + range_size); } static inline void __tlb_reset_range(struct mmu_gather *tlb) { if (tlb->fullmm) { tlb->start = tlb->end = ~0; } else { tlb->start = TASK_SIZE; tlb->end = 0; } tlb->freed_tables = 0; tlb->cleared_ptes = 0; tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; /* * Do not reset mmu_gather::vma_* fields here, we do not * call into tlb_start_vma() again to set them if there is an * intermediate flush. 
*/ } #ifdef CONFIG_MMU_GATHER_NO_RANGE #if defined(tlb_flush) #error MMU_GATHER_NO_RANGE relies on default tlb_flush() #endif /* * When an architecture does not have efficient means of range flushing TLBs * there is no point in doing intermediate flushes on tlb_end_vma() to keep the * range small. We equally don't have to worry about page granularity or other * things. * * All we need to do is issue a full flush for any !0 range. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->end) flush_tlb_mm(tlb->mm); } #else /* CONFIG_MMU_GATHER_NO_RANGE */ #ifndef tlb_flush /* * When an architecture does not provide its own tlb_flush() implementation * but does have a reasonably efficient flush_vma_range() implementation * use that. */ static inline void tlb_flush(struct mmu_gather *tlb) { if (tlb->fullmm || tlb->need_flush_all) { flush_tlb_mm(tlb->mm); } else if (tlb->end) { struct vm_area_struct vma = { .vm_mm = tlb->mm, .vm_flags = (tlb->vma_exec ? VM_EXEC : 0) | (tlb->vma_huge ? VM_HUGETLB : 0), }; flush_tlb_range(&vma, tlb->start, tlb->end); } } #endif #endif /* CONFIG_MMU_GATHER_NO_RANGE */ static inline void tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { /* * flush_tlb_range() implementations that look at VM_HUGETLB (tile, * mips-4k) flush only large pages. * * flush_tlb_range() implementations that flush I-TLB also flush D-TLB * (tile, xtensa, arm), so it's ok to just add VM_EXEC to an existing * range. * * We rely on tlb_end_vma() to issue a flush, such that when we reset * these values the batch is empty. */ tlb->vma_huge = is_vm_hugetlb_page(vma); tlb->vma_exec = !!(vma->vm_flags & VM_EXEC); tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)); } static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { /* * Anything calling __tlb_adjust_range() also sets at least one of * these bits. 
*/ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || tlb->cleared_puds || tlb->cleared_p4ds)) return; tlb_flush(tlb); __tlb_reset_range(tlb); } static inline void tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size) { if (__tlb_remove_page_size(tlb, page, false, page_size)) tlb_flush_mmu(tlb); } static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool delay_rmap) { return __tlb_remove_page_size(tlb, page, delay_rmap, PAGE_SIZE); } /* tlb_remove_page * Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when * required. */ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page) { return tlb_remove_page_size(tlb, page, PAGE_SIZE); } static inline void tlb_remove_ptdesc(struct mmu_gather *tlb, void *pt) { tlb_remove_table(tlb, pt); } /* Like tlb_remove_ptdesc, but for page-like page directories. */ static inline void tlb_remove_page_ptdesc(struct mmu_gather *tlb, struct ptdesc *pt) { tlb_remove_page(tlb, ptdesc_page(pt)); } static inline void tlb_change_page_size(struct mmu_gather *tlb, unsigned int page_size) { #ifdef CONFIG_MMU_GATHER_PAGE_SIZE if (tlb->page_size && tlb->page_size != page_size) { if (!tlb->fullmm && !tlb->need_flush_all) tlb_flush_mmu(tlb); } tlb->page_size = page_size; #endif } static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb) { if (tlb->cleared_ptes) return PAGE_SHIFT; if (tlb->cleared_pmds) return PMD_SHIFT; if (tlb->cleared_puds) return PUD_SHIFT; if (tlb->cleared_p4ds) return P4D_SHIFT; return PAGE_SHIFT; } static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb) { return 1UL << tlb_get_unmap_shift(tlb); } /* * In the case of tlb vma handling, we can optimise these away in the * case where we're doing a full MM flush. When we're doing a munmap, * the vmas are adjusted to only cover the region to be torn down. 
*/ static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; tlb_update_vma_flags(tlb, vma); #ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE flush_cache_range(vma, vma->vm_start, vma->vm_end); #endif } static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { if (tlb->fullmm) return; /* * VM_PFNMAP is more fragile because the core mm will not track the * page mapcount -- there might not be page-frames for these PFNs after * all. Force flush TLBs for such ranges to avoid munmap() vs * unmap_mapping_range() races. */ if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) { /* * Do a TLB flush and reset the range at VMA boundaries; this avoids * the ranges growing with the unused space between consecutive VMAs. */ tlb_flush_mmu_tlbonly(tlb); } } /* * tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end, * and set corresponding cleared_*. */ static inline void tlb_flush_pte_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_ptes = 1; } static inline void tlb_flush_pmd_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_pmds = 1; } static inline void tlb_flush_pud_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_puds = 1; } static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, unsigned long address, unsigned long size) { __tlb_adjust_range(tlb, address, size); tlb->cleared_p4ds = 1; } #ifndef __tlb_remove_tlb_entry static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address) { } #endif /** * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation. * * Record the fact that pte's were really unmapped by updating the range, * so we can later optimise away the tlb invalidate. 
This helps when
 * userspace is unmapping already-unmapped pages, which happens quite a lot.
 *
 * Expands to a tlb_flush_pte_range() over one PAGE_SIZE at @address followed
 * by the per-entry hook __tlb_remove_tlb_entry().
 */
#define tlb_remove_tlb_entry(tlb, ptep, address)		\
	do {							\
		tlb_flush_pte_range(tlb, address, PAGE_SIZE);	\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)

/**
 * tlb_remove_tlb_entries - remember unmapping of multiple consecutive ptes for
 *			    later tlb invalidation.
 *
 * Similar to tlb_remove_tlb_entry(), but remember unmapping of multiple
 * consecutive ptes instead of only a single one.
 *
 * The range is extended once by PAGE_SIZE * @nr, then the per-entry hook is
 * called for each of the @nr ptes starting at @ptep / @address.
 *
 * @nr must be at least 1: the hook runs once before @nr is tested, and the
 * unsigned counter would wrap around if 0 were passed.
 */
static inline void tlb_remove_tlb_entries(struct mmu_gather *tlb,
		pte_t *ptep, unsigned int nr, unsigned long address)
{
	tlb_flush_pte_range(tlb, address, PAGE_SIZE * nr);
	for (;;) {
		__tlb_remove_tlb_entry(tlb, ptep, address);
		/* stop after the last entry, before advancing past it */
		if (--nr == 0)
			break;
		ptep++;
		address += PAGE_SIZE;
	}
}

/*
 * Hugetlb variant of tlb_remove_tlb_entry(): the range helper is picked
 * from the huge page size of @h (p4d, pud, pmd or pte level, largest match
 * first) and the range is extended by that size.
 */
#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)	\
	do {							\
		unsigned long _sz = huge_page_size(h);		\
		if (_sz >= P4D_SIZE)				\
			tlb_flush_p4d_range(tlb, address, _sz);	\
		else if (_sz >= PUD_SIZE)			\
			tlb_flush_pud_range(tlb, address, _sz);	\
		else if (_sz >= PMD_SIZE)			\
			tlb_flush_pmd_range(tlb, address, _sz);	\
		else						\
			tlb_flush_pte_range(tlb, address, _sz);	\
		__tlb_remove_tlb_entry(tlb, ptep, address);	\
	} while (0)

/**
 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
 * This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pmd_tlb_entry
#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
#endif

#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)			\
	do {								\
		tlb_flush_pmd_range(tlb, address, HPAGE_PMD_SIZE);	\
		__tlb_remove_pmd_tlb_entry(tlb, pmdp, address);		\
	} while (0)

/**
 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
 * invalidation. This is a nop so far, because only x86 needs it.
*/ #ifndef __tlb_remove_pud_tlb_entry #define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0) #endif #define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ do { \ tlb_flush_pud_range(tlb, address, HPAGE_PUD_SIZE); \ __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ } while (0) /* * For things like page tables caches (ie caching addresses "inside" the * page tables, like x86 does), for legacy reasons, flushing an * individual page had better flush the page table caches behind it. This * is definitely how x86 works, for example. And if you have an * architected non-legacy page table cache (which I'm not aware of * anybody actually doing), you're going to have some architecturally * explicit flushing for that, likely *separate* from a regular TLB entry * flush, and thus you'd need more than just some range expansion.. * * So if we ever find an architecture * that would want something that odd, I think it is up to that * architecture to do its own odd thing, not cause pain for others * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com * * For now w.r.t page table cache, mark the range_size as PAGE_SIZE */ #ifndef pte_free_tlb #define pte_free_tlb(tlb, ptep, address) \ do { \ tlb_flush_pmd_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pte_free_tlb(tlb, ptep, address); \ } while (0) #endif #ifndef pmd_free_tlb #define pmd_free_tlb(tlb, pmdp, address) \ do { \ tlb_flush_pud_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pmd_free_tlb(tlb, pmdp, address); \ } while (0) #endif #ifndef pud_free_tlb #define pud_free_tlb(tlb, pudp, address) \ do { \ tlb_flush_p4d_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __pud_free_tlb(tlb, pudp, address); \ } while (0) #endif #ifndef p4d_free_tlb #define p4d_free_tlb(tlb, pudp, address) \ do { \ __tlb_adjust_range(tlb, address, PAGE_SIZE); \ tlb->freed_tables = 1; \ __p4d_free_tlb(tlb, pudp, address); \ } while (0) #endif #ifndef 
pte_needs_flush static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte) { return true; } #endif #ifndef huge_pmd_needs_flush static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) { return true; } #endif #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */
2195 2195 492 463 77 1999 428 429 56 2374 348 2374 55 1496 1489 727 18 18 3 3 3 3 3 74 292 291 74 18 18 56 244 244 56 47 1188 808 890 55 56 56 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_NOTIFY_H #define _LINUX_FS_NOTIFY_H /* * include/linux/fsnotify.h - generic hooks for filesystem notification, to * reduce in-source duplication from both dnotify and inotify. * * We don't compile any of this away in some complicated menagerie of ifdefs. * Instead, we rely on the code inside to optimize away as needed. * * (C) Copyright 2005 Robert Love */ #include <linux/fsnotify_backend.h> #include <linux/audit.h> #include <linux/slab.h> #include <linux/bug.h> /* Are there any inode/mount/sb objects watched with priority prio or above? */ static inline bool fsnotify_sb_has_priority_watchers(struct super_block *sb, int prio) { struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); /* Were any marks ever added to any object on this sb? */ if (!sbinfo) return false; return atomic_long_read(&sbinfo->watched_objects[prio]); } /* Are there any inode/mount/sb objects that are being watched at all? */ static inline bool fsnotify_sb_has_watchers(struct super_block *sb) { return fsnotify_sb_has_priority_watchers(sb, 0); } /* * Notify this @dir inode about a change in a child directory entry. * The directory entry may have turned positive or negative or its inode may * have changed (i.e. renamed over). * * Unlike fsnotify_parent(), the event will be reported regardless of the * FS_EVENT_ON_CHILD mask on the parent inode and will not be reported if only * the child is interested and not the parent. 
*/
static inline int fsnotify_name(__u32 mask, const void *data, int data_type,
				struct inode *dir, const struct qstr *name,
				u32 cookie)
{
	/* Cheap early exit: nothing on this superblock is being watched. */
	if (!fsnotify_sb_has_watchers(dir->i_sb))
		return 0;

	return fsnotify(mask, data, data_type, dir, name, NULL, cookie);
}

/*
 * Report @mask for @dentry to its directory @dir, using the dentry's own
 * name and no cookie.  The result of fsnotify_name() is discarded.
 */
static inline void fsnotify_dirent(struct inode *dir, struct dentry *dentry,
				   __u32 mask)
{
	fsnotify_name(mask, dentry, FSNOTIFY_EVENT_DENTRY, dir, &dentry->d_name, 0);
}

/*
 * Report @mask on @inode itself (no directory, no name).  FS_ISDIR is added
 * to the mask when the inode is a directory.
 */
static inline void fsnotify_inode(struct inode *inode, __u32 mask)
{
	if (!fsnotify_sb_has_watchers(inode->i_sb))
		return;

	if (S_ISDIR(inode->i_mode))
		mask |= FS_ISDIR;

	fsnotify(mask, inode, FSNOTIFY_EVENT_INODE, NULL, NULL, inode, 0);
}

/*
 * Notify this dentry's parent about a child's events.
 *
 * Falls back to notifying only the child inode when the dentry is a
 * directory whose parent is not flagged as watching, or when the dentry is
 * a root (disconnected) dentry.  Returns 0 when nothing on the superblock
 * is watched, otherwise the result of the notification call.
 */
static inline int fsnotify_parent(struct dentry *dentry, __u32 mask,
				  const void *data, int data_type)
{
	struct inode *inode = d_inode(dentry);

	if (!fsnotify_sb_has_watchers(inode->i_sb))
		return 0;

	if (S_ISDIR(inode->i_mode)) {
		mask |= FS_ISDIR;

		/* sb/mount marks are not interested in name of directory */
		if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
			goto notify_child;
	}

	/* disconnected dentry cannot notify parent */
	if (IS_ROOT(dentry))
		goto notify_child;

	return __fsnotify_parent(dentry, mask, data, data_type);

notify_child:
	return fsnotify(mask, data, data_type, NULL, NULL, inode, 0);
}

/*
 * Simple wrappers to consolidate calls to fsnotify_parent() when an event
 * is on a file/dentry.
 */
static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask)
{
	fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY);
}

static inline int fsnotify_path(const struct path *path, __u32 mask)
{
	return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH);
}

static inline int fsnotify_file(struct file *file, __u32 mask)
{
	/*
	 * FMODE_NONOTIFY are fds generated by fanotify itself which should not
	 * generate new events. We also don't want to generate events for
	 * FMODE_PATH fds (involves open & close events) as they are just
	 * handle creation / destruction events and not "real" file events.
	 */
	if (FMODE_FSNOTIFY_NONE(file->f_mode))
		return 0;

	return fsnotify_path(&file->f_path, mask);
}

#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS

void file_set_fsnotify_mode_from_watchers(struct file *file);

/*
 * fsnotify_file_area_perm - permission hook before access to file range
 *
 * Returns 0 when access may proceed, or the non-zero result of the first
 * hook that objects.  The pre-content event (HSM mode only) is generated
 * before the legacy FS_ACCESS_PERM event, which is generated for MAY_READ
 * only.
 */
static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
					  const loff_t *ppos, size_t count)
{
	/*
	 * filesystem may be modified in the context of permission events
	 * (e.g. by HSM filling a file on access), so sb freeze protection
	 * must not be held.
	 */
	lockdep_assert_once(file_write_not_started(file));

	if (!(perm_mask & (MAY_READ | MAY_WRITE | MAY_ACCESS)))
		return 0;

	if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode)))
		return 0;

	/*
	 * read()/write() and other types of access generate pre-content events.
	 */
	if (unlikely(FMODE_FSNOTIFY_HSM(file->f_mode))) {
		int ret = fsnotify_pre_content(&file->f_path, ppos, count);

		if (ret)
			return ret;
	}

	if (!(perm_mask & MAY_READ))
		return 0;

	/*
	 * read() also generates the legacy FS_ACCESS_PERM event, so content
	 * scanners can inspect the content filled by pre-content event.
	 */
	return fsnotify_path(&file->f_path, FS_ACCESS_PERM);
}

/*
 * fsnotify_truncate_perm - permission hook before file truncate
 *
 * Only generates a pre-content event (with @length as position and a count
 * of 0) when the superblock has SB_I_ALLOW_HSM set and has watchers at
 * FSNOTIFY_PRIO_PRE_CONTENT priority; otherwise returns 0.
 */
static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
	struct inode *inode = d_inode(path->dentry);

	if (!(inode->i_sb->s_iflags & SB_I_ALLOW_HSM) ||
	    !fsnotify_sb_has_priority_watchers(inode->i_sb,
					       FSNOTIFY_PRIO_PRE_CONTENT))
		return 0;

	return fsnotify_pre_content(path, &length, 0);
}

/*
 * fsnotify_file_perm - permission hook before file access (unknown range)
 */
static inline int fsnotify_file_perm(struct file *file, int perm_mask)
{
	return fsnotify_file_area_perm(file, perm_mask, NULL, 0);
}

/*
 * fsnotify_open_perm - permission hook before file open
 *
 * For an exec open (__FMODE_EXEC) FS_OPEN_EXEC_PERM is checked first and a
 * non-zero result is returned immediately; FS_OPEN_PERM is checked after.
 */
static inline int fsnotify_open_perm(struct file *file)
{
	int ret;

	if (likely(!FMODE_FSNOTIFY_PERM(file->f_mode)))
		return 0;

	if (file->f_flags & __FMODE_EXEC) {
		ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM);
		if (ret)
			return ret;
	}

	return fsnotify_path(&file->f_path, FS_OPEN_PERM);
}

#else /* !CONFIG_FANOTIFY_ACCESS_PERMISSIONS */

/* Without permission events every hook is a stub that allows the access. */
static inline void file_set_fsnotify_mode_from_watchers(struct file *file)
{
}

static inline int fsnotify_file_area_perm(struct file *file, int perm_mask,
					  const loff_t *ppos, size_t count)
{
	return 0;
}

static inline int fsnotify_truncate_perm(const struct path *path, loff_t length)
{
	return 0;
}

static inline int fsnotify_file_perm(struct file *file, int perm_mask)
{
	return 0;
}

static inline int fsnotify_open_perm(struct file *file)
{
	return 0;
}
#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */

/*
 * fsnotify_link_count - inode's link count changed
 */
static inline void fsnotify_link_count(struct inode *inode)
{
	fsnotify_inode(inode, FS_ATTRIB);
}

/*
 * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir
 *
 * Events are generated in this order: FS_RENAME on @old_dir, FS_MOVED_FROM
 * on @old_dir and FS_MOVED_TO on @new_dir (the latter two share one cookie),
 * a link count change on @target if there is one, and FS_MOVE_SELF on the
 * moved inode.  FS_ISDIR is added to the directory events when @isdir is
 * set.  The new name is taken from @moved->d_name.
 */
static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir,
				 const struct qstr *old_name,
				 int isdir, struct inode *target,
				 struct dentry *moved)
{
	struct inode *source = moved->d_inode;
	u32 fs_cookie = fsnotify_get_cookie();
	__u32 old_dir_mask = FS_MOVED_FROM;
	__u32 new_dir_mask = FS_MOVED_TO;
	__u32 rename_mask = FS_RENAME;
	const struct qstr *new_name = &moved->d_name;

	if (isdir) {
		old_dir_mask |= FS_ISDIR;
		new_dir_mask |= FS_ISDIR;
		rename_mask |= FS_ISDIR;
	}

	/* Event with information about both old and new parent+name */
	fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY,
		      old_dir, old_name, 0);

	fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE,
		      old_dir, old_name, fs_cookie);
	fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE,
		      new_dir, new_name, fs_cookie);

	if (target)
		fsnotify_link_count(target);
	fsnotify_inode(source, FS_MOVE_SELF);
	audit_inode_child(new_dir, moved, AUDIT_TYPE_CHILD_CREATE);
}

/*
 * fsnotify_inode_delete - an inode is being evicted from cache, clean up is needed
 */
static inline void fsnotify_inode_delete(struct inode *inode)
{
	__fsnotify_inode_delete(inode);
}

/*
 * fsnotify_vfsmount_delete - a vfsmount is being destroyed, clean up is needed
 */
static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt)
{
	__fsnotify_vfsmount_delete(mnt);
}

/*
 * fsnotify_inoderemove - an inode is going away
 *
 * Sends FS_DELETE_SELF for the inode, then performs the same clean up as
 * fsnotify_inode_delete().
 */
static inline void fsnotify_inoderemove(struct inode *inode)
{
	fsnotify_inode(inode, FS_DELETE_SELF);
	__fsnotify_inode_delete(inode);
}

/*
 * fsnotify_create - 'name' was linked in
 *
 * Caller must make sure that dentry->d_name is stable.
 * Note: some filesystems (e.g. kernfs) leave @dentry negative and instantiate
 * ->d_inode later
 */
static inline void fsnotify_create(struct inode *dir, struct dentry *dentry)
{
	audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE);

	fsnotify_dirent(dir, dentry, FS_CREATE);
}

/*
 * fsnotify_link - new hardlink in 'inode' directory
 *
 * Caller must make sure that new_dentry->d_name is stable.
* Note: We have to pass also the linked inode ptr as some filesystems leave * new_dentry->d_inode NULL and instantiate inode pointer later */ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct dentry *new_dentry) { fsnotify_link_count(inode); audit_inode_child(dir, new_dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_name(FS_CREATE, inode, FSNOTIFY_EVENT_INODE, dir, &new_dentry->d_name, 0); } /* * fsnotify_delete - @dentry was unlinked and unhashed * * Caller must make sure that dentry->d_name is stable. * * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode * as this may be called after d_delete() and old_dentry may be negative. */ static inline void fsnotify_delete(struct inode *dir, struct inode *inode, struct dentry *dentry) { __u32 mask = FS_DELETE; if (S_ISDIR(inode->i_mode)) mask |= FS_ISDIR; fsnotify_name(mask, inode, FSNOTIFY_EVENT_INODE, dir, &dentry->d_name, 0); } /** * d_delete_notify - delete a dentry and call fsnotify_delete() * @dentry: The dentry to delete * * This helper is used to guaranty that the unlinked inode cannot be found * by lookup of this name after fsnotify_delete() event has been delivered. */ static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); ihold(inode); d_delete(dentry); fsnotify_delete(dir, inode, dentry); iput(inode); } /* * fsnotify_unlink - 'name' was unlinked * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_mkdir - directory 'name' was created * * Caller must make sure that dentry->d_name is stable. * Note: some filesystems (e.g. 
kernfs) leave @dentry negative and instantiate * ->d_inode later */ static inline void fsnotify_mkdir(struct inode *dir, struct dentry *dentry) { audit_inode_child(dir, dentry, AUDIT_TYPE_CHILD_CREATE); fsnotify_dirent(dir, dentry, FS_CREATE | FS_ISDIR); } /* * fsnotify_rmdir - directory 'name' was removed * * Caller must make sure that dentry->d_name is stable. */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { if (WARN_ON_ONCE(d_is_negative(dentry))) return; fsnotify_delete(dir, d_inode(dentry), dentry); } /* * fsnotify_access - file was read */ static inline void fsnotify_access(struct file *file) { fsnotify_file(file, FS_ACCESS); } /* * fsnotify_modify - file was modified */ static inline void fsnotify_modify(struct file *file) { fsnotify_file(file, FS_MODIFY); } /* * fsnotify_open - file was opened */ static inline void fsnotify_open(struct file *file) { __u32 mask = FS_OPEN; if (file->f_flags & __FMODE_EXEC) mask |= FS_OPEN_EXEC; fsnotify_file(file, mask); } /* * fsnotify_close - file was closed */ static inline void fsnotify_close(struct file *file) { __u32 mask = (file->f_mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; fsnotify_file(file, mask); } /* * fsnotify_xattr - extended attributes were changed */ static inline void fsnotify_xattr(struct dentry *dentry) { fsnotify_dentry(dentry, FS_ATTRIB); } /* * fsnotify_change - notify_change event. file was modified and/or metadata * was changed. 
*/ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { __u32 mask = 0; if (ia_valid & ATTR_UID) mask |= FS_ATTRIB; if (ia_valid & ATTR_GID) mask |= FS_ATTRIB; if (ia_valid & ATTR_SIZE) mask |= FS_MODIFY; /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) mask |= FS_ATTRIB; else if (ia_valid & ATTR_ATIME) mask |= FS_ACCESS; else if (ia_valid & ATTR_MTIME) mask |= FS_MODIFY; if (ia_valid & ATTR_MODE) mask |= FS_ATTRIB; if (mask) fsnotify_dentry(dentry, mask); } static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, int error) { struct fs_error_report report = { .error = error, .inode = inode, .sb = sb, }; return fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL, NULL, 0); } #endif /* _LINUX_FS_NOTIFY_H */
92 92 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 // SPDX-License-Identifier: GPL-2.0-only /* * fs/kernfs/mount.c - kernfs mount implementation * * Copyright (c) 2001-3 Patrick Mochel * Copyright (c) 2007 SUSE Linux Products GmbH * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org> */ #include <linux/fs.h> #include <linux/mount.h> #include <linux/init.h> #include <linux/magic.h> #include <linux/slab.h> #include 
<linux/pagemap.h> #include <linux/namei.h> #include <linux/seq_file.h> #include <linux/exportfs.h> #include <linux/uuid.h> #include <linux/statfs.h> #include "kernfs-internal.h" struct kmem_cache *kernfs_node_cache __ro_after_init; struct kmem_cache *kernfs_iattrs_cache __ro_after_init; struct kernfs_global_locks *kernfs_locks __ro_after_init; static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) { struct kernfs_root *root = kernfs_root(kernfs_dentry_node(dentry)); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_options) return scops->show_options(sf, root); return 0; } static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry) { struct kernfs_node *node = kernfs_dentry_node(dentry); struct kernfs_root *root = kernfs_root(node); struct kernfs_syscall_ops *scops = root->syscall_ops; if (scops && scops->show_path) return scops->show_path(sf, node, root); seq_dentry(sf, dentry, " \t\n\\"); return 0; } static int kernfs_statfs(struct dentry *dentry, struct kstatfs *buf) { simple_statfs(dentry, buf); buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b); return 0; } const struct super_operations kernfs_sops = { .statfs = kernfs_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, .show_options = kernfs_sop_show_options, .show_path = kernfs_sop_show_path, }; static int kernfs_encode_fh(struct inode *inode, __u32 *fh, int *max_len, struct inode *parent) { struct kernfs_node *kn = inode->i_private; if (*max_len < 2) { *max_len = 2; return FILEID_INVALID; } *max_len = 2; *(u64 *)fh = kn->id; return FILEID_KERNFS; } static struct dentry *__kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type, bool get_parent) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_node *kn; struct inode *inode; u64 id; if (fh_len < 2) return NULL; switch (fh_type) { case FILEID_KERNFS: id = *(u64 *)fid; break; case FILEID_INO32_GEN: case 
FILEID_INO32_GEN_PARENT: /* * blk_log_action() exposes "LOW32,HIGH32" pair without * type and userland can call us with generic fid * constructed from them. Combine it back to ID. See * blk_log_action(). */ id = ((u64)fid->i32.gen << 32) | fid->i32.ino; break; default: return NULL; } kn = kernfs_find_and_get_node_by_id(info->root, id); if (!kn) return ERR_PTR(-ESTALE); if (get_parent) { struct kernfs_node *parent; parent = kernfs_get_parent(kn); kernfs_put(kn); kn = parent; if (!kn) return ERR_PTR(-ESTALE); } inode = kernfs_get_inode(sb, kn); kernfs_put(kn); return d_obtain_alias(inode); } static struct dentry *kernfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, false); } static struct dentry *kernfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { return __kernfs_fh_to_dentry(sb, fid, fh_len, fh_type, true); } static struct dentry *kernfs_get_parent_dentry(struct dentry *child) { struct kernfs_node *kn = kernfs_dentry_node(child); return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent)); } static const struct export_operations kernfs_export_ops = { .encode_fh = kernfs_encode_fh, .fh_to_dentry = kernfs_fh_to_dentry, .fh_to_parent = kernfs_fh_to_parent, .get_parent = kernfs_get_parent_dentry, }; /** * kernfs_root_from_sb - determine kernfs_root associated with a super_block * @sb: the super_block in question * * Return: the kernfs_root associated with @sb. If @sb is not a kernfs one, * %NULL is returned. */ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) { if (sb->s_op == &kernfs_sops) return kernfs_info(sb)->root; return NULL; } /* * find the next ancestor in the path down to @child, where @parent was the * ancestor whose descendant we want to find. * * Say the path is /a/b/c/d. @child is d, @parent is %NULL. We return the root * node. If @parent is b, then we return the node for c. * Passing in d as @parent is not ok. 
*/ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child, struct kernfs_node *parent) { if (child == parent) { pr_crit_once("BUG in find_next_ancestor: called with parent == child"); return NULL; } while (child->parent != parent) { if (!child->parent) return NULL; child = child->parent; } return child; } /** * kernfs_node_dentry - get a dentry for the given kernfs_node * @kn: kernfs_node for which a dentry is needed * @sb: the kernfs super_block * * Return: the dentry pointer */ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, struct super_block *sb) { struct dentry *dentry; struct kernfs_node *knparent; BUG_ON(sb->s_op != &kernfs_sops); dentry = dget(sb->s_root); /* Check if this is the root kernfs_node */ if (!kn->parent) return dentry; knparent = find_next_ancestor(kn, NULL); if (WARN_ON(!knparent)) { dput(dentry); return ERR_PTR(-EINVAL); } do { struct dentry *dtmp; struct kernfs_node *kntmp; if (kn == knparent) return dentry; kntmp = find_next_ancestor(kn, knparent); if (WARN_ON(!kntmp)) { dput(dentry); return ERR_PTR(-EINVAL); } dtmp = lookup_positive_unlocked(kntmp->name, dentry, strlen(kntmp->name)); dput(dentry); if (IS_ERR(dtmp)) return dtmp; knparent = kntmp; dentry = dtmp; } while (true); } static int kernfs_fill_super(struct super_block *sb, struct kernfs_fs_context *kfc) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *kf_root = kfc->root; struct inode *inode; struct dentry *root; info->sb = sb; /* Userspace would break if executables or devices appear on sysfs */ sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = kfc->magic; sb->s_op = &kernfs_sops; sb->s_xattr = kernfs_xattr_handlers; if (info->root->flags & KERNFS_ROOT_SUPPORT_EXPORTOP) sb->s_export_op = &kernfs_export_ops; sb->s_time_gran = 1; /* sysfs dentries and inodes don't require IO to create */ sb->s_shrink->seeks = 0; /* get root inode, initialize and unlock it */ 
down_read(&kf_root->kernfs_rwsem); inode = kernfs_get_inode(sb, info->root->kn); up_read(&kf_root->kernfs_rwsem); if (!inode) { pr_debug("kernfs: could not get root inode\n"); return -ENOMEM; } /* instantiate and link root dentry */ root = d_make_root(inode); if (!root) { pr_debug("%s: could not get root dentry!\n", __func__); return -ENOMEM; } sb->s_root = root; sb->s_d_op = &kernfs_dops; return 0; } static int kernfs_test_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_super_info *sb_info = kernfs_info(sb); struct kernfs_super_info *info = fc->s_fs_info; return sb_info->root == info->root && sb_info->ns == info->ns; } static int kernfs_set_super(struct super_block *sb, struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; kfc->ns_tag = NULL; return set_anon_super_fc(sb, fc); } /** * kernfs_super_ns - determine the namespace tag of a kernfs super_block * @sb: super_block of interest * * Return: the namespace tag associated with kernfs super_block @sb. */ const void *kernfs_super_ns(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); return info->ns; } /** * kernfs_get_tree - kernfs filesystem access/retrieval helper * @fc: The filesystem context. * * This is to be called from each kernfs user's fs_context->ops->get_tree() * implementation, which should set the specified ->@fs_type and ->@flags, and * specify the hierarchy and namespace tag to mount via ->@root and ->@ns, * respectively. * * Return: %0 on success, -errno on failure. 
*/ int kernfs_get_tree(struct fs_context *fc) { struct kernfs_fs_context *kfc = fc->fs_private; struct super_block *sb; struct kernfs_super_info *info; int error; info = kzalloc(sizeof(*info), GFP_KERNEL); if (!info) return -ENOMEM; info->root = kfc->root; info->ns = kfc->ns_tag; INIT_LIST_HEAD(&info->node); fc->s_fs_info = info; sb = sget_fc(fc, kernfs_test_super, kernfs_set_super); if (IS_ERR(sb)) return PTR_ERR(sb); if (!sb->s_root) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = kfc->root; kfc->new_sb_created = true; error = kernfs_fill_super(sb, kfc); if (error) { deactivate_locked_super(sb); return error; } sb->s_flags |= SB_ACTIVE; uuid_t uuid; uuid_gen(&uuid); super_set_uuid(sb, uuid.b, sizeof(uuid)); down_write(&root->kernfs_supers_rwsem); list_add(&info->node, &info->root->supers); up_write(&root->kernfs_supers_rwsem); } fc->root = dget(sb->s_root); return 0; } void kernfs_free_fs_context(struct fs_context *fc) { /* Note that we don't deal with kfc->ns_tag here. */ kfree(fc->s_fs_info); fc->s_fs_info = NULL; } /** * kernfs_kill_sb - kill_sb for kernfs * @sb: super_block being killed * * This can be used directly for file_system_type->kill_sb(). If a kernfs * user needs extra cleanup, it can implement its own kill_sb() and call * this function at the end. */ void kernfs_kill_sb(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); struct kernfs_root *root = info->root; down_write(&root->kernfs_supers_rwsem); list_del(&info->node); up_write(&root->kernfs_supers_rwsem); /* * Remove the superblock from fs_supers/s_instances * so we can't find it, before freeing kernfs_super_info. 
*/ kill_anon_super(sb); kfree(info); } static void __init kernfs_mutex_init(void) { int count; for (count = 0; count < NR_KERNFS_LOCKS; count++) mutex_init(&kernfs_locks->open_file_mutex[count]); } static void __init kernfs_lock_init(void) { kernfs_locks = kmalloc(sizeof(struct kernfs_global_locks), GFP_KERNEL); WARN_ON(!kernfs_locks); kernfs_mutex_init(); } void __init kernfs_init(void) { kernfs_node_cache = kmem_cache_create("kernfs_node_cache", sizeof(struct kernfs_node), 0, SLAB_PANIC, NULL); /* Creates slab cache for kernfs inode attributes */ kernfs_iattrs_cache = kmem_cache_create("kernfs_iattrs_cache", sizeof(struct kernfs_iattrs), 0, SLAB_PANIC, NULL); kernfs_lock_init(); }
33 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Linux VM pressure
 *
 * Copyright 2012 Linaro Ltd.
 *		  Anton Vorontsov <anton.vorontsov@linaro.org>
 *
 * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro,
 * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg.
 */

#include <linux/cgroup.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/eventfd.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/printk.h>
#include <linux/vmpressure.h>

/*
 * The window size (vmpressure_win) is the number of scanned pages before
 * we try to analyze scanned/reclaimed ratio. So the window is used as a
 * rate-limit tunable for the "low" level notification, and also for
 * averaging the ratio for medium/critical levels. Using small window
 * sizes can cause a lot of false positives, but too big a window size will
 * delay the notifications.
 *
 * As the vmscan reclaimer logic works with chunks which are multiple of
 * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well.
 *
 * TODO: Make the window size depend on machine size, as we do for vmstat
 * thresholds. Currently we set it to 512 pages (2MB for 4KB pages).
 */
static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;

/*
 * These thresholds are used when we account memory pressure through
 * scanned/reclaimed ratio. The current values were chosen empirically. In
 * essence, they are percents: the higher the value, the greater the number
 * of unsuccessful reclaims there were.
 */
static const unsigned int vmpressure_level_med = 60;
static const unsigned int vmpressure_level_critical = 95;

/*
 * When there are too few pages left to scan, vmpressure() may miss the
 * critical pressure as number of pages will be less than "window size".
 * However, in that case the vmscan priority will raise fast as the
 * reclaimer will try to scan LRUs more deeply.
 *
 * The vmscan logic considers these special priorities:
 *
 * prio == DEF_PRIORITY (12): reclaimer starts with that value
 * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed
 * prio == 0                : close to OOM, kernel scans every page in an lru
 *
 * Any value in this range is acceptable for this tunable (i.e. from 12 to
 * 0). Current value for the vmpressure_level_critical_prio is chosen
 * empirically, but the number, in essence, means that we consider
 * critical level when scanning depth is ~10% of the lru size (vmscan
 * scans 'lru_size >> prio' pages, so it is actually 12.5%, or one
 * eighth).
 */
static const unsigned int vmpressure_level_critical_prio = ilog2(100 / 10);

/* Map the embedded work item back to its containing struct vmpressure. */
static struct vmpressure *work_to_vmpressure(struct work_struct *work)
{
	return container_of(work, struct vmpressure, work);
}

/*
 * Return the vmpressure of the parent memory cgroup of @vmpr's cgroup, or
 * NULL when there is no parent.
 */
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
	struct mem_cgroup *memcg = vmpressure_to_memcg(vmpr);

	memcg = parent_mem_cgroup(memcg);
	if (!memcg)
		return NULL;
	return memcg_to_vmpressure(memcg);
}

/* Pressure levels, in increasing order of severity. */
enum vmpressure_levels {
	VMPRESSURE_LOW = 0,
	VMPRESSURE_MEDIUM,
	VMPRESSURE_CRITICAL,
	VMPRESSURE_NUM_LEVELS,
};

/* Notification modes an event listener can register with. */
enum vmpressure_modes {
	VMPRESSURE_NO_PASSTHROUGH = 0,
	VMPRESSURE_HIERARCHY,
	VMPRESSURE_LOCAL,
	VMPRESSURE_NUM_MODES,
};

/* Level names accepted by vmpressure_register_event(). */
static const char * const vmpressure_str_levels[] = {
	[VMPRESSURE_LOW] = "low",
	[VMPRESSURE_MEDIUM] = "medium",
	[VMPRESSURE_CRITICAL] = "critical",
};

/* Mode names accepted by vmpressure_register_event(). */
static const char * const vmpressure_str_modes[] = {
	[VMPRESSURE_NO_PASSTHROUGH] = "default",
	[VMPRESSURE_HIERARCHY] = "hierarchy",
	[VMPRESSURE_LOCAL] = "local",
};

/* Classify a pressure percentage against the medium/critical thresholds. */
static enum vmpressure_levels vmpressure_level(unsigned long pressure)
{
	if (pressure >= vmpressure_level_critical)
		return VMPRESSURE_CRITICAL;
	else if (pressure >= vmpressure_level_med)
		return VMPRESSURE_MEDIUM;
	return VMPRESSURE_LOW;
}

/*
 * Turn a scanned/reclaimed pair into a pressure level. The pressure is 0
 * (hence VMPRESSURE_LOW) whenever @reclaimed is at least @scanned.
 */
static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
						    unsigned long reclaimed)
{
	unsigned long scale = scanned + reclaimed;
	unsigned long pressure = 0;

	/*
	 * reclaimed can be greater than scanned for things such as reclaimed
	 * slab pages. shrink_node() just adds reclaimed pages without a
	 * related increment to scanned pages.
	 */
	if (reclaimed >= scanned)
		goto out;
	/*
	 * We calculate the ratio (in percents) of how many pages were
	 * scanned vs. reclaimed in a given time frame (window). Note that
	 * time is in VM reclaimer's "ticks", i.e. number of pages
	 * scanned. This makes it possible to set desired reaction time
	 * and serves as a ratelimit.
	 */
	pressure = scale - (reclaimed * scale / scanned);
	pressure = pressure * 100 / scale;

out:
	pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure,
		 scanned, reclaimed);

	return vmpressure_level(pressure);
}

/* One registered listener: the eventfd to signal, its threshold and mode. */
struct vmpressure_event {
	struct eventfd_ctx *efd;
	enum vmpressure_levels level;
	enum vmpressure_modes mode;
	struct list_head node;
};

/*
 * Signal every listener registered on @vmpr that should see @level.
 *
 * A listener is skipped when @ancestor is set and its mode is
 * VMPRESSURE_LOCAL, when @signalled is set and its mode is
 * VMPRESSURE_NO_PASSTHROUGH, or when @level is below its threshold.
 *
 * Returns true if at least one eventfd was signalled.
 */
static bool vmpressure_event(struct vmpressure *vmpr,
			     const enum vmpressure_levels level,
			     bool ancestor, bool signalled)
{
	struct vmpressure_event *ev;
	bool ret = false;

	mutex_lock(&vmpr->events_lock);
	list_for_each_entry(ev, &vmpr->events, node) {
		if (ancestor && ev->mode == VMPRESSURE_LOCAL)
			continue;
		if (signalled && ev->mode == VMPRESSURE_NO_PASSTHROUGH)
			continue;
		if (level < ev->level)
			continue;
		eventfd_signal(ev->efd);
		ret = true;
	}
	mutex_unlock(&vmpr->events_lock);

	return ret;
}

/*
 * Work handler: take the accumulated tree counters, compute the pressure
 * level, and deliver it to @vmpr and then to each of its ancestors.
 */
static void vmpressure_work_fn(struct work_struct *work)
{
	struct vmpressure *vmpr = work_to_vmpressure(work);
	unsigned long scanned;
	unsigned long reclaimed;
	enum vmpressure_levels level;
	bool ancestor = false;
	bool signalled = false;

	spin_lock(&vmpr->sr_lock);
	/*
	 * Several contexts might be calling vmpressure(), so it is
	 * possible that the work was rescheduled again before the old
	 * work context cleared the counters. In that case we will run
	 * just after the old work returns, but then scanned might be zero
	 * here. The counters are read and reset under sr_lock.
	 */
	scanned = vmpr->tree_scanned;
	if (!scanned) {
		spin_unlock(&vmpr->sr_lock);
		return;
	}

	reclaimed = vmpr->tree_reclaimed;
	vmpr->tree_scanned = 0;
	vmpr->tree_reclaimed = 0;
	spin_unlock(&vmpr->sr_lock);

	level = vmpressure_calc_level(scanned, reclaimed);

	do {
		if (vmpressure_event(vmpr, level, ancestor, signalled))
			signalled = true;
		/* Every iteration after the first handles an ancestor. */
		ancestor = true;
	} while ((vmpr = vmpressure_parent(vmpr)));
}

/**
 * vmpressure() - Account memory pressure through scanned/reclaimed ratio
 * @gfp:	reclaimer's gfp mask
 * @memcg:	cgroup memory controller handle
 * @tree:	legacy subtree mode
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * This function should be called from the vmscan reclaim path to account
 * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
 * pressure index is then further refined and averaged over time.
 *
 * If @tree is set, vmpressure is in traditional userspace reporting
 * mode: @memcg is considered the pressure root and userspace is
 * notified of the entire subtree's reclaim efficiency.
 *
 * If @tree is not set, reclaim efficiency is recorded for @memcg, and
 * only in-kernel users are notified.
 *
 * This function does not return any value.
 */
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree,
		unsigned long scanned, unsigned long reclaimed)
{
	struct vmpressure *vmpr;

	if (mem_cgroup_disabled())
		return;

	/*
	 * The in-kernel users only care about the reclaim efficiency
	 * for this @memcg rather than the whole subtree, and there
	 * isn't and won't be any in-kernel user in a legacy cgroup.
	 */
	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !tree)
		return;

	vmpr = memcg_to_vmpressure(memcg);

	/*
	 * Here we only want to account pressure that userland is able to
	 * help us with. For example, suppose that DMA zone is under
	 * pressure; if we notify userland about that kind of pressure,
	 * then it will be mostly a waste as it will trigger unnecessary
	 * freeing of memory by userland (since userland is more likely to
	 * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That
	 * is why we include only movable, highmem and FS/IO pages.
	 * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so
	 * we account it too.
	 */
	if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
		return;

	/*
	 * If we got here with no pages scanned, then that is an indicator
	 * that reclaimer was unable to find any shrinkable LRUs at the
	 * current scanning depth. But it does not mean that we should
	 * report the critical pressure, yet. If the scanning priority
	 * (scanning depth) goes too high (deep), we will be notified
	 * through vmpressure_prio(). But so far, keep calm.
	 */
	if (!scanned)
		return;

	if (tree) {
		/* Accumulate; the work item runs once a full window is seen. */
		spin_lock(&vmpr->sr_lock);
		scanned = vmpr->tree_scanned += scanned;
		vmpr->tree_reclaimed += reclaimed;
		spin_unlock(&vmpr->sr_lock);

		if (scanned < vmpressure_win)
			return;
		schedule_work(&vmpr->work);
	} else {
		enum vmpressure_levels level;

		/* For now, no users for root-level efficiency */
		if (!memcg || mem_cgroup_is_root(memcg))
			return;

		spin_lock(&vmpr->sr_lock);
		scanned = vmpr->scanned += scanned;
		reclaimed = vmpr->reclaimed += reclaimed;
		if (scanned < vmpressure_win) {
			spin_unlock(&vmpr->sr_lock);
			return;
		}
		/* Window complete: reset the counters for the next one. */
		vmpr->scanned = vmpr->reclaimed = 0;
		spin_unlock(&vmpr->sr_lock);

		level = vmpressure_calc_level(scanned, reclaimed);

		if (level > VMPRESSURE_LOW) {
			/*
			 * Let the socket buffer allocator know that
			 * we are having trouble reclaiming LRU pages.
			 *
			 * For hysteresis keep the pressure state
			 * asserted for a second in which subsequent
			 * pressure events can occur.
			 */
			WRITE_ONCE(memcg->socket_pressure, jiffies + HZ);
		}
	}
}

/**
 * vmpressure_prio() - Account memory pressure through reclaimer priority level
 * @gfp:	reclaimer's gfp mask
 * @memcg:	cgroup memory controller handle
 * @prio:	reclaimer's priority
 *
 * This function should be called from the reclaim path every time when
 * the vmscan's reclaiming priority (scanning depth) changes.
 *
 * This function does not return any value.
 */
void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
{
	/*
	 * We only use prio for accounting critical level. For more info
	 * see comment for vmpressure_level_critical_prio variable above.
	 */
	if (prio > vmpressure_level_critical_prio)
		return;

	/*
	 * OK, the prio is below the threshold, updating vmpressure
	 * information before shrinker dives into long shrinking of long
	 * range vmscan. Passing scanned = vmpressure_win, reclaimed = 0
	 * to the vmpressure() basically means that we signal 'critical'
	 * level.
	 */
	vmpressure(gfp, memcg, true, vmpressure_win, 0);
}

/* Longest valid argument string ("critical,hierarchy") plus one extra byte. */
#define MAX_VMPRESSURE_ARGS_LEN	(strlen("critical") + strlen("hierarchy") + 2)

/**
 * vmpressure_register_event() - Bind vmpressure notifications to an eventfd
 * @memcg:	memcg that is interested in vmpressure notifications
 * @eventfd:	eventfd context to link notifications with
 * @args:	event arguments (pressure level threshold, optional mode)
 *
 * This function associates eventfd context with the vmpressure
 * infrastructure, so that the notifications will be delivered to the
 * @eventfd. The @args parameter is a comma-delimited string that denotes a
 * pressure level threshold (one of vmpressure_str_levels, i.e. "low", "medium",
 * or "critical") and an optional mode (one of vmpressure_str_modes, i.e.
 * "hierarchy" or "local").
 *
 * To be used as memcg event method.
 *
 * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could
 * not be parsed.
 */
int vmpressure_register_event(struct mem_cgroup *memcg,
			      struct eventfd_ctx *eventfd, const char *args)
{
	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
	struct vmpressure_event *ev;
	enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH;
	enum vmpressure_levels level;
	char *spec, *spec_orig;
	char *token;
	int ret = 0;

	/* strsep() advances @spec, so keep the original pointer for kfree(). */
	spec_orig = spec = kstrndup(args, MAX_VMPRESSURE_ARGS_LEN, GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	/* Find required level */
	token = strsep(&spec, ",");
	ret = match_string(vmpressure_str_levels, VMPRESSURE_NUM_LEVELS, token);
	if (ret < 0)
		goto out;
	level = ret;

	/* Find optional mode */
	token = strsep(&spec, ",");
	if (token) {
		ret = match_string(vmpressure_str_modes, VMPRESSURE_NUM_MODES, token);
		if (ret < 0)
			goto out;
		mode = ret;
	}

	ev = kzalloc(sizeof(*ev), GFP_KERNEL);
	if (!ev) {
		ret = -ENOMEM;
		goto out;
	}

	ev->efd = eventfd;
	ev->level = level;
	ev->mode = mode;

	mutex_lock(&vmpr->events_lock);
	list_add(&ev->node, &vmpr->events);
	mutex_unlock(&vmpr->events_lock);
	/* ret still holds the matched index; report success instead. */
	ret = 0;
out:
	kfree(spec_orig);
	return ret;
}

/**
 * vmpressure_unregister_event() - Unbind eventfd from vmpressure
 * @memcg:	memcg handle
 * @eventfd:	eventfd context that was used to link vmpressure with the @cg
 *
 * This function does internal manipulations to detach the @eventfd from
 * the vmpressure notifications, and then frees internal resources
 * associated with the @eventfd (but the @eventfd itself is not freed).
 *
 * Only the first event found for @eventfd is removed.
 *
 * To be used as memcg event method.
 */
void vmpressure_unregister_event(struct mem_cgroup *memcg,
				 struct eventfd_ctx *eventfd)
{
	struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
	struct vmpressure_event *ev;

	mutex_lock(&vmpr->events_lock);
	list_for_each_entry(ev, &vmpr->events, node) {
		if (ev->efd != eventfd)
			continue;
		list_del(&ev->node);
		kfree(ev);
		break;
	}
	mutex_unlock(&vmpr->events_lock);
}

/**
 * vmpressure_init() - Initialize vmpressure control structure
 * @vmpr:	Structure to be initialized
 *
 * This function should be called on every allocated vmpressure structure
 * before any usage.
 */
void vmpressure_init(struct vmpressure *vmpr)
{
	spin_lock_init(&vmpr->sr_lock);
	mutex_init(&vmpr->events_lock);
	INIT_LIST_HEAD(&vmpr->events);
	INIT_WORK(&vmpr->work, vmpressure_work_fn);
}

/**
 * vmpressure_cleanup() - shuts down vmpressure control structure
 * @vmpr:	Structure to be cleaned up
 *
 * This function should be called before the structure in which it is
 * embedded is cleaned up.
 */
void vmpressure_cleanup(struct vmpressure *vmpr)
{
	/*
	 * Make sure there is no pending work before eventfd infrastructure
	 * goes away.
	 */
	flush_work(&vmpr->work);
}
34 34 34 34 34 33 34 29 29 29 29 28 1 29 29 12 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 // SPDX-License-Identifier: GPL-2.0-or-later /* * lib/plist.c * * Descending-priority-sorted double-linked list * * (C) 2002-2003 Intel Corp * Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>. * * 2001-2005 (c) MontaVista Software, Inc. * Daniel Walker <dwalker@mvista.com> * * (C) 2005 Thomas Gleixner <tglx@linutronix.de> * * Simplifications of the original code by * Oleg Nesterov <oleg@tv-sign.ru> * * Based on simple lists (include/linux/list.h). * * This file contains the add / del functions which are considered to * be too large to inline. See include/linux/plist.h for further * information. 
*/

#include <linux/bug.h>
#include <linux/plist.h>

#ifdef CONFIG_DEBUG_PLIST

/* List head shared by the consistency checks' self-test further down. */
static struct plist_head test_head;

/*
 * Warn if @p and @n are not properly linked as neighbours (p->next == n and
 * n->prev == p).  @t is the list top and is only used for the diagnostic
 * dump of all three list_heads.
 */
static void plist_check_prev_next(struct list_head *t, struct list_head *p,
				  struct list_head *n)
{
	WARN(n->prev != p || p->next != n,
			"top: %p, n: %p, p: %p\n"
			"prev: %p, n: %p, p: %p\n"
			"next: %p, n: %p, p: %p\n",
			 t, t->next, t->prev,
			p, p->next, p->prev,
			n, n->next, n->prev);
}

/*
 * Walk the circular list starting at @top once and verify the prev/next
 * linkage of every adjacent pair.
 */
static void plist_check_list(struct list_head *top)
{
	struct list_head *prev = top, *next = top->next;

	plist_check_prev_next(top, prev, next);
	while (next != top) {
		WRITE_ONCE(prev, next);
		WRITE_ONCE(next, prev->next);
		plist_check_prev_next(top, prev, next);
	}
}

/*
 * Verify both lists hanging off @head: the prio_list ring (reached through
 * the first node, so only when the plist is non-empty) and the node_list.
 */
static void plist_check_head(struct plist_head *head)
{
	if (!plist_head_empty(head))
		plist_check_list(&plist_first(head)->prio_list);
	plist_check_list(&head->node_list);
}

#else
# define plist_check_head(h)	do { } while (0)
#endif

/**
 * plist_add - add @node to @head
 *
 * @node:	&struct plist_node pointer
 * @head:	&struct plist_head pointer
 *
 * @node must not be on any list yet (both of its list_heads empty); this is
 * only warned about, not enforced.  The prio_list ring is scanned from both
 * ends at once: @iter walks forward from the first entry while @reverse_iter
 * walks backward from the last one, and the scan stops as soon as either end
 * determines the insertion point.
 */
void plist_add(struct plist_node *node, struct plist_head *head)
{
	struct plist_node *first, *iter, *prev = NULL, *last, *reverse_iter;
	/* Default position: tail of the node list. */
	struct list_head *node_next = &head->node_list;

	plist_check_head(head);
	WARN_ON(!plist_node_empty(node));
	WARN_ON(!list_empty(&node->prio_list));

	if (plist_head_empty(head))
		goto ins_node;

	first = iter = plist_first(head);
	last = reverse_iter = list_entry(first->prio_list.prev, struct plist_node, prio_list);

	do {
		if (node->prio < iter->prio) {
			/* Forward scan hit a larger prio: insert before it. */
			node_next = &iter->node_list;
			break;
		} else if (node->prio >= reverse_iter->prio) {
			/*
			 * Backward scan hit an entry that does not exceed
			 * @node's prio: insert in front of the prio_list
			 * entry following it.  If that entry is the very
			 * last one, node_next keeps its default (list tail).
			 */
			prev = reverse_iter;
			iter = list_entry(reverse_iter->prio_list.next,
				struct plist_node, prio_list);
			if (likely(reverse_iter != last))
				node_next = &iter->node_list;
			break;
		}

		/* Advance both cursors one prio_list entry towards each other. */
		prev = iter;
		iter = list_entry(iter->prio_list.next,
				struct plist_node, prio_list);
		reverse_iter = list_entry(reverse_iter->prio_list.prev,
				struct plist_node, prio_list);
	} while (iter != first);

	/*
	 * Link @node into the prio_list only when the preceding entry does
	 * not already carry the same prio.
	 */
	if (!prev || prev->prio != node->prio)
		list_add_tail(&node->prio_list, &iter->prio_list);
ins_node:
	list_add_tail(&node->node_list, node_next);

	plist_check_head(head);
}

/**
 * plist_del - Remove a @node from plist.
 *
 * @node:	&struct plist_node pointer - entry to be removed
 * @head:	&struct plist_head pointer - list head
 *
 * If @node is linked on the prio_list and its node_list successor is not,
 * that successor takes over @node's place on the prio_list before @node is
 * unlinked.  Both list_heads of @node are re-initialised on removal.
 */
void plist_del(struct plist_node *node, struct plist_head *head)
{
	plist_check_head(head);

	if (!list_empty(&node->prio_list)) {
		if (node->node_list.next != &head->node_list) {
			struct plist_node *next;

			next = list_entry(node->node_list.next,
					struct plist_node, node_list);

			/* add the next plist_node into prio_list */
			if (list_empty(&next->prio_list))
				list_add(&next->prio_list, &node->prio_list);
		}
		list_del_init(&node->prio_list);
	}

	list_del_init(&node->node_list);

	plist_check_head(head);
}

/**
 * plist_requeue - Requeue @node at end of same-prio entries.
 *
 * This is essentially an optimized plist_del() followed by
 * plist_add().  It moves an entry already in the plist to
 * after any other same-priority entries.
*
 * @node:	&struct plist_node pointer - entry to be moved
 * @head:	&struct plist_head pointer - list head
 */
void plist_requeue(struct plist_node *node, struct plist_head *head)
{
	struct plist_node *iter;
	/* Default position: tail of the node list. */
	struct list_head *node_next = &head->node_list;

	plist_check_head(head);
	BUG_ON(plist_head_empty(head));
	BUG_ON(plist_node_empty(node));

	/* Already last overall: nothing to move. */
	if (node == plist_last(head))
		return;

	iter = plist_next(node);

	/* Already last among its own priority: nothing to move. */
	if (node->prio != iter->prio)
		return;

	plist_del(node, head);

	/* Find the first following entry with a different prio. */
	plist_for_each_continue(iter, head) {
		if (node->prio != iter->prio) {
			node_next = &iter->node_list;
			break;
		}
	}
	list_add_tail(&node->node_list, node_next);

	plist_check_head(head);
}

#ifdef CONFIG_DEBUG_PLIST
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/module.h>
#include <linux/init.h>

static struct plist_node __initdata test_node[241];

/*
 * Check the invariants of test_head: it holds exactly @nr_expect nodes, prios
 * never decrease along the node list, only the first node of each prio is
 * linked on the prio_list, and the prio_list closes back onto the first node.
 * Any violation is a BUG().
 */
static void __init plist_test_check(int nr_expect)
{
	struct plist_node *first, *prio_pos, *node_pos;

	if (plist_head_empty(&test_head)) {
		BUG_ON(nr_expect != 0);
		return;
	}

	prio_pos = first = plist_first(&test_head);
	plist_for_each(node_pos, &test_head) {
		if (nr_expect-- < 0)
			break;
		if (node_pos == first)
			continue;
		if (node_pos->prio == prio_pos->prio) {
			BUG_ON(!list_empty(&node_pos->prio_list));
			continue;
		}

		BUG_ON(prio_pos->prio > node_pos->prio);
		BUG_ON(prio_pos->prio_list.next != &node_pos->prio_list);
		prio_pos = node_pos;
	}

	BUG_ON(nr_expect != 0);
	BUG_ON(prio_pos->prio_list.next != &first->prio_list);
}

/*
 * Requeue @node and verify that afterwards it is either the last node of
 * test_head or followed by a node of a different prio.
 */
static void __init plist_test_requeue(struct plist_node *node)
{
	plist_requeue(node, &test_head);

	if (node != plist_last(&test_head))
		BUG_ON(node->prio == plist_next(node)->prio);
}

/*
 * Boot-time self-test: 1000 pseudo-random add/del/requeue operations with an
 * invariant check after each step, followed by a timed run that adds all
 * nodes with strictly increasing prio values (prio == index).
 *
 * All locals are declared up front and the prio of each node in the timed
 * run is taken directly from the loop index, so no scratch array is kept on
 * the stack.
 */
static int  __init plist_test(void)
{
	int nr_expect = 0, i, loop;
	unsigned int r = local_clock();
	ktime_t start, end, time_elapsed = 0;

	printk(KERN_DEBUG "start plist test\n");
	plist_head_init(&test_head);
	for (i = 0; i < ARRAY_SIZE(test_node); i++)
		plist_node_init(test_node + i, 0);

	for (loop = 0; loop < 1000; loop++) {
		r = r * 193939 % 47629;
		i = r % ARRAY_SIZE(test_node);
		if (plist_node_empty(test_node + i)) {
			r = r * 193939 % 47629;
			test_node[i].prio = r % 99;
			plist_add(test_node + i, &test_head);
			nr_expect++;
		} else {
			plist_del(test_node + i, &test_head);
			nr_expect--;
		}
		plist_test_check(nr_expect);
		if (!plist_node_empty(test_node + i)) {
			plist_test_requeue(test_node + i);
			plist_test_check(nr_expect);
		}
	}

	/* Drain whatever the random phase left behind. */
	for (i = 0; i < ARRAY_SIZE(test_node); i++) {
		if (plist_node_empty(test_node + i))
			continue;
		plist_del(test_node + i, &test_head);
		nr_expect--;
		plist_test_check(nr_expect);
	}

	printk(KERN_DEBUG "end plist test\n");

	/* Worst case test for plist_add() */
	plist_head_init(&test_head);
	for (i = 0; i < ARRAY_SIZE(test_node); i++) {
		plist_node_init(test_node + i, 0);
		test_node[i].prio = i;
	}

	for (i = 0; i < ARRAY_SIZE(test_node); i++) {
		if (plist_node_empty(test_node + i)) {
			start = ktime_get();
			plist_add(test_node + i, &test_head);
			end = ktime_get();
			time_elapsed += (end - start);
		}
	}

	pr_debug("plist_add worst case test time elapsed %lld\n", time_elapsed);
	return 0;
}

module_init(plist_test);

#endif
2 276 228 93 223 148 148 98 98 96 277 286 1 1 1 1 286 2 286 1 286 1 1 287 2 1 104 102 6 104 104 237 296 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright Red Hat Inc. 2017 * * This file is part of the SCTP kernel implementation * * These functions manipulate sctp stream queue/scheduling. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> */ #include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> /* First Come First Serve (a.k.a. 
FIFO)
 * RFC DRAFT ndata Section 3.1
 */

/* FCFS keeps no per-stream value: setting one is accepted and ignored. */
static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid,
			       __u16 value, gfp_t gfp)
{
	return 0;
}

/* FCFS keeps no per-stream value: always reports 0 through @value. */
static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid,
			       __u16 *value)
{
	*value = 0;
	return 0;
}

/* Nothing to set up for FCFS. */
static int sctp_sched_fcfs_init(struct sctp_stream *stream)
{
	return 0;
}

/* Nothing to set up per stream for FCFS. */
static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid,
				    gfp_t gfp)
{
	return 0;
}

/* Nothing to release per stream for FCFS. */
static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid)
{
}

/* No scheduler-side bookkeeping on enqueue for FCFS. */
static void sctp_sched_fcfs_enqueue(struct sctp_outq *q,
				    struct sctp_datamsg *msg)
{
}

/*
 * Pick the next chunk to send, unlink it via sctp_sched_dequeue_common() and
 * return it; returns NULL when q->out_chunk_list is empty.
 *
 * While stream->out_curr is set, the chunk is taken from the head of that
 * stream's own ext->outq; otherwise it is the head of q->out_chunk_list.
 */
static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q)
{
	struct sctp_stream *stream = &q->asoc->stream;
	struct sctp_chunk *ch = NULL;
	struct list_head *entry;

	if (list_empty(&q->out_chunk_list))
		goto out;

	if (stream->out_curr) {
		ch = list_entry(stream->out_curr->ext->outq.next,
				struct sctp_chunk, stream_list);
	} else {
		entry = q->out_chunk_list.next;
		ch = list_entry(entry, struct sctp_chunk, list);
	}

	sctp_sched_dequeue_common(q, ch);

out:
	return ch;
}

/* No scheduler-side bookkeeping after a dequeue for FCFS. */
static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q,
					 struct sctp_chunk *chunk)
{
}

/* Nothing to schedule for FCFS. */
static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream)
{
}

/* Nothing to unschedule for FCFS. */
static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream)
{
}

/* Operations table of the FCFS scheduler. */
static struct sctp_sched_ops sctp_sched_fcfs = {
	.set = sctp_sched_fcfs_set,
	.get = sctp_sched_fcfs_get,
	.init = sctp_sched_fcfs_init,
	.init_sid = sctp_sched_fcfs_init_sid,
	.free_sid = sctp_sched_fcfs_free_sid,
	.enqueue = sctp_sched_fcfs_enqueue,
	.dequeue = sctp_sched_fcfs_dequeue,
	.dequeue_done = sctp_sched_fcfs_dequeue_done,
	.sched_all = sctp_sched_fcfs_sched_all,
	.unsched_all = sctp_sched_fcfs_unsched_all,
};

/* Register the FCFS operations under SCTP_SS_FCFS. */
static void sctp_sched_ops_fcfs_init(void)
{
	sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs);
}

/* API to other parts of the stack */

/* Registered scheduler operations, indexed by enum sctp_sched_type. */
static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1];

/*
 * Store @sched_ops in the slot for @sched.  @sched is used as an array index
 * without a range check here.
 */
void sctp_sched_ops_register(enum sctp_sched_type sched,
			     struct sctp_sched_ops *sched_ops)
{
	sctp_sched_ops[sched] = sched_ops;
}

/* Run the registration hook of every scheduler implementation. */
void sctp_sched_ops_init(void)
{
	sctp_sched_ops_fcfs_init();
	sctp_sched_ops_prio_init();
	sctp_sched_ops_rr_init();
	sctp_sched_ops_fc_init();
	sctp_sched_ops_wfq_init();
}

/*
 * Tear down the current scheduler's state on @stream: unschedule everything,
 * then for each outbound stream that has an ext, call free_sid() and zero the
 * part of the ext that follows its outq member.
 */
static void sctp_sched_free_sched(struct sctp_stream *stream)
{
	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
	struct sctp_stream_out_ext *soute;
	int i;

	sched->unsched_all(stream);
	for (i = 0; i < stream->outcnt; i++) {
		soute = SCTP_SO(stream, i)->ext;
		if (!soute)
			continue;
		sched->free_sid(stream, i);
		/* Give the next scheduler a clean slate. */
		memset_after(soute, 0, outq);
	}
}

/*
 * Switch @asoc to the scheduler identified by @sched.
 *
 * Returns -EINVAL for @sched above SCTP_SS_MAX and 0 immediately when the
 * requested scheduler is already active.  Otherwise the old scheduler (if
 * any) is torn down, the new one is installed and initialised for every
 * stream that has an ext, and each distinct datamsg found on out_chunk_list
 * is handed to the new scheduler's enqueue().  If an init_sid() call fails,
 * its error is returned and the association is left on FCFS.
 */
int sctp_sched_set_sched(struct sctp_association *asoc,
			 enum sctp_sched_type sched)
{
	struct sctp_sched_ops *old = asoc->outqueue.sched;
	struct sctp_datamsg *msg = NULL;
	struct sctp_sched_ops *n;
	struct sctp_chunk *ch;
	int i, ret = 0;

	if (sched > SCTP_SS_MAX)
		return -EINVAL;

	n = sctp_sched_ops[sched];
	if (old == n)
		return ret;

	if (old)
		sctp_sched_free_sched(&asoc->stream);

	asoc->outqueue.sched = n;
	n->init(&asoc->stream);
	for (i = 0; i < asoc->stream.outcnt; i++) {
		if (!SCTP_SO(&asoc->stream, i)->ext)
			continue;

		ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
		if (ret)
			goto err;
	}

	/* We have to requeue all chunks already queued. */
	list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) {
		/* Consecutive chunks of one datamsg are enqueued only once. */
		if (ch->msg == msg)
			continue;
		msg = ch->msg;
		n->enqueue(&asoc->outqueue, msg);
	}

	return ret;

err:
	sctp_sched_free_sched(&asoc->stream);
	asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */

	return ret;
}

/*
 * Return the index of the registered scheduler currently used by @asoc, or 0
 * when it matches none of the registered entries.
 */
int sctp_sched_get_sched(struct sctp_association *asoc)
{
	int i;

	for (i = 0; i <= SCTP_SS_MAX; i++)
		if (asoc->outqueue.sched == sctp_sched_ops[i])
			return i;

	return 0;
}

/*
 * Set the scheduler value of outbound stream @sid, allocating the stream's
 * ext first if it does not exist yet.  Returns -EINVAL for an out-of-range
 * @sid, the error of sctp_stream_init_ext() if that fails, and otherwise
 * whatever the scheduler's set() returns.
 */
int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid,
			 __u16 value, gfp_t gfp)
{
	if (sid >= asoc->stream.outcnt)
		return -EINVAL;

	if (!SCTP_SO(&asoc->stream, sid)->ext) {
		int ret;

		ret = sctp_stream_init_ext(&asoc->stream, sid);
		if (ret)
			return ret;
	}

	return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp);
}

/*
 * Read the scheduler value of outbound stream @sid into @value.  Returns
 * -EINVAL for an out-of-range @sid.  When the stream has no ext, 0 is
 * returned and *@value is left untouched.
 */
int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid,
			 __u16 *value)
{
	if (sid >= asoc->stream.outcnt)
		return -EINVAL;

	if (!SCTP_SO(&asoc->stream, sid)->ext)
		return 0;

	return asoc->outqueue.sched->get(&asoc->stream, sid, value);
}

/*
 * Post-dequeue bookkeeping for @ch.  If @ch is not the last fragment of its
 * datamsg and the peer is not intl_capable, the chunk's stream is recorded
 * as out_curr and the scheduler is not notified; otherwise out_curr is
 * cleared and the scheduler's dequeue_done() runs.
 */
void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch)
{
	if (!list_is_last(&ch->frag_list, &ch->msg->chunks) &&
	    !q->asoc->peer.intl_capable) {
		struct sctp_stream_out *sout;
		__u16 sid;

		/* The datamsg is not finished yet, so save it as the current
		 * one, in case the application switches scheduler or a
		 * higher-priority stream comes in.
		 */
		sid = sctp_chunk_stream_no(ch);
		sout = SCTP_SO(&q->asoc->stream, sid);
		q->asoc->stream.out_curr = sout;
		return;
	}

	q->asoc->stream.out_curr = NULL;
	q->sched->dequeue_done(q, ch);
}

/* Auxiliary functions for the schedulers */

/*
 * Unlink @ch from both lists it sits on (->list and ->stream_list) and
 * subtract its skb length from the queue's out_qlen.
 */
void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch)
{
	list_del_init(&ch->list);
	list_del_init(&ch->stream_list);
	q->out_qlen -= ch->skb->len;
}

/*
 * Initialise the outq list of stream @sid's ext and let the active scheduler
 * set up its per-stream state.  The ext pointer is dereferenced without a
 * NULL check, so the ext must already exist.
 */
int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp)
{
	struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream);
	struct sctp_stream_out_ext *ext = SCTP_SO(stream, sid)->ext;

	INIT_LIST_HEAD(&ext->outq);
	return sched->init_sid(stream, sid, gfp);
}

/*
 * Return the scheduler ops of the association that embeds @stream (found
 * with container_of() on the association's stream member).
 */
struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream)
{
	struct sctp_association *asoc;

	asoc = container_of(stream, struct sctp_association, stream);

	return asoc->outqueue.sched;
}
2 2 2 2 2 12 12 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 /* * llc_sap.c - driver routines for SAP component. * * Copyright (c) 1997 by Procom Technology, Inc. 
* 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <net/llc.h> #include <net/llc_if.h> #include <net/llc_conn.h> #include <net/llc_pdu.h> #include <net/llc_sap.h> #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_s_st.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/llc.h> #include <linux/slab.h> static int llc_mac_header_len(unsigned short devtype) { switch (devtype) { case ARPHRD_ETHER: case ARPHRD_LOOPBACK: return sizeof(struct ethhdr); } return 0; } /** * llc_alloc_frame - allocates sk_buff for frame * @sk: socket to allocate frame to * @dev: network device this skb will be sent over * @type: pdu type to allocate * @data_size: data size to allocate * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, u8 type, u32 data_size) { int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; struct sk_buff *skb; hlen += llc_mac_header_len(dev->type); skb = alloc_skb(hlen + data_size, GFP_ATOMIC); if (skb) { skb_reset_mac_header(skb); skb_reserve(skb, hlen); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->protocol = htons(ETH_P_802_2); skb->dev = dev; if (sk != NULL) skb_set_owner_w(skb, sk); } return skb; } void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; /* save primitive for use by the user. 
*/ addr = llc_ui_skb_cb(skb); memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; addr->sllc_ua = prim == LLC_DATAUNIT_PRIM; llc_pdu_decode_sa(skb, addr->sllc_mac); llc_pdu_decode_ssap(skb, &addr->sllc_sap); } /** * llc_sap_rtn_pdu - Informs upper layer on rx of an UI, XID or TEST pdu. * @sap: pointer to SAP * @skb: received pdu */ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); switch (LLC_U_PDU_RSP(pdu)) { case LLC_1_PDU_CMD_TEST: ev->prim = LLC_TEST_PRIM; break; case LLC_1_PDU_CMD_XID: ev->prim = LLC_XID_PRIM; break; case LLC_1_PDU_CMD_UI: ev->prim = LLC_DATAUNIT_PRIM; break; } ev->ind_cfm_flag = LLC_IND; } /** * llc_find_sap_trans - finds transition for event * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event. * Returns the pointer to found transition on success or %NULL for * failure. */ static const struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, struct sk_buff *skb) { int i = 0; const struct llc_sap_state_trans *rc = NULL; const struct llc_sap_state_trans **next_trans; struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1]; /* * Search thru events for this state until list exhausted or until * its obvious the event is not valid for the current state */ for (next_trans = curr_state->transitions; next_trans[i]->ev; i++) if (!next_trans[i]->ev(sap, skb)) { rc = next_trans[i]; /* got event match; return it */ break; } return rc; } /** * llc_exec_sap_trans_actions - execute actions related to event * @sap: pointer to SAP * @trans: pointer to transition that it's actions must be performed * @skb: happened event. * * This function executes actions that is related to happened event. 
* Returns 0 for success and 1 for failure of at least one action. */ static int llc_exec_sap_trans_actions(struct llc_sap *sap, const struct llc_sap_state_trans *trans, struct sk_buff *skb) { int rc = 0; const llc_sap_action_t *next_action = trans->ev_actions; for (; next_action && *next_action; next_action++) if ((*next_action)(sap, skb)) rc = 1; return rc; } /** * llc_sap_next_state - finds transition, execs actions & change SAP state * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event, then * executes related actions and finally changes state of SAP. It returns * 0 on success and 1 for failure. */ static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb) { const struct llc_sap_state_trans *trans; int rc = 1; if (sap->state > LLC_NR_SAP_STATES) goto out; trans = llc_find_sap_trans(sap, skb); if (!trans) goto out; /* * Got the state to which we next transition; perform the actions * associated with this transition before actually transitioning to the * next state */ rc = llc_exec_sap_trans_actions(sap, trans, skb); if (rc) goto out; /* * Transition SAP to next state if all actions execute successfully */ sap->state = trans->next_state; out: return rc; } /** * llc_sap_state_process - sends event to SAP state machine * @sap: sap to use * @skb: pointer to occurred event * * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. * * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. 
*/ if (sock_queue_rcv_skb(skb->sk, skb) == 0) return; } kfree_skb(skb); } /** * llc_build_and_send_test_pkt - TEST interface for upper layers. * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a TEST pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_TEST_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_build_and_send_xid_pkt - XID interface for upper layers * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a XID pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_XID_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_sap_rcv - sends received pdus to the sap state machine * @sap: current sap component structure. * @skb: received frame. * @sk: socket to associate to frame * * Sends received pdus to the sap state machine. 
*/ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; skb_orphan(skb); sock_hold(sk); skb->sk = sk; skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } static inline bool llc_dgram_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sock *sk, const struct net *net) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && net_eq(sock_net(sk), net) && llc->laddr.lsap == laddr->lsap && ether_addr_equal(llc->laddr.mac, laddr->mac); } /** * llc_lookup_dgram - Finds dgram socket for the local sap/mac * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @net: netns to look up a socket in * * Search socket list of the SAP and finds connection using the local * mac, and local sap. Returns pointer for socket found, %NULL otherwise. */ static struct sock *llc_lookup_dgram(struct llc_sap *sap, const struct llc_addr *laddr, const struct net *net) { struct sock *rc; struct hlist_nulls_node *node; int slot = llc_sk_laddr_hashfn(sap, laddr); struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock_bh(); again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc, net)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc, net))) { sock_put(rc); continue; } goto found; } } rc = NULL; /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. 
*/ if (unlikely(get_nulls_value(node) != slot)) goto again; found: rcu_read_unlock_bh(); return rc; } static inline bool llc_mcast_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sk_buff *skb, const struct sock *sk) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && llc->laddr.lsap == laddr->lsap && llc->dev == skb->dev; } static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb, struct sock **stack, int count) { struct sk_buff *skb1; int i; for (i = 0; i < count; i++) { skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) { sock_put(stack[i]); continue; } llc_sap_rcv(sap, skb1, stack[i]); sock_put(stack[i]); } } /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @skb: PDU to deliver * * Search socket list of the SAP and finds connections with same sap. * Deliver clone to each. */ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { int i = 0; struct sock *sk; struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; sock_hold(sk); if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); i = 0; } } spin_unlock_bh(&sap->sk_lock); llc_do_mcast(sap, skb, stack, i); } void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr laddr; llc_pdu_decode_da(skb, laddr.mac); llc_pdu_decode_dsap(skb, &laddr.lsap); if (is_multicast_ether_addr(laddr.mac)) { llc_sap_mcast(sap, &laddr, skb); kfree_skb(skb); } else { struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev)); if (sk) { llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); } }
75 75 6 6 6 6 6 6 6 1 6 6 6 5 5 5 5 5 5 4 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 // SPDX-License-Identifier: GPL-2.0-only /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * SMC statistics netlink routines * * Copyright IBM Corp. 
2021 * * Author(s): Guvenc Gulce */ #include <linux/init.h> #include <linux/mutex.h> #include <linux/percpu.h> #include <linux/ctype.h> #include <linux/smc.h> #include <net/genetlink.h> #include <net/sock.h> #include "smc_netlink.h" #include "smc_stats.h" int smc_stats_init(struct net *net) { net->smc.fback_rsn = kzalloc(sizeof(*net->smc.fback_rsn), GFP_KERNEL); if (!net->smc.fback_rsn) goto err_fback; net->smc.smc_stats = alloc_percpu(struct smc_stats); if (!net->smc.smc_stats) goto err_stats; mutex_init(&net->smc.mutex_fback_rsn); return 0; err_stats: kfree(net->smc.fback_rsn); err_fback: return -ENOMEM; } void smc_stats_exit(struct net *net) { kfree(net->smc.fback_rsn); if (net->smc.smc_stats) free_percpu(net->smc.smc_stats); } static int smc_nl_fill_stats_rmb_data(struct sk_buff *skb, struct smc_stats *stats, int tech, int type) { struct smc_stats_rmbcnt *stats_rmb_cnt; struct nlattr *attrs; if (type == SMC_NLA_STATS_T_TX_RMB_STATS) stats_rmb_cnt = &stats->smc[tech].rmb_tx; else stats_rmb_cnt = &stats->smc[tech].rmb_rx; attrs = nla_nest_start(skb, type); if (!attrs) goto errout; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_REUSE_CNT, stats_rmb_cnt->reuse_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_SIZE_SM_PEER_CNT, stats_rmb_cnt->buf_size_small_peer_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_SIZE_SM_CNT, stats_rmb_cnt->buf_size_small_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_FULL_PEER_CNT, stats_rmb_cnt->buf_full_peer_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_FULL_CNT, stats_rmb_cnt->buf_full_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_ALLOC_CNT, stats_rmb_cnt->alloc_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_RMB_DGRADE_CNT, stats_rmb_cnt->dgrade_cnt, SMC_NLA_STATS_RMB_PAD)) goto errattr; 
nla_nest_end(skb, attrs); return 0; errattr: nla_nest_cancel(skb, attrs); errout: return -EMSGSIZE; } static int smc_nl_fill_stats_bufsize_data(struct sk_buff *skb, struct smc_stats *stats, int tech, int type) { struct smc_stats_memsize *stats_pload; struct nlattr *attrs; if (type == SMC_NLA_STATS_T_TXPLOAD_SIZE) stats_pload = &stats->smc[tech].tx_pd; else if (type == SMC_NLA_STATS_T_RXPLOAD_SIZE) stats_pload = &stats->smc[tech].rx_pd; else if (type == SMC_NLA_STATS_T_TX_RMB_SIZE) stats_pload = &stats->smc[tech].tx_rmbsize; else if (type == SMC_NLA_STATS_T_RX_RMB_SIZE) stats_pload = &stats->smc[tech].rx_rmbsize; else goto errout; attrs = nla_nest_start(skb, type); if (!attrs) goto errout; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_8K, stats_pload->buf[SMC_BUF_8K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_16K, stats_pload->buf[SMC_BUF_16K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_32K, stats_pload->buf[SMC_BUF_32K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_64K, stats_pload->buf[SMC_BUF_64K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_128K, stats_pload->buf[SMC_BUF_128K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_256K, stats_pload->buf[SMC_BUF_256K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_512K, stats_pload->buf[SMC_BUF_512K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_1024K, stats_pload->buf[SMC_BUF_1024K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_PLOAD_G_1024K, stats_pload->buf[SMC_BUF_G_1024K], SMC_NLA_STATS_PLOAD_PAD)) goto errattr; nla_nest_end(skb, attrs); return 0; errattr: nla_nest_cancel(skb, attrs); errout: return -EMSGSIZE; } static int smc_nl_fill_stats_tech_data(struct sk_buff *skb, struct smc_stats 
*stats, int tech) { struct smc_stats_tech *smc_tech; struct nlattr *attrs; smc_tech = &stats->smc[tech]; if (tech == SMC_TYPE_D) attrs = nla_nest_start(skb, SMC_NLA_STATS_SMCD_TECH); else attrs = nla_nest_start(skb, SMC_NLA_STATS_SMCR_TECH); if (!attrs) goto errout; if (smc_nl_fill_stats_rmb_data(skb, stats, tech, SMC_NLA_STATS_T_TX_RMB_STATS)) goto errattr; if (smc_nl_fill_stats_rmb_data(skb, stats, tech, SMC_NLA_STATS_T_RX_RMB_STATS)) goto errattr; if (smc_nl_fill_stats_bufsize_data(skb, stats, tech, SMC_NLA_STATS_T_TXPLOAD_SIZE)) goto errattr; if (smc_nl_fill_stats_bufsize_data(skb, stats, tech, SMC_NLA_STATS_T_RXPLOAD_SIZE)) goto errattr; if (smc_nl_fill_stats_bufsize_data(skb, stats, tech, SMC_NLA_STATS_T_TX_RMB_SIZE)) goto errattr; if (smc_nl_fill_stats_bufsize_data(skb, stats, tech, SMC_NLA_STATS_T_RX_RMB_SIZE)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CLNT_V1_SUCC, smc_tech->clnt_v1_succ_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CLNT_V2_SUCC, smc_tech->clnt_v2_succ_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SRV_V1_SUCC, smc_tech->srv_v1_succ_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SRV_V2_SUCC, smc_tech->srv_v2_succ_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_BYTES, smc_tech->rx_bytes, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_TX_BYTES, smc_tech->tx_bytes, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_uint(skb, SMC_NLA_STATS_T_RX_RMB_USAGE, smc_tech->rx_rmbuse)) goto errattr; if (nla_put_uint(skb, SMC_NLA_STATS_T_TX_RMB_USAGE, smc_tech->tx_rmbuse)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_RX_CNT, smc_tech->rx_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_TX_CNT, smc_tech->tx_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SENDPAGE_CNT, 0, SMC_NLA_STATS_PAD)) goto 
errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_CORK_CNT, smc_tech->cork_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_NDLY_CNT, smc_tech->ndly_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_SPLICE_CNT, smc_tech->splice_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_T_URG_DATA_CNT, smc_tech->urg_data_cnt, SMC_NLA_STATS_PAD)) goto errattr; nla_nest_end(skb, attrs); return 0; errattr: nla_nest_cancel(skb, attrs); errout: return -EMSGSIZE; } int smc_nl_get_stats(struct sk_buff *skb, struct netlink_callback *cb) { struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); struct net *net = sock_net(skb->sk); struct smc_stats *stats; struct nlattr *attrs; int cpu, i, size; void *nlh; u64 *src; u64 *sum; if (cb_ctx->pos[0]) goto errmsg; nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &smc_gen_nl_family, NLM_F_MULTI, SMC_NETLINK_GET_STATS); if (!nlh) goto errmsg; attrs = nla_nest_start(skb, SMC_GEN_STATS); if (!attrs) goto errnest; stats = kzalloc(sizeof(*stats), GFP_KERNEL); if (!stats) goto erralloc; size = sizeof(*stats) / sizeof(u64); for_each_possible_cpu(cpu) { src = (u64 *)per_cpu_ptr(net->smc.smc_stats, cpu); sum = (u64 *)stats; for (i = 0; i < size; i++) *(sum++) += *(src++); } if (smc_nl_fill_stats_tech_data(skb, stats, SMC_TYPE_D)) goto errattr; if (smc_nl_fill_stats_tech_data(skb, stats, SMC_TYPE_R)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_CLNT_HS_ERR_CNT, stats->clnt_hshake_err_cnt, SMC_NLA_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_STATS_SRV_HS_ERR_CNT, stats->srv_hshake_err_cnt, SMC_NLA_STATS_PAD)) goto errattr; nla_nest_end(skb, attrs); genlmsg_end(skb, nlh); cb_ctx->pos[0] = 1; kfree(stats); return skb->len; errattr: kfree(stats); erralloc: nla_nest_cancel(skb, attrs); errnest: genlmsg_cancel(skb, nlh); errmsg: return skb->len; } static int smc_nl_get_fback_details(struct sk_buff *skb, struct 
netlink_callback *cb, int pos, bool is_srv) { struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); struct net *net = sock_net(skb->sk); int cnt_reported = cb_ctx->pos[2]; struct smc_stats_fback *trgt_arr; struct nlattr *attrs; int rc = 0; void *nlh; if (is_srv) trgt_arr = &net->smc.fback_rsn->srv[0]; else trgt_arr = &net->smc.fback_rsn->clnt[0]; if (!trgt_arr[pos].fback_code) return -ENODATA; nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &smc_gen_nl_family, NLM_F_MULTI, SMC_NETLINK_GET_FBACK_STATS); if (!nlh) goto errmsg; attrs = nla_nest_start(skb, SMC_GEN_FBACK_STATS); if (!attrs) goto errout; if (nla_put_u8(skb, SMC_NLA_FBACK_STATS_TYPE, is_srv)) goto errattr; if (!cnt_reported) { if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_SRV_CNT, net->smc.fback_rsn->srv_fback_cnt, SMC_NLA_FBACK_STATS_PAD)) goto errattr; if (nla_put_u64_64bit(skb, SMC_NLA_FBACK_STATS_CLNT_CNT, net->smc.fback_rsn->clnt_fback_cnt, SMC_NLA_FBACK_STATS_PAD)) goto errattr; cnt_reported = 1; } if (nla_put_u32(skb, SMC_NLA_FBACK_STATS_RSN_CODE, trgt_arr[pos].fback_code)) goto errattr; if (nla_put_u16(skb, SMC_NLA_FBACK_STATS_RSN_CNT, trgt_arr[pos].count)) goto errattr; cb_ctx->pos[2] = cnt_reported; nla_nest_end(skb, attrs); genlmsg_end(skb, nlh); return rc; errattr: nla_nest_cancel(skb, attrs); errout: genlmsg_cancel(skb, nlh); errmsg: return -EMSGSIZE; } int smc_nl_get_fback_stats(struct sk_buff *skb, struct netlink_callback *cb) { struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); struct net *net = sock_net(skb->sk); int rc_srv = 0, rc_clnt = 0, k; int skip_serv = cb_ctx->pos[1]; int snum = cb_ctx->pos[0]; bool is_srv = true; mutex_lock(&net->smc.mutex_fback_rsn); for (k = 0; k < SMC_MAX_FBACK_RSN_CNT; k++) { if (k < snum) continue; if (!skip_serv) { rc_srv = smc_nl_get_fback_details(skb, cb, k, is_srv); if (rc_srv && rc_srv != -ENODATA) break; } else { skip_serv = 0; } rc_clnt = smc_nl_get_fback_details(skb, cb, k, !is_srv); if (rc_clnt && rc_clnt != -ENODATA) { 
skip_serv = 1; break; } if (rc_clnt == -ENODATA && rc_srv == -ENODATA) break; } mutex_unlock(&net->smc.mutex_fback_rsn); cb_ctx->pos[1] = skip_serv; cb_ctx->pos[0] = k; return skb->len; }
72 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM exceptions #if !defined(_TRACE_PAGE_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_PAGE_FAULT_H #include <linux/tracepoint.h> #include <asm/trace/common.h> extern int trace_pagefault_reg(void); extern void trace_pagefault_unreg(void); DECLARE_EVENT_CLASS(x86_exceptions, TP_PROTO(unsigned long address, struct pt_regs *regs, unsigned long error_code), TP_ARGS(address, regs, error_code), TP_STRUCT__entry( __field( unsigned long, address ) __field( unsigned long, ip ) __field( unsigned long, error_code ) ), TP_fast_assign( __entry->address = address; __entry->ip = regs->ip; __entry->error_code = error_code; ), TP_printk("address=%ps ip=%ps error_code=0x%lx", (void *)__entry->address, (void *)__entry->ip, __entry->error_code) ); #define DEFINE_PAGE_FAULT_EVENT(name) \ DEFINE_EVENT_FN(x86_exceptions, name, \ TP_PROTO(unsigned long address, struct pt_regs *regs, \ unsigned long error_code), \ TP_ARGS(address, regs, error_code), \ trace_pagefault_reg, trace_pagefault_unreg); DEFINE_PAGE_FAULT_EVENT(page_fault_user); DEFINE_PAGE_FAULT_EVENT(page_fault_kernel); #undef TRACE_INCLUDE_PATH #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE exceptions #endif /* _TRACE_PAGE_FAULT_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
467 165 469 469 467 13 469 469 469 469 165 165 164 165 165 165 8 165 165 165 13 14 14 8 22 14 8 22 10 22 13 11 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 // SPDX-License-Identifier: GPL-2.0-only /* * klist.c - Routines for manipulating klists. * * Copyright (C) 2005 Patrick Mochel * * This klist interface provides a couple of structures that wrap around * struct list_head to provide explicit list "head" (struct klist) and list * "node" (struct klist_node) objects. 
For struct klist, a spinlock is * included that protects access to the actual list itself. struct * klist_node provides a pointer to the klist that owns it and a kref * reference count that indicates the number of current users of that node * in the list. * * The entire point is to provide an interface for iterating over a list * that is safe and allows for modification of the list during the * iteration (e.g. insertion and removal), including modification of the * current node on the list. * * It works using a 3rd object type - struct klist_iter - that is declared * and initialized before an iteration. klist_next() is used to acquire the * next element in the list. It returns NULL if there are no more items. * Internally, that routine takes the klist's lock, decrements the * reference count of the previous klist_node and increments the count of * the next klist_node. It then drops the lock and returns. * * There are primitives for adding and removing nodes to/from a klist. * When deleting, klist_del() will simply decrement the reference count. * Only when the count goes to 0 is the node removed from the list. * klist_remove() will try to delete the node from the list and block until * it is actually removed. This is useful for objects (like devices) that * have been removed from the system and must be freed (but must wait until * all accessors have finished). */ #include <linux/klist.h> #include <linux/export.h> #include <linux/sched.h> /* * Use the lowest bit of n_klist to mark deleted nodes and exclude * dead ones from iteration. 
*/ #define KNODE_DEAD 1LU #define KNODE_KLIST_MASK ~KNODE_DEAD static struct klist *knode_klist(struct klist_node *knode) { return (struct klist *) ((unsigned long)knode->n_klist & KNODE_KLIST_MASK); } static bool knode_dead(struct klist_node *knode) { return (unsigned long)knode->n_klist & KNODE_DEAD; } static void knode_set_klist(struct klist_node *knode, struct klist *klist) { knode->n_klist = klist; /* no knode deserves to start its life dead */ WARN_ON(knode_dead(knode)); } static void knode_kill(struct klist_node *knode) { /* and no knode should die twice ever either, see we're very humane */ WARN_ON(knode_dead(knode)); *(unsigned long *)&knode->n_klist |= KNODE_DEAD; } /** * klist_init - Initialize a klist structure. * @k: The klist we're initializing. * @get: The get function for the embedding object (NULL if none) * @put: The put function for the embedding object (NULL if none) * * Initialises the klist structure. If the klist_node structures are * going to be embedded in refcounted objects (necessary for safe * deletion) then the get/put arguments are used to initialise * functions that take and release references on the embedding * objects. */ void klist_init(struct klist *k, void (*get)(struct klist_node *), void (*put)(struct klist_node *)) { INIT_LIST_HEAD(&k->k_list); spin_lock_init(&k->k_lock); k->get = get; k->put = put; } EXPORT_SYMBOL_GPL(klist_init); static void add_head(struct klist *k, struct klist_node *n) { spin_lock(&k->k_lock); list_add(&n->n_node, &k->k_list); spin_unlock(&k->k_lock); } static void add_tail(struct klist *k, struct klist_node *n) { spin_lock(&k->k_lock); list_add_tail(&n->n_node, &k->k_list); spin_unlock(&k->k_lock); } static void klist_node_init(struct klist *k, struct klist_node *n) { INIT_LIST_HEAD(&n->n_node); kref_init(&n->n_ref); knode_set_klist(n, k); if (k->get) k->get(n); } /** * klist_add_head - Initialize a klist_node and add it to front. * @n: node we're adding. * @k: klist it's going on. 
*/ void klist_add_head(struct klist_node *n, struct klist *k) { klist_node_init(k, n); add_head(k, n); } EXPORT_SYMBOL_GPL(klist_add_head); /** * klist_add_tail - Initialize a klist_node and add it to back. * @n: node we're adding. * @k: klist it's going on. */ void klist_add_tail(struct klist_node *n, struct klist *k) { klist_node_init(k, n); add_tail(k, n); } EXPORT_SYMBOL_GPL(klist_add_tail); /** * klist_add_behind - Init a klist_node and add it after an existing node * @n: node we're adding. * @pos: node to put @n after */ void klist_add_behind(struct klist_node *n, struct klist_node *pos) { struct klist *k = knode_klist(pos); klist_node_init(k, n); spin_lock(&k->k_lock); list_add(&n->n_node, &pos->n_node); spin_unlock(&k->k_lock); } EXPORT_SYMBOL_GPL(klist_add_behind); /** * klist_add_before - Init a klist_node and add it before an existing node * @n: node we're adding. * @pos: node to put @n after */ void klist_add_before(struct klist_node *n, struct klist_node *pos) { struct klist *k = knode_klist(pos); klist_node_init(k, n); spin_lock(&k->k_lock); list_add_tail(&n->n_node, &pos->n_node); spin_unlock(&k->k_lock); } EXPORT_SYMBOL_GPL(klist_add_before); struct klist_waiter { struct list_head list; struct klist_node *node; struct task_struct *process; int woken; }; static DEFINE_SPINLOCK(klist_remove_lock); static LIST_HEAD(klist_remove_waiters); static void klist_release(struct kref *kref) { struct klist_waiter *waiter, *tmp; struct klist_node *n = container_of(kref, struct klist_node, n_ref); WARN_ON(!knode_dead(n)); list_del(&n->n_node); spin_lock(&klist_remove_lock); list_for_each_entry_safe(waiter, tmp, &klist_remove_waiters, list) { if (waiter->node != n) continue; list_del(&waiter->list); waiter->woken = 1; mb(); wake_up_process(waiter->process); } spin_unlock(&klist_remove_lock); knode_set_klist(n, NULL); } static int klist_dec_and_del(struct klist_node *n) { return kref_put(&n->n_ref, klist_release); } static void klist_put(struct klist_node *n, bool 
kill) { struct klist *k = knode_klist(n); void (*put)(struct klist_node *) = k->put; spin_lock(&k->k_lock); if (kill) knode_kill(n); if (!klist_dec_and_del(n)) put = NULL; spin_unlock(&k->k_lock); if (put) put(n); } /** * klist_del - Decrement the reference count of node and try to remove. * @n: node we're deleting. */ void klist_del(struct klist_node *n) { klist_put(n, true); } EXPORT_SYMBOL_GPL(klist_del); /** * klist_remove - Decrement the refcount of node and wait for it to go away. * @n: node we're removing. */ void klist_remove(struct klist_node *n) { struct klist_waiter waiter; waiter.node = n; waiter.process = current; waiter.woken = 0; spin_lock(&klist_remove_lock); list_add(&waiter.list, &klist_remove_waiters); spin_unlock(&klist_remove_lock); klist_del(n); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); if (waiter.woken) break; schedule(); } __set_current_state(TASK_RUNNING); } EXPORT_SYMBOL_GPL(klist_remove); /** * klist_node_attached - Say whether a node is bound to a list or not. * @n: Node that we're testing. */ int klist_node_attached(struct klist_node *n) { return (n->n_klist != NULL); } EXPORT_SYMBOL_GPL(klist_node_attached); /** * klist_iter_init_node - Initialize a klist_iter structure. * @k: klist we're iterating. * @i: klist_iter we're filling. * @n: node to start with. * * Similar to klist_iter_init(), but starts the action off with @n, * instead of with the list head. */ void klist_iter_init_node(struct klist *k, struct klist_iter *i, struct klist_node *n) { i->i_klist = k; i->i_cur = NULL; if (n && kref_get_unless_zero(&n->n_ref)) i->i_cur = n; } EXPORT_SYMBOL_GPL(klist_iter_init_node); /** * klist_iter_init - Iniitalize a klist_iter structure. * @k: klist we're iterating. * @i: klist_iter structure we're filling. * * Similar to klist_iter_init_node(), but start with the list head. 
*/ void klist_iter_init(struct klist *k, struct klist_iter *i) { klist_iter_init_node(k, i, NULL); } EXPORT_SYMBOL_GPL(klist_iter_init); /** * klist_iter_exit - Finish a list iteration. * @i: Iterator structure. * * Must be called when done iterating over list, as it decrements the * refcount of the current node. Necessary in case iteration exited before * the end of the list was reached, and always good form. */ void klist_iter_exit(struct klist_iter *i) { if (i->i_cur) { klist_put(i->i_cur, false); i->i_cur = NULL; } } EXPORT_SYMBOL_GPL(klist_iter_exit); static struct klist_node *to_klist_node(struct list_head *n) { return container_of(n, struct klist_node, n_node); } /** * klist_prev - Ante up prev node in list. * @i: Iterator structure. * * First grab list lock. Decrement the reference count of the previous * node, if there was one. Grab the prev node, increment its reference * count, drop the lock, and return that prev node. */ struct klist_node *klist_prev(struct klist_iter *i) { void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; unsigned long flags; spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); if (!klist_dec_and_del(last)) put = NULL; } else prev = to_klist_node(i->i_klist->k_list.prev); i->i_cur = NULL; while (prev != to_klist_node(&i->i_klist->k_list)) { if (likely(!knode_dead(prev))) { kref_get(&prev->n_ref); i->i_cur = prev; break; } prev = to_klist_node(prev->n_node.prev); } spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); return i->i_cur; } EXPORT_SYMBOL_GPL(klist_prev); /** * klist_next - Ante up next node in list. * @i: Iterator structure. * * First grab list lock. Decrement the reference count of the previous * node, if there was one. Grab the next node, increment its reference * count, drop the lock, and return that next node. 
*/ struct klist_node *klist_next(struct klist_iter *i) { void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; unsigned long flags; spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); if (!klist_dec_and_del(last)) put = NULL; } else next = to_klist_node(i->i_klist->k_list.next); i->i_cur = NULL; while (next != to_klist_node(&i->i_klist->k_list)) { if (likely(!knode_dead(next))) { kref_get(&next->n_ref); i->i_cur = next; break; } next = to_klist_node(next->n_node.next); } spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); return i->i_cur; } EXPORT_SYMBOL_GPL(klist_next);
26 83 18 19 375 215 9 158 365 97 3 93 88 88 209 3 2 25 24 11 22 18 31 211 27 84 3 2 18 27 4 22 98 1 1 7 6 5 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H #include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/udp.h> #include <uapi/linux/tcp.h> #include <uapi/linux/virtio_net.h> static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) { switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: return protocol == cpu_to_be16(ETH_P_IP); case VIRTIO_NET_HDR_GSO_TCPV6: return protocol == cpu_to_be16(ETH_P_IPV6); case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP_L4: return protocol == cpu_to_be16(ETH_P_IP) || protocol == cpu_to_be16(ETH_P_IPV6); default: return false; } } static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { if (skb->protocol) return 0; switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: case VIRTIO_NET_HDR_GSO_UDP_L4: skb->protocol = cpu_to_be16(ETH_P_IP); break; 
case VIRTIO_NET_HDR_GSO_TCPV6: skb->protocol = cpu_to_be16(ETH_P_IPV6); break; default: return -EINVAL; } return 0; } static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { unsigned int nh_min_len = sizeof(struct iphdr); unsigned int gso_type = 0; unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: gso_type = SKB_GSO_TCPV4; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); break; case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; ip_proto = IPPROTO_TCP; thlen = sizeof(struct tcphdr); nh_min_len = sizeof(struct ipv6hdr); break; case VIRTIO_NET_HDR_GSO_UDP: gso_type = SKB_GSO_UDP; ip_proto = IPPROTO_UDP; thlen = sizeof(struct udphdr); break; case VIRTIO_NET_HDR_GSO_UDP_L4: gso_type = SKB_GSO_UDP_L4; ip_proto = IPPROTO_UDP; thlen = sizeof(struct udphdr); break; default: return -EINVAL; } if (hdr->gso_type & VIRTIO_NET_HDR_GSO_ECN) gso_type |= SKB_GSO_TCP_ECN; if (hdr->gso_size == 0) return -EINVAL; } skb_reset_mac_header(skb); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start); u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); if (!pskb_may_pull(skb, needed)) return -EINVAL; if (!skb_partial_csum_set(skb, start, off)) return -EINVAL; if (skb_transport_offset(skb) < nh_min_len) return -EINVAL; nh_min_len = skb_transport_offset(skb); p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } else { /* gso packets without NEEDS_CSUM do not set transport_offset. * probe and drop if does not match one of the above types. 
*/ if (gso_type && skb->network_header) { struct flow_keys_basic keys; if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); if (!protocol) virtio_net_hdr_set_proto(skb, hdr); else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; else skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, 0)) { /* UFO does not specify ipv4 or 6: try both */ if (gso_type & SKB_GSO_UDP && skb->protocol == htons(ETH_P_IP)) { skb->protocol = htons(ETH_P_IPV6); goto retry; } return -EINVAL; } p_off = keys.control.thoff + thlen; if (!pskb_may_pull(skb, p_off) || keys.basic.ip_proto != ip_proto) return -EINVAL; skb_set_transport_header(skb, keys.control.thoff); } else if (gso_type) { p_off = nh_min_len + thlen; if (!pskb_may_pull(skb, p_off)) return -EINVAL; } } if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); switch (gso_type & ~SKB_GSO_TCP_ECN) { case SKB_GSO_UDP: /* UFO may not include transport header in gso_size. */ nh_off -= thlen; break; case SKB_GSO_UDP_L4: if (!(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)) return -EINVAL; if (skb->csum_offset != offsetof(struct udphdr, check)) return -EINVAL; if (skb->len - p_off > gso_size * UDP_MAX_SEGMENTS) return -EINVAL; if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; case SKB_GSO_TCPV4: case SKB_GSO_TCPV6: if (skb->ip_summed == CHECKSUM_PARTIAL && skb->csum_offset != offsetof(struct tcphdr, check)) return -EINVAL; break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ if (gso_size == GSO_BY_FRAGS) return -EINVAL; /* Too small packets are not really GSO ones. */ if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; /* Header must be checked, and gso_segs computed. 
*/ shinfo->gso_type |= SKB_GSO_DODGY; shinfo->gso_segs = 0; } } return 0; } static inline int virtio_net_hdr_from_skb(const struct sk_buff *skb, struct virtio_net_hdr *hdr, bool little_endian, bool has_data_valid, int vlan_hlen) { memset(hdr, 0, sizeof(*hdr)); /* no info leak */ if (skb_is_gso(skb)) { struct skb_shared_info *sinfo = skb_shinfo(skb); /* This is a hint as to how much should be linear. */ hdr->hdr_len = __cpu_to_virtio16(little_endian, skb_headlen(skb)); hdr->gso_size = __cpu_to_virtio16(little_endian, sinfo->gso_size); if (sinfo->gso_type & SKB_GSO_TCPV4) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; else if (sinfo->gso_type & SKB_GSO_TCPV6) hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; else if (sinfo->gso_type & SKB_GSO_UDP_L4) hdr->gso_type = VIRTIO_NET_HDR_GSO_UDP_L4; else return -EINVAL; if (sinfo->gso_type & SKB_GSO_TCP_ECN) hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } else hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE; if (skb->ip_summed == CHECKSUM_PARTIAL) { hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; hdr->csum_start = __cpu_to_virtio16(little_endian, skb_checksum_start_offset(skb) + vlan_hlen); hdr->csum_offset = __cpu_to_virtio16(little_endian, skb->csum_offset); } else if (has_data_valid && skb->ip_summed == CHECKSUM_UNNECESSARY) { hdr->flags = VIRTIO_NET_HDR_F_DATA_VALID; } /* else everything is zero */ return 0; } #endif /* _LINUX_VIRTIO_NET_H */
3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 // SPDX-License-Identifier: GPL-2.0-only /* * AppArmor security module * * This file contains AppArmor security identifier (secid) manipulation fns * * Copyright 2009-2017 Canonical Ltd. * * AppArmor allocates a unique secid for every label used. If a label * is replaced it receives the secid of the label it is replacing. */ #include <linux/errno.h> #include <linux/err.h> #include <linux/gfp.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/xarray.h> #include "include/cred.h" #include "include/lib.h" #include "include/secid.h" #include "include/label.h" #include "include/policy_ns.h" /* * secids - do not pin labels with a refcount. They rely on the label * properly updating/freeing them */ #define AA_FIRST_SECID 2 static DEFINE_XARRAY_FLAGS(aa_secids, XA_FLAGS_LOCK_IRQ | XA_FLAGS_TRACK_FREE); int apparmor_display_secid_mode; /* * TODO: allow policy to reserve a secid range? 
* TODO: add secid pinning * TODO: use secid_update in label replace */ /* * see label for inverse aa_label_to_secid */ struct aa_label *aa_secid_to_label(u32 secid) { return xa_load(&aa_secids, secid); } static int apparmor_label_to_secctx(struct aa_label *label, struct lsm_context *cp) { /* TODO: cache secctx and ref count so we don't have to recreate */ int flags = FLAG_VIEW_SUBNS | FLAG_HIDDEN_UNCONFINED | FLAG_ABS_ROOT; int len; if (!label) return -EINVAL; if (apparmor_display_secid_mode) flags |= FLAG_SHOW_MODE; if (cp) len = aa_label_asxprint(&cp->context, root_ns, label, flags, GFP_ATOMIC); else len = aa_label_snxprint(NULL, 0, root_ns, label, flags); if (len < 0) return -ENOMEM; if (cp) { cp->len = len; cp->id = LSM_ID_APPARMOR; } return len; } int apparmor_secid_to_secctx(u32 secid, struct lsm_context *cp) { struct aa_label *label = aa_secid_to_label(secid); return apparmor_label_to_secctx(label, cp); } int apparmor_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp) { struct aa_label *label; label = prop->apparmor.label; return apparmor_label_to_secctx(label, cp); } int apparmor_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid) { struct aa_label *label; label = aa_label_strn_parse(&root_ns->unconfined->label, secdata, seclen, GFP_KERNEL, false, false); if (IS_ERR(label)) return PTR_ERR(label); *secid = label->secid; return 0; } void apparmor_release_secctx(struct lsm_context *cp) { if (cp->id == LSM_ID_APPARMOR) { kfree(cp->context); cp->context = NULL; cp->id = LSM_ID_UNDEF; } } /** * aa_alloc_secid - allocate a new secid for a profile * @label: the label to allocate a secid for * @gfp: memory allocation flags * * Returns: 0 with @label->secid initialized * <0 returns error with @label->secid set to AA_SECID_INVALID */ int aa_alloc_secid(struct aa_label *label, gfp_t gfp) { unsigned long flags; int ret; xa_lock_irqsave(&aa_secids, flags); ret = __xa_alloc(&aa_secids, &label->secid, label, XA_LIMIT(AA_FIRST_SECID, INT_MAX), gfp); 
xa_unlock_irqrestore(&aa_secids, flags); if (ret < 0) { label->secid = AA_SECID_INVALID; return ret; } return 0; } /** * aa_free_secid - free a secid * @secid: secid to free */ void aa_free_secid(u32 secid) { unsigned long flags; xa_lock_irqsave(&aa_secids, flags); __xa_erase(&aa_secids, secid); xa_unlock_irqrestore(&aa_secids, flags); }
314 314 314 1 1 236 139 79 47 43 114 22 291 292 291 161 224 224 291 291 290 316 88 270 316 315 316 315 315 315 315 302 4 85 85 85 77 10 70 18 18 77 6 9 1 85 85 85 85 85 85 85 85 84 23 23 23 23 23 88 86 88 88 88 45 45 44 45 45 45 45 45 45 43 40 43 43 43 43 35 43 88 87 88 88 88 88 28 60 60 45 15 53 45 45 41 45 41 15 35 9 34 35 9 121 121 465 163 443 440 446 446 443 2 377 154 378 248 163 393 391 392 392 388 2 24 392 51 113 297 146 333 21 121 121 121 121 121 29 29 66 262 269 270 243 29 29 29 29 29 29 270 270 268 287 41 269 287 287 3 1 282 283 5 234 49 16 16 263 275 2 278 267 66 55 265 265 265 18 254 100 222 262 261 4 259 171 258 248 16 218 99 29 19 19 2 30 30 28 30 26 191 105 16 257 258 17 41 223 225 242 294 268 263 28 245 1 228 17 244 60 244 244 226 9 13 243 243 243 244 244 138 128 19 110 18 127 244 243 242 224 18 244 188 188 1 187 51 51 14 48 48 52 52 4 48 3 45 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 
310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 
810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 
1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 
1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 
2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 output functions * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on linux/net/ipv4/ip_output.c * * Changes: * A.N.Kuznetsov : airthmetics in fragmentation. * extension headers are implemented. * route changes now work. * ip6_forward does not confuse sniffers. * etc. * * H. von Brand : Added missing #include <linux/string.h> * Imran Patel : frag id should be in NBO * Kazunori MIYAZAWA @USAGI * : add ip6_append_data and related functions * for datagram xmit */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/route.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/bpf-cgroup.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/sock.h> #include <net/snmp.h> #include <net/gso.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/rawv6.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> #include <net/l3mdev.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever. 
*/ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { /* Make sure idev stays alive */ rcu_read_lock(); skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return -ENOMEM; } rcu_read_unlock(); } hdr = ipv6_hdr(skb); daddr = &hdr->daddr; if (ipv6_addr_is_multicast(daddr)) { if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_is_socket(net, skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); /* Do not check for IFF_ALLMULTI; multicast routing is not supported in any case. */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (hdr->hop_limit == 0) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len); if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL && !(dev->flags & IFF_LOOPBACK)) { kfree_skb(skb); return 0; } } if (lwtunnel_xmit_redirect(dst->lwtstate)) { int res = lwtunnel_xmit(skb); if (res != LWTUNNEL_XMIT_CONTINUE) return res; } IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), daddr); neigh = __ipv6_neigh_lookup_noref(dev, nexthop); if (IS_ERR_OR_NULL(neigh)) { if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dev, false); if (IS_ERR(neigh)) { rcu_read_unlock(); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES); kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL); return -EINVAL; } } sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); rcu_read_unlock(); return ret; } static int ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu) { struct sk_buff *segs, *nskb; netdev_features_t features; int ret = 0; /* Please see corresponding comment in ip_finish_output_gso 
* describing the cases where GSO segment length exceeds the * egress MTU. */ features = netif_skb_features(skb); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); return -ENOMEM; } consume_skb(skb); skb_list_walk_safe(segs, segs, nskb) { int err; skb_mark_not_on_list(segs); /* Last GSO segment can be smaller than gso_size (and MTU). * Adding a fragment header would produce an "atomic fragment", * which is considered harmful (RFC-8021). Avoid that. */ err = segs->len > mtu ? ip6_fragment(net, sk, segs, ip6_finish_output2) : ip6_finish_output2(net, sk, segs); if (err && ret == 0) ret = err; } return ret; } static int ip6_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu) { if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) && !skb_gso_validate_network_len(skb, mtu)) return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu); return ip6_finish_output2(net, sk, skb); } static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned int mtu; #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IP6CB(skb)->flags |= IP6SKB_REROUTED; return dst_output(net, sk, skb); } #endif mtu = ip6_skb_dst_mtu(skb); if (skb_is_gso(skb)) return ip6_finish_output_gso(net, sk, skb, mtu); if (skb->len > mtu || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(net, sk, skb, ip6_finish_output2); return ip6_finish_output2(net, sk, skb); } static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { int ret; ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb); switch (ret) { case NET_XMIT_SUCCESS: case NET_XMIT_CN: return __ip6_finish_output(net, sk, skb) ? 
: ret; default: kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS); return ret; } } int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED); return 0; } return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, indev, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } EXPORT_SYMBOL(ip6_output); bool ip6_autoflowlabel(struct net *net, const struct sock *sk) { if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk)) return ip6_default_np_autolabel(net); return inet6_test_bit(AUTOFLOWLABEL, sk); } /* * xmit an sk_buff (used by TCP, SCTP and DCCP) * Note : socket lock is not held for SYNACK packets, but might be modified * by calls to skb_set_owner_w() and ipv6_local_error(), * which are using proper atomic operations or spinlocks. 
*/ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority) { struct net *net = sock_net(sk); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; struct inet6_dev *idev = ip6_dst_idev(dst); struct hop_jumbo_hdr *hop_jumbo; int hoplen = sizeof(*hop_jumbo); unsigned int head_room; struct ipv6hdr *hdr; u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; u32 mtu; head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev); if (opt) head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { /* Make sure idev stays alive */ rcu_read_lock(); skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return -ENOBUFS; } rcu_read_unlock(); } if (opt) { seg_len += opt->opt_nflen + opt->opt_flen; if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); if (opt->opt_nflen) ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop, &fl6->saddr); } if (unlikely(seg_len > IPV6_MAXPLEN)) { hop_jumbo = skb_push(skb, hoplen); hop_jumbo->nexthdr = proto; hop_jumbo->hdrlen = 0; hop_jumbo->tlv_type = IPV6_TLV_JUMBO; hop_jumbo->tlv_len = 4; hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen); proto = IPPROTO_HOPOPTS; seg_len = 0; IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO; } skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); /* * Fill in the IPv6 header */ if (np) hlimit = READ_ONCE(np->hop_limit); if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, ip6_autoflowlabel(net, sk), fl6)); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; hdr->saddr = fl6->saddr; hdr->daddr = *first_hop; skb->protocol = htons(ETH_P_IPV6); skb->priority = priority; 
skb->mark = mark; mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); /* if egress device is enslaved to an L3 master device pass the * skb to its handler for processing */ skb = l3mdev_ip6_out((struct sock *)sk, skb); if (unlikely(!skb)) return 0; /* hooks should never assume socket lock is held. * we promote our socket to non const */ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, (struct sock *)sk, skb, NULL, dev, dst_output); } skb->dev = dev; /* ipv6_local_error() does not require socket lock, * we promote our socket to non const */ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } EXPORT_SYMBOL(ip6_xmit); static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; struct sock *last = NULL; read_lock(&ip6_ra_lock); for (ra = ip6_ra_chain; ra; ra = ra->next) { struct sock *sk = ra->sk; if (sk && ra->sel == sel && (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (inet6_test_bit(RTALERT_ISOLATE, sk) && !net_eq(sock_net(sk), dev_net(skb->dev))) { continue; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) rawv6_rcv(last, skb2); } last = sk; } } if (last) { rawv6_rcv(last, skb); read_unlock(&ip6_ra_lock); return 1; } read_unlock(&ip6_ra_lock); return 0; } static int ip6_forward_proxy_check(struct sk_buff *skb) { struct ipv6hdr *hdr = ipv6_hdr(skb); u8 nexthdr = hdr->nexthdr; __be16 frag_off; int offset; if (ipv6_ext_hdr(nexthdr)) { offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); if (offset < 0) return 0; } else offset = sizeof(struct ipv6hdr); if (nexthdr == IPPROTO_ICMPV6) { struct icmp6hdr *icmp6; if (!pskb_may_pull(skb, (skb_network_header(skb) + offset + 1 - skb->data))) return 0; icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset); switch (icmp6->icmp6_type) { case 
NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: /* For reaction involving unicast neighbor discovery * message destined to the proxied address, pass it to * input function. */ return 1; default: break; } } /* * The proxying router can't forward traffic sent to a link-local * address, so signal the sender and discard the packet. This * behavior is clarified by the MIPv6 specification. */ if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { dst_link_failure(skb); return -1; } return 0; } static inline int ip6_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_NET_SWITCHDEV if (skb->offload_l3_fwd_mark) { consume_skb(skb); return 0; } #endif skb_clear_tstamp(skb); return dst_output(net, sk, skb); } static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { if (skb->len <= mtu) return false; /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) return true; if (skb->ignore_df) return false; if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) return false; return true; } int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); struct inet6_dev *idev; SKB_DR(reason); u32 mtu; idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); if (READ_ONCE(net->ipv6.devconf_all->forwarding) == 0) goto error; if (skb->pkt_type != PACKET_HOST) goto drop; if (unlikely(skb->sk)) goto drop; if (skb_warn_if_lro(skb)) goto drop; if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) && (!idev || !READ_ONCE(idev->cnf.disable_policy)) && !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } skb_forward_csum(skb); /* * We DO NOT make any 
processing on * RA packets, pushing them to user level AS IS * without ane WARRANTY that application will be able * to interpret them. The reason is that we * cannot make anything clever here. * * We are not end-node, so that if packet contains * AH/ESP, we cannot make anything. * Defragmentation also would be mistake, RA packets * cannot be fragmented, because there is no warranty * that different fragments will go along one path. --ANK */ if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { if (ip6_call_ra_chain(skb, ntohs(opt->ra))) return 0; } /* * check and decrement ttl */ if (hdr->hop_limit <= 1) { icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS); kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR); return -ETIMEDOUT; } /* XXX: idev->cnf.proxy_ndp? */ if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { /* It's tempting to decrease the hop limit * here by 1, as we do at the end of the * function too. * * But that would be incorrect, as proxying is * not forwarding. The ip6_input function * will handle this packet locally, and it * depends on the hop limit being unchanged. * * One example is the NDP hop limit, that * always has to stay 255, but other would be * similar checks around RA packets, where the * user can even change the desired limit. */ return ip6_input(skb); } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); SKB_DR_SET(reason, XFRM_POLICY); goto drop; } dst = skb_dst(skb); /* IPv6 specs say nothing about it, but it is clear that we cannot send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. 
*/ if (IP6CB(skb)->iif == dst->dev->ifindex && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; struct inet_peer *peer; struct rt6_info *rt; /* * incoming and outgoing devices are the same * send a redirect. */ rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) target = &rt->rt6i_gateway; else target = &hdr->daddr; rcu_read_lock(); peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ if (inet_peer_xrlim_allow(peer, 1*HZ)) ndisc_send_redirect(skb, target); rcu_read_unlock(); } else { int addrtype = ipv6_addr_type(&hdr->saddr); /* This check is security critical. */ if (addrtype == IPV6_ADDR_ANY || addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK)) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOT_NEIGHBOUR, 0); goto error; } } __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); mtu = ip6_dst_mtu_maybe_forward(dst, true); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS); __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } hdr = ipv6_hdr(skb); /* Mangling hops number delayed to point after skb COW */ hdr->hop_limit--; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, net, NULL, skb, skb->dev, dst->dev, ip6_forward_finish); error: __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); SKB_DR_SET(reason, IP_INADDRERRORS); drop: kfree_skb_reason(skb, reason); return -EINVAL; } static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) { to->pkt_type = 
from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; skb_dst_drop(to); skb_dst_set(to, dst_clone(skb_dst(from))); to->dev = from->dev; to->mark = from->mark; skb_copy_hash(to, from); #ifdef CONFIG_NET_SCHED to->tc_index = from->tc_index; #endif nf_copy(to, from); skb_ext_copy(to, from); skb_copy_secmark(to, from); } int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr, u8 nexthdr, __be32 frag_id, struct ip6_fraglist_iter *iter) { unsigned int first_len; struct frag_hdr *fh; /* BUILD HEADER */ *prevhdr = NEXTHDR_FRAGMENT; iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!iter->tmp_hdr) return -ENOMEM; iter->frag = skb_shinfo(skb)->frag_list; skb_frag_list_init(skb); iter->offset = 0; iter->hlen = hlen; iter->frag_id = frag_id; iter->nexthdr = nexthdr; __skb_pull(skb, hlen); fh = __skb_push(skb, sizeof(struct frag_hdr)); __skb_push(skb, hlen); skb_reset_network_header(skb); memcpy(skb_network_header(skb), iter->tmp_hdr, hlen); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); fh->identification = frag_id; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); return 0; } EXPORT_SYMBOL(ip6_fraglist_init); void ip6_fraglist_prepare(struct sk_buff *skb, struct ip6_fraglist_iter *iter) { struct sk_buff *frag = iter->frag; unsigned int hlen = iter->hlen; struct frag_hdr *fh; frag->ip_summed = CHECKSUM_NONE; skb_reset_transport_header(frag); fh = __skb_push(frag, sizeof(struct frag_hdr)); __skb_push(frag, hlen); skb_reset_network_header(frag); memcpy(skb_network_header(frag), iter->tmp_hdr, hlen); iter->offset += skb->len - hlen - sizeof(struct frag_hdr); fh->nexthdr = iter->nexthdr; fh->reserved = 0; fh->frag_off = htons(iter->offset); if (frag->next) fh->frag_off |= htons(IP6_MF); fh->identification = iter->frag_id; ipv6_hdr(frag)->payload_len = htons(frag->len - 
sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

/*
 * Initialise the slow-path fragmentation cursor in @state.
 *
 * The header rewrite information (@prevhdr, @nexthdr, @frag_id), the
 * unfragmentable header length @hlen, the per-fragment payload limit @mtu and
 * the head/tail room requirements are simply recorded.  The cursor starts
 * right behind the @hlen header bytes with a fragment offset of 0.
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Payload bytes still to be sent */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

/*
 * Build the next fragment of @skb in a freshly allocated skb.
 *
 * The fragment gets a copy of the first state->hlen bytes of @skb, the byte
 * at the state->prevhdr offset is overwritten with NEXTHDR_FRAGMENT, a
 * fragment header is filled in and at most state->mtu payload bytes are
 * copied starting at state->ptr.  @state is advanced past the copied data.
 *
 * Returns the new fragment, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	 * then align the next start on an eight byte boundary
	 */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	/* The fragment header sits directly behind the copied headers. */
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the same header byte in the copy that prevhdr marks in @skb. */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	/* More data pending: flag this as a non-final fragment. */
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

/*
 * Fragment @skb and pass every fragment to @output.
 *
 * If @skb carries a frag list whose members already have a usable geometry
 * (size, 8-byte alignment, headroom, not shared/cloned), the list members are
 * turned into the fragments in place (fast path).  Otherwise every fragment
 * is copied into a new skb with ip6_frag_init()/ip6_frag_next() (slow path).
 *
 * The "fail" exits free @skb; "fail_toobig" additionally sends an
 * ICMPV6_PKT_TOOBIG carrying the MTU and reports -EMSGSIZE.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	u8 tstamp_type = skb->tstamp_type;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Keep an offset: prevhdr is recomputed after skb_checksum_help(). */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np) {
		u32 frag_size = READ_ONCE(np->frag_size);

		if (frag_size && frag_size < mtu)
			mtu = frag_size;
	}
	/* Need room for the headers, a fragment header and 8 payload bytes. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on mtu is the payload space available per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down.
			 */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, tstamp_type);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		/* Drop the fragments that were not handed to @output. */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		/* Undo the ownership/truesize changes made before the bail out. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		skb_set_delivery_time(frag, tstamp, tstamp_type);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	/* All data was copied into the fragments; the original is done. */
	consume_skb(skb);
	return err;

fail_toobig:
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

/*
 * Returns non-zero when @fl_addr matches neither a /128 route key @rt_key
 * nor the cached address @addr_cache (a NULL cache never matches).
 */
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

/*
 * Validate a dst taken from the socket cache against flow @fl6.
 *
 * Returns @dst if it is still usable.  Otherwise the reference on @dst is
 * released and NULL is returned.  A NULL @dst is passed through.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = dst_rt6_info(dst);
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

/*
 * Common route lookup for flow @fl6.
 *
 * If the flow has no source address yet, one is selected and stored in
 * fl6->saddr.  A *@dst that is already set on entry (and no unspecified
 * source address) skips the route lookup itself.
 *
 * Returns 0 with *@dst holding the route, or an error with *@dst released
 * and set to NULL.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : dst_rt6_info(*dst);

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
					  fl6->flowi6_l3mdev,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is not in a
	 * NUD_VALID state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = dst_rt6_info(*dst);
	rcu_read_lock();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source needs a v4-mapped or unspecified destination. */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
*/ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; return ip6_dst_lookup_tail(net, sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** * ip6_dst_lookup_flow - perform route lookup on flow with ipsec * @net: Network namespace to perform lookup in * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * * This function performs a route lookup on the given flow. * * It returns a valid dst pointer on success, or a pointer encoded * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; err = ip6_dst_lookup_tail(net, sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); /** * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @connected: whether @sk is connected or not * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * * In addition, for a connected socket, cache the dst in the socket * if the current cache is not valid. * * It returns a valid dst pointer on success, or a pointer encoded * error code. 
*/
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	/* Reuse the socket's cached route if it still matches this flow. */
	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	/* The cache gets its own reference; the caller keeps the original. */
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/*
 * Duplicate an option header; (hdrlen + 1) * 8 bytes are copied.
 * Returns NULL for a NULL @src (and whatever kmemdup() returns otherwise).
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/* Same as ip6_opt_dup(), for a routing header. */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/*
 * Recompute *@mtu and *@maxfraglen for the skb about to be allocated.
 * Nothing is changed when the route has DST_XFRM_TUNNEL set.
 * @skb is the current tail skb, NULL when the first one is being built.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

/*
 * Prepare @cork / @v6_cork for a new corked transmission.
 *
 * Stores @rt's dst in the cork, duplicates the tx options from @ipc6 (if
 * any), and records hop limit, traffic class, fragment size and the socket
 * control values taken from @ipc6.
 *
 * Returns 0, -EINVAL if @v6_cork already holds options, or -ENOBUFS when an
 * allocation fails.  Partially duplicated options are left in @v6_cork on
 * error.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu, frag_size;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* callers pass dst together with a reference, set it first so
	 * ip6_cork_release() can put it down even in case of an error.
	 */
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	/* With PMTU probing enabled the device MTU is used instead. */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));

	/* A smaller per-socket fragment size overrides the MTU. */
	frag_size = READ_ONCE(np->frag_size);
	if (frag_size && frag_size < mtu)
		mtu = frag_size;

	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	cork->base.priority = ipc6->sockc.priority;
	sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
	if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
		cork->base.flags |= IPCORK_TS_OPT_ID;
		cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
	}
	cork->base.length = 0;
	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

/*
 * Append @length bytes obtained through @getfrag to the skbs on @queue.
 *
 * Data is added to the tail skb while it fits; otherwise a new skb is
 * allocated and queued.  Depending on the flags and device features the
 * data is copied linearly, into page fragments, spliced, or attached as
 * zerocopy.
 *
 * Returns 0 on success.  On error the not-yet-appended length is subtracted
 * from cork->length again and an errno is returned; -EMSGSIZE is also
 * reported to the socket via ipv6_local_error().
 */
static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	bool zc = false;
	u32 tskey = 0;
	struct rt6_info *rt = dst_rt6_info(cork->dst);
	bool paged, hold_tskey = false, extra_uref = false;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;

	skb = skb_peek_tail(queue);
	/* Extension header room is only accounted for the very first skb. */
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     rt->rt6i_nfheader_len;

	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if ((flags & MSG_ZEROCOPY) && length) {
		struct msghdr *msg = from;

		if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
			if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
				return -EINVAL;

			/* Leave uarg NULL if can't zerocopy, callers should
			 * be able to handle it.
			 */
			if ((rt->dst.dev->features & NETIF_F_SG) &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
				uarg = msg->msg_ubuf;
			}
		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
			uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
			if (!uarg)
				return -ENOBUFS;
			extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
			if (rt->dst.dev->features & NETIF_F_SG &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
			} else {
				uarg_to_msgzc(uarg)->zerocopy = 0;
				skb_zcopy_set(skb, uarg, &extra_uref);
			}
		}
	} else if ((flags & MSG_SPLICE_PAGES) && length) {
		if (inet_test_bit(HDRINCL, sk))
			return -EPERM;
		if (rt->dst.dev->features & NETIF_F_SG &&
		    getfrag == ip_generic_getfrag)
			/* We need an empty buffer to attach stuff to */
			paged = true;
		else
			flags &= ~MSG_SPLICE_PAGES;
	}

	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
		if (cork->flags & IPCORK_TS_OPT_ID) {
			tskey = cork->ts_opt_id;
		} else {
			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
			/* Remember to give the key back on the error path. */
			hold_tskey = true;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = fragheaderlen + transhdrlen;
				pagedlen = datalen - transhdrlen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			/* [!] NOTE: copy may be negative if pagedlen>0
			 * because then the equation may reduce to -fraggap.
			 */
			if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the overhang of the previous skb over. */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					   from, data + transhdrlen, offset,
					   copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			} else if (flags & MSG_SPLICE_PAGES) {
				copy = 0;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					    from, skb_put(skb, copy),
					    offset, copy, off, skb) < 0) {
				/* Undo the skb_put() above. */
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (flags & MSG_SPLICE_PAGES) {
			struct msghdr *msg = from;

			err = -EIO;
			if (WARN_ON_ONCE(copy > msg->msg_iter.count))
				goto error;

			err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
						   sk->sk_allocation);
			if (err < 0)
				goto error;
			copy = err;
			wmem_alloc_delta += copy;
		} else if (!zc) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			skb_zcopy_downgrade_managed(skb);
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
				    from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	/* Only what was actually appended stays accounted in the cork. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	if (hold_tskey)
		atomic_dec(&sk->sk_tskey);
	return err;
}

/*
 * Append data to the socket's write queue, setting up the cork on first use.
 *
 * When the write queue is empty a reference on @rt is taken, the cork is
 * initialised from @ipc6 and @fl6, and the fragmentable option length is
 * added to both @length and @transhdrlen.  For later calls @transhdrlen is
 * forced to 0.  MSG_PROBE returns 0 without doing anything.
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, size_t length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		dst_hold(&rt->dst);
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt);
		if (err)
			return err;

		inet->cork.fl.u.ip6 = *fl6;
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

/* Move the cork's dst (pointer and all) over to @skb. */
static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
{
	struct dst_entry *dst = cork->base.dst;

	cork->base.dst = NULL;
	skb_dst_set(skb, dst);
}

/* Free the duplicated tx options and drop the dst still held by the cork. */
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		struct ipv6_txoptions *opt = v6_cork->opt;

		kfree(opt->dst0opt);
		kfree(opt->dst1opt);
		kfree(opt->hopopt);
		kfree(opt->srcrt);
		kfree(opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
	}
}

/*
 * Turn the skbs on @queue into one packet.
 *
 * The first skb becomes the head; all following skbs are chained onto its
 * frag_list.  Extension headers and the IPv6 header are pushed, the cork's
 * dst is moved to the skb and the cork is released.
 *
 * Returns the head skb, or NULL if @queue is empty (the cork is left
 * untouched in that case).
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = dst_rt6_info(cork->base.dst);
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		/* Its truesize is now accounted through the head skb. */
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	final_dst = &fl6->daddr;
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, sk), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = cork->base.priority;
	skb->mark = cork->base.mark;
	if (sk_is_tcp(sk))
		skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
	else
		skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);

	ip6_cork_steal_dst(skb, cork);
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		if (sk->sk_socket->type == SOCK_RAW &&
		   !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

/*
 * Send @skb with ip6_local_out().  A positive return code is translated with
 * net_xmit_errno(); any remaining error is counted as an output discard.
 */
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	int err;

	rcu_read_lock();
	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	rcu_read_unlock();
	return err;
}

/* Build the pending packet of @sk and send it; 0 if nothing is pending. */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

/*
 * Free every skb on @queue (counting a discard for those that carry a dst)
 * and release the cork.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

/* Drop everything queued on the socket's write queue and its cork state. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

/*
 * Build a complete packet in one go, using a private queue and the caller
 * supplied @cork instead of the socket's write queue.
 *
 * Returns the skb, NULL for MSG_PROBE (the reference on @rt is dropped), or
 * an ERR_PTR() when setting up the cork or appending the data fails.
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct rt6_info *rt,
			     unsigned int flags, struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	/* A negative value means "not set": fall back to the socket setting. */
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_test_bit(DONTFRAG, sk);

	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
268 268 268 230 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
#ifndef _LINUX_RSEQ_H
#define _LINUX_RSEQ_H

#ifdef CONFIG_RSEQ

#include <linux/preempt.h>
#include <linux/sched.h>

/*
 * The event bits reuse the bit numbers of the user-space ABI
 * enum rseq_cs_flags so that masks can be compared directly.
 */
enum rseq_event_mask_bits {
	RSEQ_EVENT_PREEMPT_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
	RSEQ_EVENT_SIGNAL_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
	RSEQ_EVENT_MIGRATE_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
};

enum rseq_event_mask {
	RSEQ_EVENT_PREEMPT	= (1U << RSEQ_EVENT_PREEMPT_BIT),
	RSEQ_EVENT_SIGNAL	= (1U << RSEQ_EVENT_SIGNAL_BIT),
	RSEQ_EVENT_MIGRATE	= (1U << RSEQ_EVENT_MIGRATE_BIT),
};

/* Flag @t for notify-resume work, but only if it has an rseq area. */
static inline void rseq_set_notify_resume(struct task_struct *t)
{
	if (!t->rseq)
		return;

	set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
}

void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);

/* Run the rseq notify-resume handler if the current task has an rseq area. */
static inline void rseq_handle_notify_resume(struct ksignal *ksig,
					     struct pt_regs *regs)
{
	if (!current->rseq)
		return;

	__rseq_handle_notify_resume(ksig, regs);
}

/* Record a signal event for the current task, then handle it right away. */
static inline void rseq_signal_deliver(struct ksignal *ksig,
				       struct pt_regs *regs)
{
	/* The non-atomic bit update must not be preempted. */
	preempt_disable();
	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
	preempt_enable();

	rseq_handle_notify_resume(ksig, regs);
}

/* rseq_preempt() requires preemption to be disabled. */
static inline void rseq_preempt(struct task_struct *t)
{
	__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
	rseq_set_notify_resume(t);
}

/* rseq_migrate() requires preemption to be disabled. */
static inline void rseq_migrate(struct task_struct *t)
{
	__set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
	rseq_set_notify_resume(t);
}

/*
 * A child inherits the parent's registered restartable sequences area,
 * except for a clone with CLONE_VM set, which starts out unregistered.
 */
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
	const bool unregister = (clone_flags & CLONE_VM) != 0;

	t->rseq = unregister ? NULL : current->rseq;
	t->rseq_len = unregister ? 0 : current->rseq_len;
	t->rseq_sig = unregister ? 0 : current->rseq_sig;
	t->rseq_event_mask = unregister ? 0 : current->rseq_event_mask;
}

/* Clear every rseq field of @t. */
static inline void rseq_execve(struct task_struct *t)
{
	t->rseq = NULL;
	t->rseq_len = 0;
	t->rseq_sig = 0;
	t->rseq_event_mask = 0;
}

#else

/* CONFIG_RSEQ is off: every hook is an empty stub. */
static inline void rseq_set_notify_resume(struct task_struct *t)
{
}
static inline void rseq_handle_notify_resume(struct ksignal *ksig,
					     struct pt_regs *regs)
{
}
static inline void rseq_signal_deliver(struct ksignal *ksig,
				       struct pt_regs *regs)
{
}
static inline void rseq_preempt(struct task_struct *t)
{
}
static inline void rseq_migrate(struct task_struct *t)
{
}
static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
{
}
static inline void rseq_execve(struct task_struct *t)
{
}

#endif

#ifdef CONFIG_DEBUG_RSEQ

void rseq_syscall(struct pt_regs *regs);

#else

static inline void rseq_syscall(struct pt_regs *regs)
{
}

#endif

#endif /* _LINUX_RSEQ_H */
75 9 2 2 6 9 9 9 4 6 6 6 4 4 4 2 2 2 2 2 2 2 2 2 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 // SPDX-License-Identifier: GPL-2.0-only /* * Generic show_mem() implementation * * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de> */ #include <linux/blkdev.h> #include <linux/cma.h> #include <linux/cpuset.h> #include 
<linux/highmem.h> #include <linux/hugetlb.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/swap.h> #include <linux/vmstat.h> #include "internal.h" #include "swap.h" atomic_long_t _totalram_pages __read_mostly; EXPORT_SYMBOL(_totalram_pages); unsigned long totalreserve_pages __read_mostly; unsigned long totalcma_pages __read_mostly; static inline void show_node(struct zone *zone) { if (IS_ENABLED(CONFIG_NUMA)) printk("Node %d ", zone_to_nid(zone)); } long si_mem_available(void) { long available; unsigned long pagecache; unsigned long wmark_low = 0; unsigned long reclaimable; struct zone *zone; for_each_zone(zone) wmark_low += low_wmark_pages(zone); /* * Estimate the amount of memory available for userspace allocations, * without causing swapping or OOM. */ available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages; /* * Not all the page cache can be freed, otherwise the system will * start swapping or thrashing. Assume at least half of the page * cache, or the low watermark worth of cache, needs to stay. */ pagecache = global_node_page_state(NR_ACTIVE_FILE) + global_node_page_state(NR_INACTIVE_FILE); pagecache -= min(pagecache / 2, wmark_low); available += pagecache; /* * Part of the reclaimable slab and other kernel memory consists of * items that are in use, and cannot be freed. Cap this estimate at the * low watermark. 
*/ reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); reclaimable -= min(reclaimable / 2, wmark_low); available += reclaimable; if (available < 0) available = 0; return available; } EXPORT_SYMBOL_GPL(si_mem_available); void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages(); val->sharedram = global_node_page_state(NR_SHMEM); val->freeram = global_zone_page_state(NR_FREE_PAGES); val->bufferram = nr_blockdev_pages(); val->totalhigh = totalhigh_pages(); val->freehigh = nr_free_highpages(); val->mem_unit = PAGE_SIZE; } EXPORT_SYMBOL(si_meminfo); #ifdef CONFIG_NUMA void si_meminfo_node(struct sysinfo *val, int nid) { int zone_type; /* needs to be signed */ unsigned long managed_pages = 0; unsigned long managed_highpages = 0; unsigned long free_highpages = 0; pg_data_t *pgdat = NODE_DATA(nid); for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]); val->totalram = managed_pages; val->sharedram = node_page_state(pgdat, NR_SHMEM); val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); #ifdef CONFIG_HIGHMEM for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; if (is_highmem(zone)) { managed_highpages += zone_managed_pages(zone); free_highpages += zone_page_state(zone, NR_FREE_PAGES); } } val->totalhigh = managed_highpages; val->freehigh = free_highpages; #else val->totalhigh = managed_highpages; val->freehigh = free_highpages; #endif val->mem_unit = PAGE_SIZE; } #endif /* * Determine whether the node should be displayed or not, depending on whether * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask) { if (!(flags & SHOW_MEM_FILTER_NODES)) return false; /* * no node mask - aka implicit memory numa policy. 
Do not bother with * the synchronization - read_mems_allowed_begin - because we do not * have to be precise here. */ if (!nodemask) nodemask = &cpuset_current_mems_allowed; return !node_isset(nid, *nodemask); } static void show_migration_types(unsigned char type) { static const char types[MIGRATE_TYPES] = { [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_MOVABLE] = 'M', [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_HIGHATOMIC] = 'H', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif #ifdef CONFIG_MEMORY_ISOLATION [MIGRATE_ISOLATE] = 'I', #endif }; char tmp[MIGRATE_TYPES + 1]; char *p = tmp; int i; for (i = 0; i < MIGRATE_TYPES; i++) { if (type & (1 << i)) *p++ = types[i]; } *p = '\0'; printk(KERN_CONT "(%s) ", tmp); } static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx) { int zone_idx; for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++) if (zone_managed_pages(pgdat->node_zones + zone_idx)) return true; return false; } /* * Show free area list (used inside shift_scroll-lock stuff) * We also calculate the percentage fragmentation. We do this by counting the * memory on each free list with the exception of the first item on the list. * * Bits in @filter: * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's * cpuset. 
*/ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long free_pcp = 0; int cpu, nid; struct zone *zone; pg_data_t *pgdat; for_each_populated_zone(zone) { if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; } printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu dirty:%lu writeback:%lu\n" " slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu\n" " sec_pagetables:%lu bounce:%lu\n" " kernel_misc_reclaimable:%lu\n" " free:%lu free_pcp:%lu free_cma:%lu\n", global_node_page_state(NR_ACTIVE_ANON), global_node_page_state(NR_INACTIVE_ANON), global_node_page_state(NR_ISOLATED_ANON), global_node_page_state(NR_ACTIVE_FILE), global_node_page_state(NR_INACTIVE_FILE), global_node_page_state(NR_ISOLATED_FILE), global_node_page_state(NR_UNEVICTABLE), global_node_page_state(NR_FILE_DIRTY), global_node_page_state(NR_WRITEBACK), global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B), global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), global_node_page_state(NR_PAGETABLE), global_node_page_state(NR_SECONDARY_PAGETABLE), global_zone_page_state(NR_BOUNCE), global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE), global_zone_page_state(NR_FREE_PAGES), free_pcp, global_zone_page_state(NR_FREE_CMA_PAGES)); for_each_online_pgdat(pgdat) { if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) continue; if (!node_has_managed_zones(pgdat, max_zone_idx)) continue; printk("Node %d" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " isolated(anon):%lukB" " isolated(file):%lukB" " mapped:%lukB" " dirty:%lukB" " writeback:%lukB" " shmem:%lukB" #ifdef 
CONFIG_TRANSPARENT_HUGEPAGE " shmem_thp:%lukB" " shmem_pmdmapped:%lukB" " anon_thp:%lukB" #endif " writeback_tmp:%lukB" " kernel_stack:%lukB" #ifdef CONFIG_SHADOW_CALL_STACK " shadow_call_stack:%lukB" #endif " pagetables:%lukB" " sec_pagetables:%lukB" " all_unreclaimable? %s" "\n", pgdat->node_id, K(node_page_state(pgdat, NR_ACTIVE_ANON)), K(node_page_state(pgdat, NR_INACTIVE_ANON)), K(node_page_state(pgdat, NR_ACTIVE_FILE)), K(node_page_state(pgdat, NR_INACTIVE_FILE)), K(node_page_state(pgdat, NR_UNEVICTABLE)), K(node_page_state(pgdat, NR_ISOLATED_ANON)), K(node_page_state(pgdat, NR_ISOLATED_FILE)), K(node_page_state(pgdat, NR_FILE_MAPPED)), K(node_page_state(pgdat, NR_FILE_DIRTY)), K(node_page_state(pgdat, NR_WRITEBACK)), K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE K(node_page_state(pgdat, NR_SHMEM_THPS)), K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), K(node_page_state(pgdat, NR_ANON_THPS)), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif K(node_page_state(pgdat, NR_PAGETABLE)), K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), str_yes_no(pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)); } for_each_populated_zone(zone) { int i; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; free_pcp = 0; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; show_node(zone); printk(KERN_CONT "%s" " free:%lukB" " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" " reserved_highatomic:%luKB" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " writepending:%lukB" " present:%lukB" " managed:%lukB" " mlocked:%lukB" " bounce:%lukB" " free_pcp:%lukB" " local_pcp:%ukB" " free_cma:%lukB" "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), K(zone->watermark_boost), 
K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), K(zone->nr_reserved_highatomic), K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), K(this_cpu_read(zone->per_cpu_pageset->count)), K(zone_page_state(zone, NR_FREE_CMA_PAGES))); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); printk(KERN_CONT "\n"); } for_each_populated_zone(zone) { unsigned int order; unsigned long nr[NR_PAGE_ORDERS], flags, total = 0; unsigned char types[NR_PAGE_ORDERS]; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; show_node(zone); printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &zone->free_area[order]; int type; nr[order] = area->nr_free; total += nr[order] << order; types[order] = 0; for (type = 0; type < MIGRATE_TYPES; type++) { if (!free_area_empty(area, type)) types[order] |= 1 << type; } } spin_unlock_irqrestore(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { printk(KERN_CONT "%lu*%lukB ", nr[order], K(1UL) << order); if (nr[order]) show_migration_types(types[order]); } printk(KERN_CONT "= %lukB\n", K(total)); } for_each_online_node(nid) { if (show_mem_node_skip(filter, nid, nodemask)) continue; hugetlb_show_meminfo_node(nid); } printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES)); show_swap_cache_info(); } void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long total = 0, 
reserved = 0, highmem = 0; struct zone *zone; printk("Mem-Info:\n"); show_free_areas(filter, nodemask, max_zone_idx); for_each_populated_zone(zone) { total += zone->present_pages; reserved += zone->present_pages - zone_managed_pages(zone); if (is_highmem(zone)) highmem += zone->present_pages; } printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); printk("%lu pages reserved\n", reserved); #ifdef CONFIG_CMA printk("%lu pages cma reserved\n", totalcma_pages); #endif #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif #ifdef CONFIG_MEM_ALLOC_PROFILING { struct codetag_bytes tags[10]; size_t i, nr; nr = alloc_tag_top_users(tags, ARRAY_SIZE(tags), false); if (nr) { pr_notice("Memory allocations:\n"); for (i = 0; i < nr; i++) { struct codetag *ct = tags[i].ct; struct alloc_tag *tag = ct_to_alloc_tag(ct); struct alloc_tag_counters counter = alloc_tag_read(tag); char bytes[10]; string_get_size(counter.bytes, 1, STRING_UNITS_2, bytes, sizeof(bytes)); /* Same as alloc_tag_to_text() but w/o intermediate buffer */ if (ct->modname) pr_notice("%12s %8llu %s:%u [%s] func:%s\n", bytes, counter.calls, ct->filename, ct->lineno, ct->modname, ct->function); else pr_notice("%12s %8llu %s:%u func:%s\n", bytes, counter.calls, ct->filename, ct->lineno, ct->function); } } } #endif }
1 3 6 8 8 6 1 1 7 6 1 6 6 7 8 8 8 2 1 1 1 1 4 4 2 4 2 2 9 7 1 1 6 1 1 2 4 2 3 6 7 1 6 6 19 19 1 2 7 7 2 4 3 13 1 5 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 
998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 
1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 
1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 
2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/act_api.c Packet action API. * * Author: Jamal Hadi Salim */ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/err.h> #include <linux/module.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> #include <net/pkt_cls.h> #include <net/tc_act/tc_pedit.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/flow_offload.h> #include <net/tc_wrapper.h> #ifdef CONFIG_INET DEFINE_STATIC_KEY_FALSE(tcf_frag_xmit_count); EXPORT_SYMBOL_GPL(tcf_frag_xmit_count); #endif int tcf_dev_queue_xmit(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb)) { #ifdef CONFIG_INET if (static_branch_unlikely(&tcf_frag_xmit_count)) return sch_frag_xmit_hook(skb, xmit); #endif return xmit(skb); } EXPORT_SYMBOL_GPL(tcf_dev_queue_xmit); static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } static void tcf_free_cookie_rcu(struct rcu_head *p) { struct tc_cookie *cookie = container_of(p, struct tc_cookie, rcu); kfree(cookie->data); kfree(cookie); } static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, struct tc_cookie *new_cookie) { struct tc_cookie *old; old = unrcu_pointer(xchg(old_cookie, RCU_INITIALIZER(new_cookie))); if (old) call_rcu(&old->rcu, 
tcf_free_cookie_rcu); } int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, struct tcf_chain **newchain, struct netlink_ext_ack *extack) { int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; u32 chain_index; if (!opcode) ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) ret = 0; if (ret) { NL_SET_ERR_MSG(extack, "invalid control action"); goto end; } if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { chain_index = action & TC_ACT_EXT_VAL_MASK; if (!tp || !newchain) { ret = -EINVAL; NL_SET_ERR_MSG(extack, "can't goto NULL proto/chain"); goto end; } *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); if (!*newchain) { ret = -ENOMEM; NL_SET_ERR_MSG(extack, "can't allocate goto_chain"); } } end: return ret; } EXPORT_SYMBOL(tcf_action_check_ctrlact); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, struct tcf_chain *goto_chain) { a->tcfa_action = action; goto_chain = rcu_replace_pointer(a->goto_chain, goto_chain, 1); return goto_chain; } EXPORT_SYMBOL(tcf_action_set_ctrlact); /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected * from filters. Readers later can not find us. 
*/ static void free_tcf(struct tc_action *p) { struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->user_cookie, NULL); if (chain) tcf_chain_put_by_act(chain); kfree(p); } static void offload_action_hw_count_set(struct tc_action *act, u32 hw_count) { act->in_hw_count = hw_count; } static void offload_action_hw_count_inc(struct tc_action *act, u32 hw_count) { act->in_hw_count += hw_count; } static void offload_action_hw_count_dec(struct tc_action *act, u32 hw_count) { act->in_hw_count = act->in_hw_count > hw_count ? act->in_hw_count - hw_count : 0; } static unsigned int tcf_offload_act_num_actions_single(struct tc_action *act) { if (is_tcf_pedit(act)) return tcf_pedit_nkeys(act); else return 1; } static bool tc_act_skip_hw(u32 flags) { return (flags & TCA_ACT_FLAGS_SKIP_HW) ? true : false; } static bool tc_act_skip_sw(u32 flags) { return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false; } /* SKIP_HW and SKIP_SW are mutually exclusive flags. 
*/ static bool tc_act_flags_valid(u32 flags) { flags &= TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW; return flags ^ (TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW); } static int offload_action_init(struct flow_offload_action *fl_action, struct tc_action *act, enum offload_act_command cmd, struct netlink_ext_ack *extack) { int err; fl_action->extack = extack; fl_action->command = cmd; fl_action->index = act->tcfa_index; fl_action->cookie = (unsigned long)act; if (act->ops->offload_act_setup) { spin_lock_bh(&act->tcfa_lock); err = act->ops->offload_act_setup(act, fl_action, NULL, false, extack); spin_unlock_bh(&act->tcfa_lock); return err; } return -EOPNOTSUPP; } static int tcf_action_offload_cmd_ex(struct flow_offload_action *fl_act, u32 *hw_count) { int err; err = flow_indr_dev_setup_offload(NULL, NULL, TC_SETUP_ACT, fl_act, NULL, NULL); if (err < 0) return err; if (hw_count) *hw_count = err; return 0; } static int tcf_action_offload_cmd_cb_ex(struct flow_offload_action *fl_act, u32 *hw_count, flow_indr_block_bind_cb_t *cb, void *cb_priv) { int err; err = cb(NULL, NULL, cb_priv, TC_SETUP_ACT, NULL, fl_act, NULL); if (err < 0) return err; if (hw_count) *hw_count = 1; return 0; } static int tcf_action_offload_cmd(struct flow_offload_action *fl_act, u32 *hw_count, flow_indr_block_bind_cb_t *cb, void *cb_priv) { return cb ? 
tcf_action_offload_cmd_cb_ex(fl_act, hw_count, cb, cb_priv) : tcf_action_offload_cmd_ex(fl_act, hw_count); } static int tcf_action_offload_add_ex(struct tc_action *action, struct netlink_ext_ack *extack, flow_indr_block_bind_cb_t *cb, void *cb_priv) { bool skip_sw = tc_act_skip_sw(action->tcfa_flags); struct tc_action *actions[TCA_ACT_MAX_PRIO] = { [0] = action, }; struct flow_offload_action *fl_action; u32 in_hw_count = 0; int num, err = 0; if (tc_act_skip_hw(action->tcfa_flags)) return 0; num = tcf_offload_act_num_actions_single(action); fl_action = offload_action_alloc(num); if (!fl_action) return -ENOMEM; err = offload_action_init(fl_action, action, FLOW_ACT_REPLACE, extack); if (err) goto fl_err; err = tc_setup_action(&fl_action->action, actions, 0, extack); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup tc actions for offload"); goto fl_err; } err = tcf_action_offload_cmd(fl_action, &in_hw_count, cb, cb_priv); if (!err) cb ? offload_action_hw_count_inc(action, in_hw_count) : offload_action_hw_count_set(action, in_hw_count); if (skip_sw && !tc_act_in_hw(action)) err = -EINVAL; tc_cleanup_offload_action(&fl_action->action); fl_err: kfree(fl_action); return err; } /* offload the tc action after it is inserted */ static int tcf_action_offload_add(struct tc_action *action, struct netlink_ext_ack *extack) { return tcf_action_offload_add_ex(action, extack, NULL, NULL); } int tcf_action_update_hw_stats(struct tc_action *action) { struct flow_offload_action fl_act = {}; int err; err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL); if (err) return err; err = tcf_action_offload_cmd(&fl_act, NULL, NULL, NULL); if (!err) { preempt_disable(); tcf_action_stats_update(action, fl_act.stats.bytes, fl_act.stats.pkts, fl_act.stats.drops, fl_act.stats.lastused, true); preempt_enable(); action->used_hw_stats = fl_act.stats.used_hw_stats; action->used_hw_stats_valid = true; } else { return -EOPNOTSUPP; } return 0; } EXPORT_SYMBOL(tcf_action_update_hw_stats); 
static int tcf_action_offload_del_ex(struct tc_action *action, flow_indr_block_bind_cb_t *cb, void *cb_priv) { struct flow_offload_action fl_act = {}; u32 in_hw_count = 0; int err = 0; if (!tc_act_in_hw(action)) return 0; err = offload_action_init(&fl_act, action, FLOW_ACT_DESTROY, NULL); if (err) return err; err = tcf_action_offload_cmd(&fl_act, &in_hw_count, cb, cb_priv); if (err < 0) return err; if (!cb && action->in_hw_count != in_hw_count) return -EINVAL; /* do not need to update hw state when deleting action */ if (cb && in_hw_count) offload_action_hw_count_dec(action, in_hw_count); return 0; } static int tcf_action_offload_del(struct tc_action *action) { return tcf_action_offload_del_ex(action, NULL, NULL); } static void tcf_action_cleanup(struct tc_action *p) { tcf_action_offload_del(p); if (p->ops->cleanup) p->ops->cleanup(p); gen_kill_estimator(&p->tcfa_rate_est); free_tcf(p); } static int __tcf_action_put(struct tc_action *p, bool bind) { struct tcf_idrinfo *idrinfo = p->idrinfo; if (refcount_dec_and_mutex_lock(&p->tcfa_refcnt, &idrinfo->lock)) { if (bind) atomic_dec(&p->tcfa_bindcnt); idr_remove(&idrinfo->action_idr, p->tcfa_index); mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); return 1; } if (bind) atomic_dec(&p->tcfa_bindcnt); return 0; } static int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; /* Release with strict==1 and bind==0 is only called through act API * interface (classifiers always bind). Only case when action with * positive reference count and zero bind count can exist is when it was * also created with act API (unbinding last classifier will destroy the * action if it was created by classifier). So only case when bind count * can be changed after initial check is when unbound action is * destroyed by act API while classifier binds to action with same id * concurrently. 
This result either creation of new action(same behavior * as before), or reusing existing action if concurrent process * increments reference count before action is deleted. Both scenarios * are acceptable. */ if (p) { if (!bind && strict && atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; if (__tcf_action_put(p, bind)) ret = ACT_P_DELETED; } return ret; } int tcf_idr_release(struct tc_action *a, bool bind) { const struct tc_action_ops *ops = a->ops; int ret; ret = __tcf_idr_release(a, bind, false); if (ret == ACT_P_DELETED) module_put(ops->owner); return ret; } EXPORT_SYMBOL(tcf_idr_release); static size_t tcf_action_shared_attrs_size(const struct tc_action *act) { struct tc_cookie *user_cookie; u32 cookie_len = 0; rcu_read_lock(); user_cookie = rcu_dereference(act->user_cookie); if (user_cookie) cookie_len = nla_total_size(user_cookie->len); rcu_read_unlock(); return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ + nla_total_size_64bit(sizeof(struct gnet_stats_basic)) /* TCA_STATS_PKT64 */ + nla_total_size_64bit(sizeof(u64)) /* TCA_STATS_QUEUE */ + nla_total_size_64bit(sizeof(struct gnet_stats_queue)) + nla_total_size(0) /* TCA_ACT_OPTIONS nested */ + nla_total_size(sizeof(struct tcf_t)); /* TCA_GACT_TM */ } static size_t tcf_action_full_attrs_size(size_t sz) { return NLMSG_HDRLEN /* struct nlmsghdr */ + sizeof(struct tcamsg) + nla_total_size(0) /* TCA_ACT_TAB nested */ + sz; } static size_t tcf_action_fill_size(const struct tc_action *act) { size_t sz = tcf_action_shared_attrs_size(act); if (act->ops->get_fill_size) return act->ops->get_fill_size(act) + sz; return sz; } static int tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act) { unsigned char *b = 
skb_tail_pointer(skb); struct tc_cookie *cookie; if (nla_put_string(skb, TCA_ACT_KIND, a->ops->kind)) goto nla_put_failure; if (tcf_action_copy_stats(skb, a, 0)) goto nla_put_failure; if (from_act && nla_put_u32(skb, TCA_ACT_INDEX, a->tcfa_index)) goto nla_put_failure; rcu_read_lock(); cookie = rcu_dereference(a->user_cookie); if (cookie) { if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) { rcu_read_unlock(); goto nla_put_failure; } } rcu_read_unlock(); return 0; nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; int err = -EINVAL; u32 flags; if (tcf_action_dump_terse(skb, a, false)) goto nla_put_failure; if (a->hw_stats != TCA_ACT_HW_STATS_ANY && nla_put_bitfield32(skb, TCA_ACT_HW_STATS, a->hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; if (a->used_hw_stats_valid && nla_put_bitfield32(skb, TCA_ACT_USED_HW_STATS, a->used_hw_stats, TCA_ACT_HW_STATS_ANY)) goto nla_put_failure; flags = a->tcfa_flags & TCA_ACT_FLAGS_USER_MASK; if (flags && nla_put_bitfield32(skb, TCA_ACT_FLAGS, flags, flags)) goto nla_put_failure; if (nla_put_u32(skb, TCA_ACT_IN_HW_COUNT, a->in_hw_count)) goto nla_put_failure; nest = nla_nest_start_noflag(skb, TCA_ACT_OPTIONS); if (nest == NULL) goto nla_put_failure; err = tcf_action_dump_old(skb, a, bind, ref); if (err > 0) { nla_nest_end(skb, nest); return err; } nla_put_failure: nlmsg_trim(skb, b); return -1; } static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { int err = 0, index = -1, s_i = 0, n_i = 0; u32 act_flags = cb->args[2]; unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; struct idr *idr = &idrinfo->action_idr; struct tc_action *p; unsigned long id = 1; unsigned long tmp; mutex_lock(&idrinfo->lock); s_i = cb->args[0]; idr_for_each_entry_ul(idr, p, tmp, id) { index++; if (index < s_i) continue; if 
(IS_ERR(p)) continue; if (jiffy_since && time_after(jiffy_since, (unsigned long)p->tcfa_tm.lastuse)) continue; tcf_action_update_hw_stats(p); nest = nla_nest_start_noflag(skb, n_i); if (!nest) { index--; goto nla_put_failure; } err = (act_flags & TCA_ACT_FLAG_TERSE_DUMP) ? tcf_action_dump_terse(skb, p, true) : tcf_action_dump_1(skb, p, 0, 0); if (err < 0) { index--; nlmsg_trim(skb, nest); goto done; } nla_nest_end(skb, nest); n_i++; if (!(act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) && n_i >= TCA_ACT_MAX_PRIO) goto done; } done: if (index >= 0) cb->args[0] = index + 1; mutex_unlock(&idrinfo->lock); if (n_i) { if (act_flags & TCA_ACT_FLAG_LARGE_DUMP_ON) cb->args[1] = n_i; } return n_i; nla_put_failure: nla_nest_cancel(skb, nest); goto done; } static int tcf_idr_release_unsafe(struct tc_action *p) { if (atomic_read(&p->tcfa_bindcnt) > 0) return -EPERM; if (refcount_dec_and_test(&p->tcfa_refcnt)) { idr_remove(&p->idrinfo->action_idr, p->tcfa_index); tcf_action_cleanup(p); return ACT_P_DELETED; } return 0; } static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct nlattr *nest; int n_i = 0; int ret = -EINVAL; struct idr *idr = &idrinfo->action_idr; struct tc_action *p; unsigned long id = 1; unsigned long tmp; nest = nla_nest_start_noflag(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_ACT_KIND, ops->kind)) goto nla_put_failure; ret = 0; mutex_lock(&idrinfo->lock); idr_for_each_entry_ul(idr, p, tmp, id) { if (IS_ERR(p)) continue; ret = tcf_idr_release_unsafe(p); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) break; n_i++; } mutex_unlock(&idrinfo->lock); if (ret < 0) { if (n_i) NL_SET_ERR_MSG(extack, "Unable to flush all TC actions"); else goto nla_put_failure; } ret = nla_put_u32(skb, TCA_FCNT, n_i); if (ret) goto nla_put_failure; nla_nest_end(skb, nest); return n_i; nla_put_failure: nla_nest_cancel(skb, nest); return ret; } int 
tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { return tcf_del_walker(idrinfo, skb, ops, extack); } else if (type == RTM_GETACTION) { return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown command %d\n", type); NL_SET_ERR_MSG(extack, "tcf_generic_walker: unknown command"); return -EINVAL; } } EXPORT_SYMBOL(tcf_generic_walker); int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (IS_ERR(p)) p = NULL; else if (p) refcount_inc(&p->tcfa_refcnt); mutex_unlock(&idrinfo->lock); if (p) { *a = p; return true; } return false; } EXPORT_SYMBOL(tcf_idr_search); static int __tcf_generic_walker(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ops->net_id); if (unlikely(ops->walk)) return ops->walk(net, skb, cb, type, ops, extack); return tcf_generic_walker(tn, skb, cb, type, ops, extack); } static int __tcf_idr_search(struct net *net, const struct tc_action_ops *ops, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ops->net_id); if (unlikely(ops->lookup)) return ops->lookup(net, a, index); return tcf_idr_search(tn, a, index); } static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; int ret = 0; mutex_lock(&idrinfo->lock); p = idr_find(&idrinfo->action_idr, index); if (!p) { mutex_unlock(&idrinfo->lock); return -ENOENT; } if (!atomic_read(&p->tcfa_bindcnt)) { if (refcount_dec_and_test(&p->tcfa_refcnt)) { struct module *owner = p->ops->owner; WARN_ON(p != idr_remove(&idrinfo->action_idr, 
p->tcfa_index)); mutex_unlock(&idrinfo->lock); tcf_action_cleanup(p); module_put(owner); return 0; } ret = 0; } else { ret = -EPERM; } mutex_unlock(&idrinfo->lock); return ret; } int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, bool cpustats, u32 flags) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); struct tcf_idrinfo *idrinfo = tn->idrinfo; int err = -ENOMEM; if (unlikely(!p)) return -ENOMEM; refcount_set(&p->tcfa_refcnt, 1); if (bind) atomic_set(&p->tcfa_bindcnt, 1); if (cpustats) { p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats) goto err1; p->cpu_bstats_hw = netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync); if (!p->cpu_bstats_hw) goto err2; p->cpu_qstats = alloc_percpu(struct gnet_stats_queue); if (!p->cpu_qstats) goto err3; } gnet_stats_basic_sync_init(&p->tcfa_bstats); gnet_stats_basic_sync_init(&p->tcfa_bstats_hw); spin_lock_init(&p->tcfa_lock); p->tcfa_index = index; p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; p->tcfa_flags = flags; if (est) { err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats, &p->tcfa_rate_est, &p->tcfa_lock, false, est); if (err) goto err4; } p->idrinfo = idrinfo; __module_get(ops->owner); p->ops = ops; *a = p; return 0; err4: free_percpu(p->cpu_qstats); err3: free_percpu(p->cpu_bstats_hw); err2: free_percpu(p->cpu_bstats); err1: kfree(p); return err; } EXPORT_SYMBOL(tcf_idr_create); int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags) { /* Set cpustats according to actions flags. */ return tcf_idr_create(tn, index, est, a, ops, bind, !(flags & TCA_ACT_FLAGS_NO_PERCPU_STATS), flags); } EXPORT_SYMBOL(tcf_idr_create_from_flags); /* Cleanup idr index that was allocated but not initialized. 
*/ void tcf_idr_cleanup(struct tc_action_net *tn, u32 index) { struct tcf_idrinfo *idrinfo = tn->idrinfo; mutex_lock(&idrinfo->lock); /* Remove ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ WARN_ON(!IS_ERR(idr_remove(&idrinfo->action_idr, index))); mutex_unlock(&idrinfo->lock); } EXPORT_SYMBOL(tcf_idr_cleanup); /* Check if action with specified index exists. If actions is found, increments * its reference and bind counters, and return 1. Otherwise insert temporary * error pointer (to prevent concurrent users from inserting actions with same * index) and return 0. * * May return -EAGAIN for binding actions in case of a parallel add/delete on * the requested index. */ int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind) { struct tcf_idrinfo *idrinfo = tn->idrinfo; struct tc_action *p; int ret; u32 max; if (*index) { rcu_read_lock(); p = idr_find(&idrinfo->action_idr, *index); if (IS_ERR(p)) { /* This means that another process allocated * index but did not assign the pointer yet. */ rcu_read_unlock(); return -EAGAIN; } if (!p) { /* Empty slot, try to allocate it */ max = *index; rcu_read_unlock(); goto new; } if (!refcount_inc_not_zero(&p->tcfa_refcnt)) { /* Action was deleted in parallel */ rcu_read_unlock(); return -EAGAIN; } if (bind) atomic_inc(&p->tcfa_bindcnt); *a = p; rcu_read_unlock(); return 1; } else { /* Find a slot */ *index = 1; max = UINT_MAX; } new: *a = NULL; mutex_lock(&idrinfo->lock); ret = idr_alloc_u32(&idrinfo->action_idr, ERR_PTR(-EBUSY), index, max, GFP_KERNEL); mutex_unlock(&idrinfo->lock); /* N binds raced for action allocation, * retry for all the ones that failed. 
*/ if (ret == -ENOSPC && *index == max) ret = -EAGAIN; return ret; } EXPORT_SYMBOL(tcf_idr_check_alloc); void tcf_idrinfo_destroy(const struct tc_action_ops *ops, struct tcf_idrinfo *idrinfo) { struct idr *idr = &idrinfo->action_idr; struct tc_action *p; int ret; unsigned long id = 1; unsigned long tmp; idr_for_each_entry_ul(idr, p, tmp, id) { ret = __tcf_idr_release(p, false, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return; } idr_destroy(&idrinfo->action_idr); } EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); /* since act ops id is stored in pernet subsystem list, * then there is no way to walk through only all the action * subsystem, so we keep tc action pernet ops id for * reoffload to walk through. */ static LIST_HEAD(act_pernet_id_list); static DEFINE_MUTEX(act_id_mutex); struct tc_act_pernet_id { struct list_head list; unsigned int id; }; static int tcf_pernet_add_id_list(unsigned int id) { struct tc_act_pernet_id *id_ptr; int ret = 0; mutex_lock(&act_id_mutex); list_for_each_entry(id_ptr, &act_pernet_id_list, list) { if (id_ptr->id == id) { ret = -EEXIST; goto err_out; } } id_ptr = kzalloc(sizeof(*id_ptr), GFP_KERNEL); if (!id_ptr) { ret = -ENOMEM; goto err_out; } id_ptr->id = id; list_add_tail(&id_ptr->list, &act_pernet_id_list); err_out: mutex_unlock(&act_id_mutex); return ret; } static void tcf_pernet_del_id_list(unsigned int id) { struct tc_act_pernet_id *id_ptr; mutex_lock(&act_id_mutex); list_for_each_entry(id_ptr, &act_pernet_id_list, list) { if (id_ptr->id == id) { list_del(&id_ptr->list); kfree(id_ptr); break; } } mutex_unlock(&act_id_mutex); } int tcf_register_action(struct tc_action_ops *act, struct pernet_operations *ops) { struct tc_action_ops *a; int ret; if (!act->act || !act->dump || !act->init) return -EINVAL; /* We have to register pernet ops before making the action ops visible, * otherwise tcf_action_init_1() could get a partially initialized * netns. 
*/ ret = register_pernet_subsys(ops); if (ret) return ret; if (ops->id) { ret = tcf_pernet_add_id_list(*ops->id); if (ret) goto err_id; } write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) { ret = -EEXIST; goto err_out; } } list_add_tail(&act->head, &act_base); write_unlock(&act_mod_lock); return 0; err_out: write_unlock(&act_mod_lock); if (ops->id) tcf_pernet_del_id_list(*ops->id); err_id: unregister_pernet_subsys(ops); return ret; } EXPORT_SYMBOL(tcf_register_action); int tcf_unregister_action(struct tc_action_ops *act, struct pernet_operations *ops) { struct tc_action_ops *a; int err = -ENOENT; write_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (a == act) { list_del(&act->head); err = 0; break; } } write_unlock(&act_mod_lock); if (!err) { unregister_pernet_subsys(ops); if (ops->id) tcf_pernet_del_id_list(*ops->id); } return err; } EXPORT_SYMBOL(tcf_unregister_action); /* lookup by name */ static struct tc_action_ops *tc_lookup_action_n(char *kind) { struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (strcmp(kind, a->kind) == 0) { if (try_module_get(a->owner)) res = a; break; } } read_unlock(&act_mod_lock); } return res; } /* lookup by nlattr */ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) { struct tc_action_ops *a, *res = NULL; if (kind) { read_lock(&act_mod_lock); list_for_each_entry(a, &act_base, head) { if (nla_strcmp(kind, a->kind) == 0) { if (try_module_get(a->owner)) res = a; break; } } read_unlock(&act_mod_lock); } return res; } /*TCA_ACT_MAX_PRIO is 32, there count up to 32 */ #define TCA_ACT_MAX_PRIO_MASK 0x1FF int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { u32 jmp_prgcnt = 0; u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ int i; int ret = TC_ACT_OK; if (skb_skip_tc_classify(skb)) return 
TC_ACT_OK; restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } if (tc_act_skip_sw(a->tcfa_flags)) continue; repeat_ttl = 32; repeat: ret = tc_act(skb, a, res); if (unlikely(ret == TC_ACT_REPEAT)) { if (--repeat_ttl != 0) goto repeat; /* suspicious opcode, stop pipeline */ net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); return TC_ACT_OK; } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; if (!jmp_prgcnt || (jmp_prgcnt > nr_actions)) { /* faulty opcode, stop pipeline */ return TC_ACT_OK; } else { jmp_ttl -= 1; if (jmp_ttl > 0) goto restart_act_graph; else /* faulty graph, stop pipeline */ return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { if (unlikely(!rcu_access_pointer(a->goto_chain))) { tcf_set_drop_reason(skb, SKB_DROP_REASON_TC_CHAIN_NOTFOUND); return TC_ACT_SHOT; } tcf_action_goto_chain_exec(a, res); } if (ret != TC_ACT_PIPE) break; } return ret; } EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct tc_action *actions[], int bind) { const struct tc_action_ops *ops; struct tc_action *a; int ret = 0, i; tcf_act_for_each_action(i, a, actions) { actions[i] = NULL; ops = a->ops; ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) module_put(ops->owner); else if (ret < 0) return ret; } return ret; } static int tcf_action_put(struct tc_action *p) { return __tcf_action_put(p, false); } static void tcf_action_put_many(struct tc_action *actions[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; if (tcf_action_put(a)) module_put(ops->owner); } } static void tca_put_bound_many(struct tc_action *actions[], int init_res[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { const struct tc_action_ops *ops = a->ops; if (init_res[i] == ACT_P_CREATED) continue; if (tcf_action_put(a)) module_put(ops->owner); } 
} int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { return a->ops->dump(skb, a, bind, ref); } int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse) { struct tc_action *a; int err = -EINVAL, i; struct nlattr *nest; tcf_act_for_each_action(i, a, actions) { nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure; err = terse ? tcf_action_dump_terse(skb, a, false) : tcf_action_dump_1(skb, a, bind, ref); if (err < 0) goto errout; nla_nest_end(skb, nest); } return 0; nla_put_failure: err = -EINVAL; errout: nla_nest_cancel(skb, nest); return err; } static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) { struct tc_cookie *c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) return NULL; c->data = nla_memdup(tb[TCA_ACT_COOKIE], GFP_KERNEL); if (!c->data) { kfree(c); return NULL; } c->len = nla_len(tb[TCA_ACT_COOKIE]); return c; } static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr) { struct nla_bitfield32 hw_stats_bf; /* If the user did not pass the attr, that means he does * not care about the type. Return "any" in that case * which is setting on all supported types. 
*/ if (!hw_stats_attr) return TCA_ACT_HW_STATS_ANY; hw_stats_bf = nla_get_bitfield32(hw_stats_attr); return hw_stats_bf.value; } static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, [TCA_ACT_COOKIE] = { .type = NLA_BINARY, .len = TC_COOKIE_MAX_SIZE }, [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, [TCA_ACT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS | TCA_ACT_FLAGS_SKIP_HW | TCA_ACT_FLAGS_SKIP_SW), [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; void tcf_idr_insert_many(struct tc_action *actions[], int init_res[]) { struct tc_action *a; int i; tcf_act_for_each_action(i, a, actions) { struct tcf_idrinfo *idrinfo; if (init_res[i] == ACT_P_BOUND) continue; idrinfo = a->idrinfo; mutex_lock(&idrinfo->lock); /* Replace ERR_PTR(-EBUSY) allocated by tcf_idr_check_alloc */ idr_replace(&idrinfo->action_idr, a, a->tcfa_index); mutex_unlock(&idrinfo->lock); } } struct tc_action_ops *tc_action_load_ops(struct nlattr *nla, u32 flags, struct netlink_ext_ack *extack) { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action_ops *a_o; char act_name[IFNAMSIZ]; struct nlattr *kind; int err; if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) return ERR_PTR(err); err = -EINVAL; kind = tb[TCA_ACT_KIND]; if (!kind) { NL_SET_ERR_MSG(extack, "TC action kind must be specified"); return ERR_PTR(err); } if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(err); } } else { if (strscpy(act_name, "police", IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); return ERR_PTR(-EINVAL); } } a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { #ifdef CONFIG_MODULES bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL); if (rtnl_held) rtnl_unlock(); request_module(NET_ACT_ALIAS_PREFIX "%s", 
act_name); if (rtnl_held) rtnl_lock(); a_o = tc_lookup_action_n(act_name); /* We dropped the RTNL semaphore in order to * perform the module load. So, even if we * succeeded in loading the module we have to * tell the caller to replay the request. We * indicate this using -EAGAIN. */ if (a_o != NULL) { module_put(a_o->owner); return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "Failed to load TC action module"); return ERR_PTR(-ENOENT); } return a_o; } struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action_ops *a_o, int *init_res, u32 flags, struct netlink_ext_ack *extack) { bool police = flags & TCA_ACT_FLAGS_POLICE; struct nla_bitfield32 userflags = { 0, 0 }; struct tc_cookie *user_cookie = NULL; u8 hw_stats = TCA_ACT_HW_STATS_ANY; struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; int err; /* backward compatibility for policer */ if (!police) { err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) return ERR_PTR(err); if (tb[TCA_ACT_COOKIE]) { user_cookie = nla_memdup_cookie(tb); if (!user_cookie) { NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); err = -ENOMEM; goto err_out; } } hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); if (tb[TCA_ACT_FLAGS]) { userflags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); if (!tc_act_flags_valid(userflags.value)) { err = -EINVAL; goto err_out; } } err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, tp, userflags.value | flags, extack); } else { err = a_o->init(net, nla, est, &a, tp, userflags.value | flags, extack); } if (err < 0) goto err_out; *init_res = err; if (!police && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->user_cookie, user_cookie); if (!police) a->hw_stats = hw_stats; return a; err_out: if (user_cookie) { kfree(user_cookie->data); kfree(user_cookie); } return ERR_PTR(err); } static bool tc_act_bind(u32 flags) { return !!(flags & TCA_ACT_FLAGS_BIND); } /* Returns numbers 
of initialized actions or negative error. */ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, struct tc_action *actions[], int init_res[], size_t *attr_size, u32 flags, u32 fl_flags, struct netlink_ext_ack *extack) { struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t sz = 0; int err; int i; err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack); if (err < 0) return err; for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { struct tc_action_ops *a_o; a_o = tc_action_load_ops(tb[i], flags, extack); if (IS_ERR(a_o)) { err = PTR_ERR(a_o); goto err_mod; } ops[i - 1] = a_o; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tp, tb[i], est, ops[i - 1], &init_res[i - 1], flags, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; } sz += tcf_action_fill_size(act); /* Start from index 0 */ actions[i - 1] = act; if (tc_act_bind(flags)) { bool skip_sw = tc_skip_sw(fl_flags); bool skip_hw = tc_skip_hw(fl_flags); if (tc_act_bind(act->tcfa_flags)) { /* Action is created by classifier and is not * standalone. Check that the user did not set * any action flags different than the * classifier flags, and inherit the flags from * the classifier for the compatibility case * where no flags were specified at all. 
*/ if ((tc_act_skip_sw(act->tcfa_flags) && !skip_sw) || (tc_act_skip_hw(act->tcfa_flags) && !skip_hw)) { NL_SET_ERR_MSG(extack, "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } if (skip_sw) act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_SW; if (skip_hw) act->tcfa_flags |= TCA_ACT_FLAGS_SKIP_HW; continue; } /* Action is standalone */ if (skip_sw != tc_act_skip_sw(act->tcfa_flags) || skip_hw != tc_act_skip_hw(act->tcfa_flags)) { NL_SET_ERR_MSG(extack, "Mismatch between action and filter offload flags"); err = -EINVAL; goto err; } } else { err = tcf_action_offload_add(act, extack); if (tc_act_skip_sw(act->tcfa_flags) && err) goto err; } } /* We have to commit them all together, because if any error happened in * between, we could not handle the failure gracefully. */ tcf_idr_insert_many(actions, init_res); *attr_size = tcf_action_full_attrs_size(sz); err = i - 1; goto err_mod; err: tcf_action_destroy(actions, flags & TCA_ACT_FLAGS_BIND); err_mod: for (i = 0; i < TCA_ACT_MAX_PRIO && ops[i]; i++) module_put(ops[i]->owner); return err; } void tcf_action_update_stats(struct tc_action *a, u64 bytes, u64 packets, u64 drops, bool hw) { if (a->cpu_bstats) { _bstats_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); this_cpu_ptr(a->cpu_qstats)->drops += drops; if (hw) _bstats_update(this_cpu_ptr(a->cpu_bstats_hw), bytes, packets); return; } _bstats_update(&a->tcfa_bstats, bytes, packets); a->tcfa_qstats.drops += drops; if (hw) _bstats_update(&a->tcfa_bstats_hw, bytes, packets); } EXPORT_SYMBOL(tcf_action_update_stats); int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p, int compat_mode) { int err = 0; struct gnet_dump d; if (p == NULL) goto errout; /* compat_mode being true specifies a call that is supposed * to add additional backward compatibility statistic TLVs. 
*/ if (compat_mode) { if (p->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, TCA_STATS, TCA_XSTATS, &p->tcfa_lock, &d, TCA_PAD); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, &p->tcfa_lock, &d, TCA_ACT_PAD); if (err < 0) goto errout; if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfa_bstats, false) < 0 || gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw, &p->tcfa_bstats_hw, false) < 0 || gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 || gnet_stats_copy_queue(&d, p->cpu_qstats, &p->tcfa_qstats, p->tcfa_qstats.qlen) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) goto errout; return 0; errout: return -1; } static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[], u32 portid, u32 seq, u16 flags, int event, int bind, int ref, struct netlink_ext_ack *extack) { struct tcamsg *t; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; if (extack && extack->_msg && nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg)) goto out_nlmsg_trim; nest = nla_nest_start_noflag(skb, TCA_ACT_TAB); if (!nest) goto out_nlmsg_trim; if (tcf_action_dump(skb, actions, bind, ref, false) < 0) goto out_nlmsg_trim; nla_nest_end(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; return skb->len; out_nlmsg_trim: nlmsg_trim(skb, b); return -1; } static int tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *actions[], int event, struct netlink_ext_ack *extack) { struct sk_buff *skb; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event, 0, 1, NULL) <= 0) { NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action"); kfree_skb(skb); return -EINVAL; } return rtnl_unicast(skb, net, portid); 
}

/*
 * Resolve one nested action attribute to an already existing action.
 *
 * @nla is parsed against tcf_action_policy. TCA_ACT_INDEX must be present
 * and at least sizeof(index) bytes long; TCA_ACT_KIND selects the ops via
 * tc_lookup_action() and the index is then searched with __tcf_idr_search().
 *
 * Returns the action found, or an ERR_PTR(): the parser's error, -EINVAL
 * for a missing/short index or an unknown kind, -ENOENT when the search
 * finds nothing.
 *
 * The reference on ops->owner is dropped on both the success path and the
 * not-found path.
 * NOTE(review): that reference is presumably taken by tc_lookup_action() -
 * confirm against its definition.
 */
static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla,
					  struct nlmsghdr *n, u32 portid,
					  struct netlink_ext_ack *extack)
{
	struct nlattr *tb[TCA_ACT_MAX + 1];
	const struct tc_action_ops *ops;
	struct tc_action *a;
	int index;
	int err;

	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
					  tcf_action_policy, extack);
	if (err < 0)
		goto err_out;

	err = -EINVAL;
	if (tb[TCA_ACT_INDEX] == NULL ||
	    nla_len(tb[TCA_ACT_INDEX]) < sizeof(index)) {
		NL_SET_ERR_MSG(extack, "Invalid TC action index value");
		goto err_out;
	}
	index = nla_get_u32(tb[TCA_ACT_INDEX]);

	err = -EINVAL;
	ops = tc_lookup_action(tb[TCA_ACT_KIND]);
	if (!ops) { /* could happen in batch of actions */
		NL_SET_ERR_MSG(extack, "Specified TC action kind not found");
		goto err_out;
	}
	err = -ENOENT;
	if (__tcf_idr_search(net, ops, &a, index) == 0) {
		NL_SET_ERR_MSG(extack, "TC action with specified index not found");
		goto err_mod;
	}
	module_put(ops->owner);
	return a;

err_mod:
	module_put(ops->owner);
err_out:
	return ERR_PTR(err);
}

/*
 * Handle a "flush" request for one action kind.
 *
 * Builds an RTM_DELACTION message in a freshly allocated skb, runs
 * __tcf_generic_walker() with RTM_DELACTION for the kind named by
 * TCA_ACT_KIND inside @nla, and sends the resulting message to RTNLGRP_TC
 * (echoing to the requester when NLM_F_ECHO is set).
 *
 * Returns the result of rtnetlink_send() when the walker reported a
 * positive value. Otherwise the skb is freed and the pending error is
 * returned: -ENOMEM if the skb could not be allocated, the parser's error,
 * -EINVAL for an unknown kind or a message-construction failure, or the
 * walker's own result - which may be 0, in which case nothing is sent and
 * 0 is returned.
 */
static int tca_action_flush(struct net *net, struct nlattr *nla,
			    struct nlmsghdr *n, u32 portid,
			    struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	unsigned char *b;
	struct nlmsghdr *nlh;
	struct tcamsg *t;
	/*
	 * Passed to the walker without being initialised here.
	 * NOTE(review): presumably the RTM_DELACTION walk never reads it -
	 * confirm against __tcf_generic_walker().
	 */
	struct netlink_callback dcb;
	struct nlattr *nest;
	struct nlattr *tb[TCA_ACT_MAX + 1];
	const struct tc_action_ops *ops;
	struct nlattr *kind;
	int err = -ENOMEM;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return err;

	/* remember where the message starts so its length can be fixed up */
	b = skb_tail_pointer(skb);

	err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla,
					  tcf_action_policy, extack);
	if (err < 0)
		goto err_out;

	/* every failure from here until the walker runs reports -EINVAL */
	err = -EINVAL;
	kind = tb[TCA_ACT_KIND];
	ops = tc_lookup_action(kind);
	if (!ops) { /* request names an action kind that is not known */
		NL_SET_ERR_MSG(extack, "Cannot flush unknown TC action");
		goto err_out;
	}

	nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION,
			sizeof(*t), 0);
	if (!nlh) {
		NL_SET_ERR_MSG(extack, "Failed to create TC action flush notification");
		goto out_module_put;
	}
	t = nlmsg_data(nlh);
	t->tca_family = AF_UNSPEC;
	t->tca__pad1 = 0;
	t->tca__pad2 = 0;

	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
	if (!nest) {
		NL_SET_ERR_MSG(extack, "Failed to add new netlink message");
		goto out_module_put;
	}

	err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack);
	if (err <= 0) {
		/* zero or an error: nothing to announce, drop the message */
		nla_nest_cancel(skb, nest);
		goto out_module_put;
	}

	nla_nest_end(skb, nest);

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	nlh->nlmsg_flags |= NLM_F_ROOT;
	module_put(ops->owner);
	/* the skb is handed to rtnetlink_send() and not freed here afterwards */
	err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			     n->nlmsg_flags & NLM_F_ECHO);
	if (err < 0)
		NL_SET_ERR_MSG(extack, "Failed to send TC action flush notification");

	return err;

out_module_put:
	module_put(ops->owner);
err_out:
	kfree_skb(skb);
	return err;
}

/*
 * Drop the caller's reference on every action in @actions and delete each
 * one by index.
 *
 * Each slot is cleared before its reference is released, so entries that
 * were already processed are not seen again by a later cleanup of the same
 * array. Returns 0, or the first negative result of tcf_idr_delete_index();
 * in that case the remaining entries are left untouched in @actions.
 */
static int tcf_action_delete(struct net *net, struct tc_action *actions[])
{
	struct tc_action *a;
	int i;

	tcf_act_for_each_action(i, a, actions) {
		const struct tc_action_ops *ops = a->ops;
		/* Actions can be deleted concurrently so we must save their
		 * type and id to search again after reference is released.
		 */
		struct tcf_idrinfo *idrinfo = a->idrinfo;
		u32 act_index = a->tcfa_index;

		actions[i] = NULL;
		if (tcf_action_put(a)) {
			/* last reference, action was deleted concurrently */
			module_put(ops->owner);
		} else {
			int ret;

			/* now do the delete */
			ret = tcf_idr_delete_index(idrinfo, act_index);
			if (ret < 0)
				return ret;
		}
	}
	return 0;
}

/*
 * Build the RTM_DELACTION message for a single @action removed during
 * reoffload. portid, seq and flags are all 0 and no extack is available.
 *
 * Returns the filled skb, ERR_PTR(-ENOBUFS) if allocation fails, or
 * ERR_PTR(-EINVAL) if tca_get_fill() does not return a positive value.
 */
static struct sk_buff *tcf_reoffload_del_notify_msg(struct net *net,
						    struct tc_action *action)
{
	size_t attr_size = tcf_action_fill_size(action);
	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {
		[0] = action,
	};
	struct sk_buff *skb;

	skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOBUFS);

	if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
		kfree_skb(skb);
		return ERR_PTR(-EINVAL);
	}

	return skb;
}

/*
 * Release @action via tcf_idr_release_unsafe() and, if that actually
 * deleted it (ACT_P_DELETED), notify RTNLGRP_TC.
 *
 * The notification is built before the release, while @action can still be
 * read; it is only built at all when rtnl_notify_needed() says so. If the
 * release did not delete the action the prepared message is discarded and
 * the release result is returned unchanged.
 */
static int tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
{
	/* saved up front: @action may be gone once it has been released */
	const struct tc_action_ops *ops = action->ops;
	struct sk_buff *skb;
	int ret;

	if (!rtnl_notify_needed(net, 0, RTNLGRP_TC)) {
		skb = NULL;
	} else {
		skb = tcf_reoffload_del_notify_msg(net, action);
		if (IS_ERR(skb))
			return PTR_ERR(skb);
	}

	ret = tcf_idr_release_unsafe(action);
	if (ret == ACT_P_DELETED) {
		module_put(ops->owner);
		ret = rtnetlink_maybe_send(skb, net, 0, RTNLGRP_TC, 0);
	} else {
		kfree_skb(skb);
	}

	return ret;
}

/*
 * Replay (@add true) or withdraw (@add false) every non-bound action of
 * every registered action type in every network namespace through the
 * offload callback @cb / @cb_priv.
 *
 * Runs with net_rwsem held for reading and act_id_mutex held; each idrinfo
 * is walked under its own lock. On withdrawal, an action flagged skip_sw
 * that is no longer in hardware is released through
 * tcf_reoffload_del_notify(). Per-action failures are skipped rather than
 * reported.
 *
 * Returns -EINVAL when @cb is NULL, otherwise 0.
 */
int tcf_action_reoffload_cb(flow_indr_block_bind_cb_t *cb,
			    void *cb_priv, bool add)
{
	struct tc_act_pernet_id *id_ptr;
	struct tcf_idrinfo *idrinfo;
	struct tc_action_net *tn;
	struct tc_action *p;
	unsigned int act_id;
	unsigned long tmp;
	unsigned long id;
	struct idr *idr;
	struct net *net;
	int ret;

	if (!cb)
		return -EINVAL;

	down_read(&net_rwsem);
	mutex_lock(&act_id_mutex);

	for_each_net(net) {
		list_for_each_entry(id_ptr, &act_pernet_id_list, list) {
			act_id = id_ptr->id;
			tn = net_generic(net, act_id);
			if (!tn)
				continue;
			idrinfo = tn->idrinfo;
			if (!idrinfo)
				continue;

			mutex_lock(&idrinfo->lock);
			idr = &idrinfo->action_idr;
			idr_for_each_entry_ul(idr, p, tmp, id) {
				/* skip error-pointer slots and bound actions */
				if (IS_ERR(p) || tc_act_bind(p->tcfa_flags))
					continue;
				if (add) {
					tcf_action_offload_add_ex(p, NULL, cb, cb_priv);
					continue;
				}

				/* cb unregister to update hw count */
				ret = tcf_action_offload_del_ex(p, cb, cb_priv);
				if (ret < 0)
					continue;
				if (tc_act_skip_sw(p->tcfa_flags) &&
				    !tc_act_in_hw(p))
					tcf_reoffload_del_notify(net, p);
			}
			mutex_unlock(&idrinfo->lock);
		}
	}
	mutex_unlock(&act_id_mutex);
	up_read(&net_rwsem);

	return 0;
}

/*
 * Build the RTM_DELACTION notification for @actions in reply to request @n.
 *
 * Returns the filled skb, ERR_PTR(-ENOBUFS) if allocation fails, or
 * ERR_PTR(-EINVAL) (with an extack message) if tca_get_fill() does not
 * return a positive value.
 */
static struct sk_buff *tcf_del_notify_msg(struct net *net, struct nlmsghdr *n,
					  struct tc_action *actions[],
					  u32 portid, size_t attr_size,
					  struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;

	skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOBUFS);

	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
			 0, 2, extack) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
		kfree_skb(skb);
		return ERR_PTR(-EINVAL);
	}

	return skb;
}

/*
 * Delete @actions and announce the deletion.
 *
 * The notification is prepared first (only if rtnl_notify_needed()), so a
 * failure to build it aborts before anything has been deleted. If the
 * deletion itself fails the prepared message is freed and the error is
 * returned; otherwise the result of rtnetlink_maybe_send() is returned.
 */
static int tcf_del_notify(struct net *net, struct nlmsghdr *n,
			  struct tc_action *actions[], u32 portid,
			  size_t attr_size, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;
	int ret;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
		skb = NULL;
	} else {
		skb = tcf_del_notify_msg(net, n, actions, portid, attr_size,
					 extack);
		if (IS_ERR(skb))
			return PTR_ERR(skb);
	}

	/* now do the delete */
	ret = tcf_action_delete(net, actions);
	if (ret < 0) {
		NL_SET_ERR_MSG(extack, "Failed to delete TC action");
		kfree_skb(skb);
		return ret;
	}

	return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
				    n->nlmsg_flags & NLM_F_ECHO);
}

/*
 * Common handler for RTM_GETACTION and RTM_DELACTION (@event).
 *
 * RTM_DELACTION with NLM_F_ROOT is a flush and is delegated to
 * tca_action_flush() using the first nested attribute. Otherwise each
 * nested attribute, in priority order starting at 1 and stopping at the
 * first gap, is resolved with tcf_action_get_1() and the resulting actions
 * are passed to tcf_get_notify() or tcf_del_notify().
 *
 * The collected references are released through tcf_action_put_many() on
 * every path except a successful delete, which returns directly.
 */
static int
tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
	      u32 portid, int event, struct netlink_ext_ack *extack)
{
	int i, ret;
	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
	struct tc_action *act;
	size_t attr_size = 0;
	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};

	ret = nla_parse_nested_deprecated(tb, TCA_ACT_MAX_PRIO, nla, NULL,
					  extack);
	if (ret < 0)
		return ret;

	if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
		if (tb[1])
			return tca_action_flush(net, tb[1], n, portid, extack);

		NL_SET_ERR_MSG(extack, "Invalid netlink attributes while flushing TC action");
		return -EINVAL;
	}

	for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) {
		act = tcf_action_get_1(net, tb[i], n, portid, extack);
		if (IS_ERR(act)) {
			ret = PTR_ERR(act);
			goto err;
		}
		attr_size += tcf_action_fill_size(act);
		/* attribute types start at 1, the array at 0 */
		actions[i - 1] = act;
	}

	attr_size = tcf_action_full_attrs_size(attr_size);

	if (event == RTM_GETACTION)
		ret = tcf_get_notify(net, portid, n, actions, event, extack);
	else { /* delete */
		ret = tcf_del_notify(net, n, actions, portid, attr_size, extack);
		if (ret)
			goto err;
		return 0;
	}
err:
	tcf_action_put_many(actions);
	return ret;
}

/*
 * Build the RTM_NEWACTION notification for @actions, echoing the request's
 * sequence number and flags.
 *
 * Returns the filled skb, ERR_PTR(-ENOBUFS) if allocation fails, or
 * ERR_PTR(-EINVAL) (with an extack message) if tca_get_fill() does not
 * return a positive value.
 */
static struct sk_buff *tcf_add_notify_msg(struct net *net, struct nlmsghdr *n,
					  struct tc_action *actions[],
					  u32 portid, size_t attr_size,
					  struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;

	skb = alloc_skb(max(attr_size, NLMSG_GOODSIZE), GFP_KERNEL);
	if (!skb)
		return ERR_PTR(-ENOBUFS);

	if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
			 RTM_NEWACTION, 0, 0, extack) <= 0) {
		NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
		kfree_skb(skb);
		return ERR_PTR(-EINVAL);
	}

	return skb;
}

/*
 * Announce newly added @actions to RTNLGRP_TC. The message is only built
 * when rtnl_notify_needed(); a NULL skb is handed to
 * rtnetlink_maybe_send() otherwise.
 */
static int tcf_add_notify(struct net *net, struct nlmsghdr *n,
			  struct tc_action *actions[], u32 portid,
			  size_t attr_size, struct netlink_ext_ack *extack)
{
	struct sk_buff *skb;

	if (!rtnl_notify_needed(net, n->nlmsg_flags, RTNLGRP_TC)) {
		skb = NULL;
	} else {
		skb = tcf_add_notify_msg(net, n, actions, portid, attr_size,
					 extack);
		if (IS_ERR(skb))
			return PTR_ERR(skb);
	}

	return rtnetlink_maybe_send(skb, net, portid, RTNLGRP_TC,
				    n->nlmsg_flags & NLM_F_ECHO);
}

/*
 * Create (or, depending on @flags, replace) the actions described by @nla
 * and send the RTM_NEWACTION notification.
 *
 * tcf_action_init() is retried while it returns -EAGAIN, at most 10 times
 * in total. Returns its negative error, or else the result of the
 * notification.
 */
static int tcf_action_add(struct net *net, struct nlattr *nla,
			  struct nlmsghdr *n, u32 portid, u32 flags,
			  struct netlink_ext_ack *extack)
{
	size_t attr_size = 0;
	int loop, ret;
	struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
	int init_res[TCA_ACT_MAX_PRIO] = {};

	for (loop = 0; loop < 10; loop++) {
		ret = tcf_action_init(net, NULL, nla, NULL, actions, init_res,
				      &attr_size, flags, 0, extack);
		if (ret != -EAGAIN)
			break;
	}

	if (ret < 0)
		return ret;

	ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);

	/* only put bound actions */
	tca_put_bound_many(actions, init_res);

	return ret;
}

/* Policy for the root-level attributes accepted by the action dump path. */
static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_ACT_FLAG_LARGE_DUMP_ON |
						 TCA_ACT_FLAG_TERSE_DUMP),
	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
};

/*
 * rtnetlink "doit" handler for RTM_NEWACTION, RTM_DELACTION and
 * RTM_GETACTION.
 *
 * Everything except RTM_GETACTION requires CAP_NET_ADMIN (-EPERM
 * otherwise). The message must carry TCA_ACT_TAB (-EINVAL otherwise). Any
 * other message type hits BUG().
 */
static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tca[TCA_ROOT_MAX + 1];
	u32 portid = NETLINK_CB(skb).portid;
	u32 flags = 0;
	int ret = 0;

	if ((n->nlmsg_type != RTM_GETACTION) &&
	    !netlink_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	ret = nlmsg_parse_deprecated(n, sizeof(struct tcamsg), tca,
				     TCA_ROOT_MAX, NULL, extack);
	if (ret < 0)
		return ret;

	if (tca[TCA_ACT_TAB] == NULL) {
		NL_SET_ERR_MSG(extack, "Netlink action attributes missing");
		return -EINVAL;
	}

	/* n->nlmsg_flags & NLM_F_CREATE */
	switch (n->nlmsg_type) {
	case RTM_NEWACTION:
		/* we are going to assume all other flags
		 * imply create only if it doesn't exist
		 * Note that CREATE | EXCL implies that
		 * but since we want avoid ambiguity (eg when flags
		 * is zero) then just set this
		 */
		if (n->nlmsg_flags & NLM_F_REPLACE)
			flags = TCA_ACT_FLAGS_REPLACE;
		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, flags,
				     extack);
		break;
	case RTM_DELACTION:
		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
				    portid, RTM_DELACTION, extack);
		break;
	case RTM_GETACTION:
		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
				    portid, RTM_GETACTION, extack);
		break;
	default:
		BUG();
	}

	return ret;
}

/*
 * Extract the TCA_ACT_KIND attribute of the first action nested in
 * nla[TCA_ACT_TAB].
 *
 * Returns NULL when the table is missing, fails to parse, or has no entry
 * at index 1. Otherwise returns whatever the first entry holds for
 * TCA_ACT_KIND.
 */
static struct nlattr *find_dump_kind(struct nlattr **nla)
{
	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
	struct nlattr *kind;

	tb1 = nla[TCA_ACT_TAB];
	if (tb1 == NULL)
		return NULL;

	if (nla_parse_deprecated(tb, TCA_ACT_MAX_PRIO, nla_data(tb1), NLMSG_ALIGN(nla_len(tb1)), NULL, NULL) < 0)
		return NULL;

	if (tb[1] == NULL)
		return NULL;
	if (nla_parse_nested_deprecated(tb2, TCA_ACT_MAX, tb[1], tcf_action_policy, NULL) < 0)
		return NULL;
	kind = tb2[TCA_ACT_KIND];

	return kind;
}

/*
 * rtnetlink "dumpit" handler for RTM_GETACTION.
 *
 * Dump state is carried in cb->args[]: this function stores the
 * TCA_ROOT_FLAGS value in args[2] and the "changed since" threshold, in
 * jiffies, in args[3]; it reads the number of dumped actions from args[1]
 * and resets it afterwards.
 * NOTE(review): args[1] is presumably filled in by the walker - confirm.
 *
 * Returns the parser's error if the request does not parse, 0 if no usable
 * kind is named, and skb->len otherwise - including on the failure paths,
 * which first trim the skb back to its length on entry.
 */
static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct nlattr *nest;
	struct tc_action_ops *a_o;
	int ret = 0;
	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
	struct nlattr *tb[TCA_ROOT_MAX + 1];
	struct nlattr *count_attr = NULL;
	unsigned long jiffy_since = 0;
	struct nlattr *kind = NULL;
	struct nla_bitfield32 bf;
	u32 msecs_since = 0;
	u32 act_count = 0;

	ret = nlmsg_parse_deprecated(cb->nlh, sizeof(struct tcamsg), tb,
				     TCA_ROOT_MAX, tcaa_policy, cb->extack);
	if (ret < 0)
		return ret;

	kind = find_dump_kind(tb);
	if (kind == NULL) {
		pr_info("tc_dump_action: action bad kind\n");
		return 0;
	}

	a_o = tc_lookup_action(kind);
	if (a_o == NULL)
		return 0;

	cb->args[2] = 0;
	if (tb[TCA_ROOT_FLAGS]) {
		bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]);
		cb->args[2] = bf.value;
	}

	if (tb[TCA_ROOT_TIME_DELTA]) {
		msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
	}

	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
			cb->nlh->nlmsg_type, sizeof(*t), 0);
	if (!nlh)
		goto out_module_put;

	/* a zero delta leaves jiffy_since at 0 */
	if (msecs_since)
		jiffy_since = jiffies - msecs_to_jiffies(msecs_since);

	t = nlmsg_data(nlh);
	t->tca_family = AF_UNSPEC;
	t->tca__pad1 = 0;
	t->tca__pad2 = 0;
	cb->args[3] = jiffy_since;
	/* reserve room for the count now; it is filled in after the walk */
	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
	if (!count_attr)
		goto out_module_put;

	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
	if (nest == NULL)
		goto out_module_put;

	ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL);
	if (ret < 0)
		goto out_module_put;

	if (ret > 0) {
		nla_nest_end(skb, nest);
		ret = skb->len;
		act_count = cb->args[1];
		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
		cb->args[1] = 0;
	} else
		nlmsg_trim(skb, b);

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	if (NETLINK_CB(cb->skb).portid && ret)
		nlh->nlmsg_flags |= NLM_F_MULTI;
	module_put(a_o->owner);
	return skb->len;

out_module_put:
	module_put(a_o->owner);
	nlmsg_trim(skb, b);
	return skb->len;
}

/* rtnetlink message types served by this file; only GETACTION can dump. */
static const struct rtnl_msg_handler tc_action_rtnl_msg_handlers[] __initconst = {
	{.msgtype = RTM_NEWACTION, .doit = tc_ctl_action},
	{.msgtype = RTM_DELACTION, .doit = tc_ctl_action},
	{.msgtype = RTM_GETACTION, .doit = tc_ctl_action,
	 .dumpit = tc_dump_action},
};

/* Register the action message handlers; always returns 0. */
static int __init tc_action_init(void)
{
	rtnl_register_many(tc_action_rtnl_msg_handlers);
	return 0;
}

subsys_initcall(tc_action_init);
182 182 49 49 35 35 49 49 137 137 131 131 137 137 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2016 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/cache.h> #include <linux/random.h> #include <linux/hrtimer.h> #include <linux/ktime.h> #include <linux/string.h> #include <linux/net.h> #include <linux/siphash.h> #include <net/secure_seq.h> #if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) #include <linux/in6.h> #include <net/tcp.h> static siphash_aligned_key_t net_secret; static siphash_aligned_key_t ts_secret; #define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ) static __always_inline void net_secret_init(void) { net_get_random_once(&net_secret, sizeof(net_secret)); } static __always_inline void ts_secret_init(void) { net_get_random_once(&ts_secret, sizeof(ts_secret)); } #endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) { /* * As close as possible to RFC 793, which * suggests using a 250 kHz clock. * Further reading shows this assumes 2 Mb/s networks. * For 10 Mb/s Ethernet, a 1 MHz clock is appropriate. 
* For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but * we also need to limit the resolution so that the u32 seq * overlaps less than one time per MSL (2 minutes). * Choosing a clock of 64 ns period is OK. (period of 274 s) */ return seq + (ktime_get_real_ns() >> 6); } #endif #if IS_ENABLED(CONFIG_IPV6) u32 secure_tcpv6_ts_off(const struct net *net, const __be32 *saddr, const __be32 *daddr) { const struct { struct in6_addr saddr; struct in6_addr daddr; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, }; if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); return siphash(&combined, offsetofend(typeof(combined), daddr), &ts_secret); } EXPORT_SYMBOL(secure_tcpv6_ts_off); u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; __be16 sport; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .sport = sport, .dport = dport }; u32 hash; net_secret_init(); hash = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); return seq_scale(hash); } EXPORT_SYMBOL(secure_tcpv6_seq); u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; unsigned int timeseed; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, .dport = dport, }; net_secret_init(); return siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); } EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr) { if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1) return 0; ts_secret_init(); return siphash_2u32((__force 
u32)saddr, (__force u32)daddr, &ts_secret); } /* secure_tcp_seq_and_tsoff(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d), * but fortunately, `sport' cannot be 0 in any circumstances. If this changes, * it would be easy enough to have the former function use siphash_4u32, passing * the arguments as separate u32. */ u32 secure_tcp_seq(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { u32 hash; net_secret_init(); hash = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); return seq_scale(hash); } EXPORT_SYMBOL_GPL(secure_tcp_seq); u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { net_secret_init(); return siphash_4u32((__force u32)saddr, (__force u32)daddr, (__force u16)dport, jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD, &net_secret); } EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral); #endif #if IS_ENABLED(CONFIG_IP_DCCP) u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport) { u64 seq; net_secret_init(); seq = siphash_3u32((__force u32)saddr, (__force u32)daddr, (__force u32)sport << 16 | (__force u32)dport, &net_secret); seq += ktime_get_real_ns(); seq &= (1ull << 48) - 1; return seq; } EXPORT_SYMBOL(secure_dccp_sequence_number); #if IS_ENABLED(CONFIG_IPV6) u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, __be16 sport, __be16 dport) { const struct { struct in6_addr saddr; struct in6_addr daddr; __be16 sport; __be16 dport; } __aligned(SIPHASH_ALIGNMENT) combined = { .saddr = *(struct in6_addr *)saddr, .daddr = *(struct in6_addr *)daddr, .sport = sport, .dport = dport }; u64 seq; net_secret_init(); seq = siphash(&combined, offsetofend(typeof(combined), dport), &net_secret); seq += ktime_get_real_ns(); seq &= (1ull << 48) - 1; return seq; } EXPORT_SYMBOL(secure_dccpv6_sequence_number); #endif #endif
3 3 1093 1095 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 // SPDX-License-Identifier: GPL-2.0-or-later /* * G8BPQ compatible "AX.25 via ethernet" driver release 004 * * This code REQUIRES 2.0.0 or higher/ NET3.029 * * This is a "pseudo" network driver to allow AX.25 over Ethernet * using G8BPQ encapsulation. It has been extracted from the protocol * implementation because * * - things got unreadable within the protocol stack * - to cure the protocol stack from "feature-ism" * - a protocol implementation shouldn't need to know on * which hardware it is running * - user-level programs like the AX.25 utilities shouldn't * need to know about the hardware. * - IP over ethernet encapsulated AX.25 was impossible * - rxecho.c did not work * - to have room for extensions * - it just deserves to "live" as an own driver * * This driver can use any ethernet destination address, and can be * limited to accept frames from one dedicated ethernet card only. * * Note that the driver sets up the BPQ devices automagically on * startup or (if started before the "insmod" of an ethernet device) * on "ifconfig up". It hopefully will remove the BPQ on "rmmod"ing * the ethernet device (in fact: as soon as another ethernet or bpq * device gets "ifconfig"ured). * * I have heard that several people are thinking of experiments * with highspeed packet radio using existing ethernet cards. * Well, this driver is prepared for this purpose, just add * your tx key control and a txdelay / tailtime algorithm, * probably some buffering, and /voila/... 
* * History * BPQ 001 Joerg(DL1BKE) Extracted BPQ code from AX.25 * protocol stack and added my own * yet existing patches * BPQ 002 Joerg(DL1BKE) Scan network device list on * startup. * BPQ 003 Joerg(DL1BKE) Ethernet destination address * and accepted source address * can be configured by an ioctl() * call. * Fixed to match Linux networking * changes - 2.1.15. * BPQ 004 Joerg(DL1BKE) Fixed to not lock up on ifconfig. */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/net.h> #include <linux/slab.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/init.h> #include <linux/rtnetlink.h> #include <net/ip.h> #include <net/arp.h> #include <net/net_namespace.h> #include <linux/bpqether.h> static const char banner[] __initconst = KERN_INFO \ "AX.25: bpqether driver version 004\n"; static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static struct packet_type bpq_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_BPQ), .func = bpq_rcv, }; static struct notifier_block bpq_dev_notifier = { .notifier_call = bpq_device_event, }; struct bpqdev { struct list_head bpq_list; /* list of bpq devices chain */ struct net_device *ethdev; /* link to ethernet device */ struct net_device *axdev; /* bpq device (bpq#) */ char dest_addr[6]; /* ether destination address */ char acpt_addr[6]; /* accept ether frames from this address only */ }; static LIST_HEAD(bpq_devices); /* * bpqether network 
devices are paired with ethernet devices below them, so * form a special "super class" of normal ethernet devices; split their locks * off into a separate class since they always nest. */ static struct lock_class_key bpq_netdev_xmit_lock_key; static struct lock_class_key bpq_netdev_addr_lock_key; static void bpq_set_lockdep_class_one(struct net_device *dev, struct netdev_queue *txq, void *_unused) { lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key); } static void bpq_set_lockdep_class(struct net_device *dev) { lockdep_set_class(&dev->addr_list_lock, &bpq_netdev_addr_lock_key); netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL); } /* ------------------------------------------------------------------------ */ /* * Get the ethernet device for a BPQ device */ static inline struct net_device *bpq_get_ether_dev(struct net_device *dev) { struct bpqdev *bpq = netdev_priv(dev); return bpq ? bpq->ethdev : NULL; } /* * Get the BPQ device for the ethernet device */ static inline struct net_device *bpq_get_ax25_dev(struct net_device *dev) { struct bpqdev *bpq; list_for_each_entry_rcu(bpq, &bpq_devices, bpq_list, lockdep_rtnl_is_held()) { if (bpq->ethdev == dev) return bpq->axdev; } return NULL; } static inline int dev_is_ethdev(struct net_device *dev) { return dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5); } /* ------------------------------------------------------------------------ */ /* * Receive an AX.25 frame via an ethernet interface. 
*/ static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len; char * ptr; struct ethhdr *eth; struct bpqdev *bpq; if (!net_eq(dev_net(dev), &init_net)) goto drop; if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; if (!pskb_may_pull(skb, sizeof(struct ethhdr))) goto drop; rcu_read_lock(); dev = bpq_get_ax25_dev(dev); if (dev == NULL || !netif_running(dev)) goto drop_unlock; /* * if we want to accept frames from just one ethernet device * we check the source address of the sender. */ bpq = netdev_priv(dev); eth = eth_hdr(skb); if (!(bpq->acpt_addr[0] & 0x01) && !ether_addr_equal(eth->h_source, bpq->acpt_addr)) goto drop_unlock; if (skb_cow(skb, sizeof(struct ethhdr))) goto drop_unlock; len = skb->data[0] + skb->data[1] * 256 - 5; skb_pull(skb, 2); /* Remove the length bytes */ skb_trim(skb, len); /* Set the length of the data */ dev->stats.rx_packets++; dev->stats.rx_bytes += len; ptr = skb_push(skb, 1); *ptr = 0; skb->protocol = ax25_type_trans(skb, dev); netif_rx(skb); unlock: rcu_read_unlock(); return 0; drop_unlock: kfree_skb(skb); goto unlock; drop: kfree_skb(skb); return 0; } /* * Send an AX.25 frame via an ethernet interface */ static netdev_tx_t bpq_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned char *ptr; struct bpqdev *bpq; struct net_device *orig_dev; int size; if (skb->protocol == htons(ETH_P_IP)) return ax25_ip_xmit(skb); /* * Just to be *really* sure not to send anything if the interface * is down, the ethernet device may have gone. */ if (!netif_running(dev)) { kfree_skb(skb); return NETDEV_TX_OK; } skb_pull(skb, 1); /* Drop KISS byte */ size = skb->len; /* * We're about to mess with the skb which may still shared with the * generic networking code so unshare and ensure it's got enough * space for the BPQ headers. 
*/ if (skb_cow(skb, AX25_BPQ_HEADER_LEN)) { if (net_ratelimit()) pr_err("bpqether: out of memory\n"); kfree_skb(skb); return NETDEV_TX_OK; } ptr = skb_push(skb, 2); /* Make space for length */ *ptr++ = (size + 5) % 256; *ptr++ = (size + 5) / 256; bpq = netdev_priv(dev); orig_dev = dev; if ((dev = bpq_get_ether_dev(dev)) == NULL) { orig_dev->stats.tx_dropped++; kfree_skb(skb); return NETDEV_TX_OK; } skb->protocol = ax25_type_trans(skb, dev); skb_reset_network_header(skb); dev_hard_header(skb, dev, ETH_P_BPQ, bpq->dest_addr, NULL, 0); dev->stats.tx_packets++; dev->stats.tx_bytes+=skb->len; dev_queue_xmit(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } /* * Set AX.25 callsign */ static int bpq_set_mac_address(struct net_device *dev, void *addr) { struct sockaddr *sa = (struct sockaddr *)addr; dev_addr_set(dev, sa->sa_data); return 0; } /* Ioctl commands * * SIOCSBPQETHOPT reserved for enhancements * SIOCSBPQETHADDR set the destination and accepted * source ethernet address (broadcast * or multicast: accept all) */ static int bpq_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd) { struct bpq_ethaddr __user *ethaddr = data; struct bpqdev *bpq = netdev_priv(dev); struct bpq_req req; if (!capable(CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case SIOCSBPQETHOPT: if (copy_from_user(&req, data, sizeof(struct bpq_req))) return -EFAULT; switch (req.cmd) { case SIOCGBPQETHPARAM: case SIOCSBPQETHPARAM: default: return -EINVAL; } break; case SIOCSBPQETHADDR: if (copy_from_user(bpq->dest_addr, ethaddr->destination, ETH_ALEN)) return -EFAULT; if (copy_from_user(bpq->acpt_addr, ethaddr->accept, ETH_ALEN)) return -EFAULT; break; default: return -EINVAL; } return 0; } /* * open/close a device */ static int bpq_open(struct net_device *dev) { netif_start_queue(dev); return 0; } static int bpq_close(struct net_device *dev) { netif_stop_queue(dev); return 0; } /* ------------------------------------------------------------------------ */ #ifdef 
CONFIG_PROC_FS /* * Proc filesystem */ static void *bpq_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { int i = 1; struct bpqdev *bpqdev; rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; list_for_each_entry_rcu(bpqdev, &bpq_devices, bpq_list) { if (i == *pos) return bpqdev; } return NULL; } static void *bpq_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct list_head *p; struct bpqdev *bpqdev = v; ++*pos; if (v == SEQ_START_TOKEN) p = rcu_dereference(list_next_rcu(&bpq_devices)); else p = rcu_dereference(list_next_rcu(&bpqdev->bpq_list)); return (p == &bpq_devices) ? NULL : list_entry(p, struct bpqdev, bpq_list); } static void bpq_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static int bpq_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_puts(seq, "dev ether destination accept from\n"); else { const struct bpqdev *bpqdev = v; seq_printf(seq, "%-5s %-10s %pM ", bpqdev->axdev->name, bpqdev->ethdev->name, bpqdev->dest_addr); if (is_multicast_ether_addr(bpqdev->acpt_addr)) seq_printf(seq, "*\n"); else seq_printf(seq, "%pM\n", bpqdev->acpt_addr); } return 0; } static const struct seq_operations bpq_seqops = { .start = bpq_seq_start, .next = bpq_seq_next, .stop = bpq_seq_stop, .show = bpq_seq_show, }; #endif /* ------------------------------------------------------------------------ */ static const struct net_device_ops bpq_netdev_ops = { .ndo_open = bpq_open, .ndo_stop = bpq_close, .ndo_start_xmit = bpq_xmit, .ndo_set_mac_address = bpq_set_mac_address, .ndo_siocdevprivate = bpq_siocdevprivate, }; static void bpq_setup(struct net_device *dev) { dev->netdev_ops = &bpq_netdev_ops; dev->needs_free_netdev = true; dev->flags = 0; dev->lltx = true; /* Allow recursion */ #if IS_ENABLED(CONFIG_AX25) dev->header_ops = &ax25_header_ops; #endif dev->type = ARPHRD_AX25; dev->hard_header_len = AX25_MAX_HEADER_LEN + AX25_BPQ_HEADER_LEN; dev->mtu = AX25_DEF_PACLEN; dev->addr_len = AX25_ADDR_LEN; 
memcpy(dev->broadcast, &ax25_bcast, AX25_ADDR_LEN); dev_addr_set(dev, (u8 *)&ax25_defaddr); } /* * Setup a new device. */ static int bpq_new_device(struct net_device *edev) { int err; struct net_device *ndev; struct bpqdev *bpq; ndev = alloc_netdev(sizeof(struct bpqdev), "bpq%d", NET_NAME_UNKNOWN, bpq_setup); if (!ndev) return -ENOMEM; bpq = netdev_priv(ndev); dev_hold(edev); bpq->ethdev = edev; bpq->axdev = ndev; eth_broadcast_addr(bpq->dest_addr); eth_broadcast_addr(bpq->acpt_addr); err = register_netdevice(ndev); if (err) goto error; bpq_set_lockdep_class(ndev); /* List protected by RTNL */ list_add_rcu(&bpq->bpq_list, &bpq_devices); return 0; error: dev_put(edev); free_netdev(ndev); return err; } static void bpq_free_device(struct net_device *ndev) { struct bpqdev *bpq = netdev_priv(ndev); dev_put(bpq->ethdev); list_del_rcu(&bpq->bpq_list); unregister_netdevice(ndev); } /* * Handle device status changes. */ static int bpq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (!dev_is_ethdev(dev) && !bpq_get_ax25_dev(dev)) return NOTIFY_DONE; switch (event) { case NETDEV_UP: /* new ethernet device -> new BPQ interface */ if (bpq_get_ax25_dev(dev) == NULL) bpq_new_device(dev); break; case NETDEV_DOWN: /* ethernet device closed -> close BPQ interface */ if ((dev = bpq_get_ax25_dev(dev)) != NULL) dev_close(dev); break; case NETDEV_UNREGISTER: /* ethernet device removed -> free BPQ interface */ if ((dev = bpq_get_ax25_dev(dev)) != NULL) bpq_free_device(dev); break; default: break; } return NOTIFY_DONE; } /* ------------------------------------------------------------------------ */ /* * Initialize driver. 
To be called from af_ax25 if not compiled as a * module */ static int __init bpq_init_driver(void) { #ifdef CONFIG_PROC_FS if (!proc_create_seq("bpqether", 0444, init_net.proc_net, &bpq_seqops)) { printk(KERN_ERR "bpq: cannot create /proc/net/bpqether entry.\n"); return -ENOENT; } #endif /* CONFIG_PROC_FS */ dev_add_pack(&bpq_packet_type); register_netdevice_notifier(&bpq_dev_notifier); printk(banner); return 0; } static void __exit bpq_cleanup_driver(void) { struct bpqdev *bpq; dev_remove_pack(&bpq_packet_type); unregister_netdevice_notifier(&bpq_dev_notifier); remove_proc_entry("bpqether", init_net.proc_net); rtnl_lock(); while (!list_empty(&bpq_devices)) { bpq = list_entry(bpq_devices.next, struct bpqdev, bpq_list); bpq_free_device(bpq->axdev); } rtnl_unlock(); } MODULE_AUTHOR("Joerg Reuter DL1BKE <jreuter@yaina.de>"); MODULE_DESCRIPTION("Transmit and receive AX.25 packets over Ethernet"); MODULE_LICENSE("GPL"); module_init(bpq_init_driver); module_exit(bpq_cleanup_driver);
3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" struct fec_req_info { struct ethnl_req_info base; }; struct fec_reply_data { struct ethnl_reply_data base; __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes); u32 active_fec; u8 fec_auto; struct fec_stat_grp { u64 stats[1 + ETHTOOL_MAX_LANES]; u8 cnt; } corr, uncorr, corr_bits; }; #define FEC_REPDATA(__reply_base) \ container_of(__reply_base, struct fec_reply_data, base) #define ETHTOOL_FEC_MASK ((ETHTOOL_FEC_LLRS << 1) - 1) const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1] = { [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats), }; static void ethtool_fec_to_link_modes(u32 fec, unsigned long *link_modes, u8 *fec_auto) { if (fec_auto) *fec_auto = !!(fec & ETHTOOL_FEC_AUTO); if (fec & ETHTOOL_FEC_OFF) __set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes); if 
(fec & ETHTOOL_FEC_RS) __set_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes); if (fec & ETHTOOL_FEC_BASER) __set_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes); if (fec & ETHTOOL_FEC_LLRS) __set_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes); } static int ethtool_link_modes_to_fecparam(struct ethtool_fecparam *fec, unsigned long *link_modes, u8 fec_auto) { memset(fec, 0, sizeof(*fec)); if (fec_auto) fec->fec |= ETHTOOL_FEC_AUTO; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_OFF; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_RS_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_RS; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_BASER_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_BASER; if (__test_and_clear_bit(ETHTOOL_LINK_MODE_FEC_LLRS_BIT, link_modes)) fec->fec |= ETHTOOL_FEC_LLRS; if (!bitmap_empty(link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) return -EINVAL; return 0; } static void fec_stats_recalc(struct fec_stat_grp *grp, struct ethtool_fec_stat *stats) { int i; if (stats->lanes[0] == ETHTOOL_STAT_NOT_SET) { grp->stats[0] = stats->total; grp->cnt = stats->total != ETHTOOL_STAT_NOT_SET; return; } grp->cnt = 1; grp->stats[0] = 0; for (i = 0; i < ETHTOOL_MAX_LANES; i++) { if (stats->lanes[i] == ETHTOOL_STAT_NOT_SET) break; grp->stats[0] += stats->lanes[i]; grp->stats[grp->cnt++] = stats->lanes[i]; } } static int fec_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { __ETHTOOL_DECLARE_LINK_MODE_MASK(active_fec_modes) = {}; struct fec_reply_data *data = FEC_REPDATA(reply_base); struct net_device *dev = reply_base->dev; struct ethtool_fecparam fec = {}; int ret; if (!dev->ethtool_ops->get_fecparam) return -EOPNOTSUPP; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = dev->ethtool_ops->get_fecparam(dev, &fec); if (ret) goto out_complete; if (req_base->flags & ETHTOOL_FLAG_STATS && dev->ethtool_ops->get_fec_stats) { struct ethtool_fec_stats stats; 
ethtool_stats_init((u64 *)&stats, sizeof(stats) / 8); dev->ethtool_ops->get_fec_stats(dev, &stats); fec_stats_recalc(&data->corr, &stats.corrected_blocks); fec_stats_recalc(&data->uncorr, &stats.uncorrectable_blocks); fec_stats_recalc(&data->corr_bits, &stats.corrected_bits); } WARN_ON_ONCE(fec.reserved); ethtool_fec_to_link_modes(fec.fec, data->fec_link_modes, &data->fec_auto); ethtool_fec_to_link_modes(fec.active_fec, active_fec_modes, NULL); data->active_fec = find_first_bit(active_fec_modes, __ETHTOOL_LINK_MODE_MASK_NBITS); /* Don't report attr if no FEC mode set. Note that * ethtool_fecparam_to_link_modes() ignores NONE and AUTO. */ if (data->active_fec == __ETHTOOL_LINK_MODE_MASK_NBITS) data->active_fec = 0; out_complete: ethnl_ops_complete(dev); return ret; } static int fec_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct fec_reply_data *data = FEC_REPDATA(reply_base); int len = 0; int ret; ret = ethnl_bitset_size(data->fec_link_modes, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; len += ret; len += nla_total_size(sizeof(u8)) + /* _FEC_AUTO */ nla_total_size(sizeof(u32)); /* _FEC_ACTIVE */ if (req_base->flags & ETHTOOL_FLAG_STATS) len += 3 * nla_total_size_64bit(sizeof(u64) * (1 + ETHTOOL_MAX_LANES)); return len; } static int fec_put_stats(struct sk_buff *skb, const struct fec_reply_data *data) { struct nlattr *nest; nest = nla_nest_start(skb, ETHTOOL_A_FEC_STATS); if (!nest) return -EMSGSIZE; if (nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORRECTED, sizeof(u64) * data->corr.cnt, data->corr.stats, ETHTOOL_A_FEC_STAT_PAD) || nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_UNCORR, sizeof(u64) * data->uncorr.cnt, data->uncorr.stats, ETHTOOL_A_FEC_STAT_PAD) || nla_put_64bit(skb, ETHTOOL_A_FEC_STAT_CORR_BITS, sizeof(u64) * data->corr_bits.cnt, data->corr_bits.stats, ETHTOOL_A_FEC_STAT_PAD)) goto err_cancel; 
nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int fec_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const struct fec_reply_data *data = FEC_REPDATA(reply_base); int ret; ret = ethnl_put_bitset(skb, ETHTOOL_A_FEC_MODES, data->fec_link_modes, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; if (nla_put_u8(skb, ETHTOOL_A_FEC_AUTO, data->fec_auto) || (data->active_fec && nla_put_u32(skb, ETHTOOL_A_FEC_ACTIVE, data->active_fec))) return -EMSGSIZE; if (req_base->flags & ETHTOOL_FLAG_STATS && fec_put_stats(skb, data)) return -EMSGSIZE; return 0; } /* FEC_SET */ const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = { [ETHTOOL_A_FEC_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_FEC_MODES] = { .type = NLA_NESTED }, [ETHTOOL_A_FEC_AUTO] = NLA_POLICY_MAX(NLA_U8, 1), }; static int ethnl_set_fec_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; return ops->get_fecparam && ops->set_fecparam ? 
1 : -EOPNOTSUPP; } static int ethnl_set_fec(struct ethnl_req_info *req_info, struct genl_info *info) { __ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {}; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; struct ethtool_fecparam fec = {}; bool mod = false; u8 fec_auto; int ret; ret = dev->ethtool_ops->get_fecparam(dev, &fec); if (ret < 0) return ret; ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto); ret = ethnl_update_bitset(fec_link_modes, __ETHTOOL_LINK_MODE_MASK_NBITS, tb[ETHTOOL_A_FEC_MODES], link_mode_names, info->extack, &mod); if (ret < 0) return ret; ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod); if (!mod) return 0; ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto); if (ret) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], "invalid FEC modes requested"); return ret; } if (!fec.fec) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES], "no FEC modes set"); return -EINVAL; } ret = dev->ethtool_ops->set_fecparam(dev, &fec); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_fec_request_ops = { .request_cmd = ETHTOOL_MSG_FEC_GET, .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY, .hdr_attr = ETHTOOL_A_FEC_HEADER, .req_info_size = sizeof(struct fec_req_info), .reply_data_size = sizeof(struct fec_reply_data), .prepare_data = fec_prepare_data, .reply_size = fec_reply_size, .fill_reply = fec_fill_reply, .set_validate = ethnl_set_fec_validate, .set = ethnl_set_fec, .set_ntf_cmd = ETHTOOL_MSG_FEC_NTF, };
40 569 13 501 569 146 520 569 569 567 13 652 654 654 654 652 654 486 493 193 175 568 535 470 495 568 569 567 468 568 462 570 573 568 468 468 561 561 40 40 40 208 209 196 171 208 176 176 745 487 745 741 149 149 200 199 165 194 200 199 200 694 200 693 694 13 13 9 9 9 9 461 461 159 160 159 159 461 462 462 462 700 642 652 738 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 
447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 // SPDX-License-Identifier: GPL-2.0 /* * kobject.c - library routines for handling generic kernel objects * * Copyright (c) 2002-2003 Patrick Mochel <mochel@osdl.org> * Copyright (c) 2006-2007 Greg Kroah-Hartman <greg@kroah.com> * Copyright (c) 2006-2007 Novell Inc. * * Please see the file Documentation/core-api/kobject.rst for critical information * about using the kobject interface. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kobject.h> #include <linux/string.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> #include <linux/random.h> /** * kobject_namespace() - Return @kobj's namespace tag. * @kobj: kobject in question * * Returns namespace tag of @kobj if its parent has namespace ops enabled * and thus @kobj should have a namespace tag associated with it. Returns * %NULL otherwise. */ const void *kobject_namespace(const struct kobject *kobj) { const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj); if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE) return NULL; return kobj->ktype->namespace(kobj); } /** * kobject_get_ownership() - Get sysfs ownership data for @kobj. 
* @kobj: kobject in question * @uid: kernel user ID for sysfs objects * @gid: kernel group ID for sysfs objects * * Returns initial uid/gid pair that should be used when creating sysfs * representation of given kobject. Normally used to adjust ownership of * objects in a container. */ void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { *uid = GLOBAL_ROOT_UID; *gid = GLOBAL_ROOT_GID; if (kobj->ktype->get_ownership) kobj->ktype->get_ownership(kobj, uid, gid); } static bool kobj_ns_type_is_valid(enum kobj_ns_type type) { if ((type <= KOBJ_NS_TYPE_NONE) || (type >= KOBJ_NS_TYPES)) return false; return true; } static int create_dir(struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); const struct kobj_ns_type_operations *ops; int error; error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); if (error) return error; if (ktype) { error = sysfs_create_groups(kobj, ktype->default_groups); if (error) { sysfs_remove_dir(kobj); return error; } } /* * @kobj->sd may be deleted by an ancestor going away. Hold an * extra reference so that it stays until @kobj is gone. */ sysfs_get(kobj->sd); /* * If @kobj has ns_ops, its children need to be filtered based on * their namespace tags. Enable namespace support on @kobj->sd. */ ops = kobj_child_ns_ops(kobj); if (ops) { BUG_ON(!kobj_ns_type_is_valid(ops->type)); BUG_ON(!kobj_ns_type_registered(ops->type)); sysfs_enable_ns(kobj->sd); } return 0; } static int get_kobj_path_length(const struct kobject *kobj) { int length = 1; const struct kobject *parent = kobj; /* walk up the ancestors until we hit the one pointing to the * root. * Add 1 to strlen for leading '/' of each level. 
*/ do { if (kobject_name(parent) == NULL) return 0; length += strlen(kobject_name(parent)) + 1; parent = parent->parent; } while (parent); return length; } static int fill_kobj_path(const struct kobject *kobj, char *path, int length) { const struct kobject *parent; --length; for (parent = kobj; parent; parent = parent->parent) { int cur = strlen(kobject_name(parent)); /* back up enough to print this name with '/' */ length -= cur; if (length <= 0) return -EINVAL; memcpy(path + length, kobject_name(parent), cur); *(path + --length) = '/'; } pr_debug("'%s' (%p): %s: path = '%s'\n", kobject_name(kobj), kobj, __func__, path); return 0; } /** * kobject_get_path() - Allocate memory and fill in the path for @kobj. * @kobj: kobject in question, with which to build the path * @gfp_mask: the allocation type used to allocate the path * * Return: The newly allocated memory, caller must free with kfree(). */ char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask) { char *path; int len; retry: len = get_kobj_path_length(kobj); if (len == 0) return NULL; path = kzalloc(len, gfp_mask); if (!path) return NULL; if (fill_kobj_path(kobj, path, len)) { kfree(path); goto retry; } return path; } EXPORT_SYMBOL_GPL(kobject_get_path); /* add the kobject to its kset's list */ static void kobj_kset_join(struct kobject *kobj) { if (!kobj->kset) return; kset_get(kobj->kset); spin_lock(&kobj->kset->list_lock); list_add_tail(&kobj->entry, &kobj->kset->list); spin_unlock(&kobj->kset->list_lock); } /* remove the kobject from its kset's list */ static void kobj_kset_leave(struct kobject *kobj) { if (!kobj->kset) return; spin_lock(&kobj->kset->list_lock); list_del_init(&kobj->entry); spin_unlock(&kobj->kset->list_lock); kset_put(kobj->kset); } static void kobject_init_internal(struct kobject *kobj) { if (!kobj) return; kref_init(&kobj->kref); INIT_LIST_HEAD(&kobj->entry); kobj->state_in_sysfs = 0; kobj->state_add_uevent_sent = 0; kobj->state_remove_uevent_sent = 0; kobj->state_initialized 
= 1; } static int kobject_add_internal(struct kobject *kobj) { int error = 0; struct kobject *parent; if (!kobj) return -ENOENT; if (!kobj->name || !kobj->name[0]) { WARN(1, "kobject: (%p): attempted to be registered with empty name!\n", kobj); return -EINVAL; } parent = kobject_get(kobj->parent); /* join kset if set, use it as parent if we do not already have one */ if (kobj->kset) { if (!parent) parent = kobject_get(&kobj->kset->kobj); kobj_kset_join(kobj); kobj->parent = parent; } pr_debug("'%s' (%p): %s: parent: '%s', set: '%s'\n", kobject_name(kobj), kobj, __func__, parent ? kobject_name(parent) : "<NULL>", kobj->kset ? kobject_name(&kobj->kset->kobj) : "<NULL>"); error = create_dir(kobj); if (error) { kobj_kset_leave(kobj); kobject_put(parent); kobj->parent = NULL; /* be noisy on error issues */ if (error == -EEXIST) pr_err("%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n", __func__, kobject_name(kobj)); else pr_err("%s failed for %s (error: %d parent: %s)\n", __func__, kobject_name(kobj), error, parent ? kobject_name(parent) : "'none'"); } else kobj->state_in_sysfs = 1; return error; } /** * kobject_set_name_vargs() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * @vargs: vargs to format the string. */ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, va_list vargs) { const char *s; if (kobj->name && !fmt) return 0; s = kvasprintf_const(GFP_KERNEL, fmt, vargs); if (!s) return -ENOMEM; /* * ewww... some of these buggers have '/' in the name ... If * that's the case, we need to make sure we have an actual * allocated copy to modify, since kvasprintf_const may have * returned something from .rodata. 
*/ if (strchr(s, '/')) { char *t; t = kstrdup(s, GFP_KERNEL); kfree_const(s); if (!t) return -ENOMEM; s = strreplace(t, '/', '!'); } kfree_const(kobj->name); kobj->name = s; return 0; } /** * kobject_set_name() - Set the name of a kobject. * @kobj: struct kobject to set the name of * @fmt: format string used to build the name * * This sets the name of the kobject. If you have already added the * kobject to the system, you must call kobject_rename() in order to * change the name of the kobject. */ int kobject_set_name(struct kobject *kobj, const char *fmt, ...) { va_list vargs; int retval; va_start(vargs, fmt); retval = kobject_set_name_vargs(kobj, fmt, vargs); va_end(vargs); return retval; } EXPORT_SYMBOL(kobject_set_name); /** * kobject_init() - Initialize a kobject structure. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * * This function will properly initialize a kobject such that it can then * be passed to the kobject_add() call. * * After this function is called, the kobject MUST be cleaned up by a call * to kobject_put(), not by a call to kfree directly to ensure that all of * the memory is cleaned up properly. 
*/ void kobject_init(struct kobject *kobj, const struct kobj_type *ktype) { char *err_str; if (!kobj) { err_str = "invalid kobject pointer!"; goto error; } if (!ktype) { err_str = "must have a ktype to be initialized properly!\n"; goto error; } if (kobj->state_initialized) { /* do not error out as sometimes we can recover */ pr_err("kobject (%p): tried to init an initialized object, something is seriously wrong.\n", kobj); dump_stack_lvl(KERN_ERR); } kobject_init_internal(kobj); kobj->ktype = ktype; return; error: pr_err("kobject (%p): %s\n", kobj, err_str); dump_stack_lvl(KERN_ERR); } EXPORT_SYMBOL(kobject_init); static __printf(3, 0) int kobject_add_varg(struct kobject *kobj, struct kobject *parent, const char *fmt, va_list vargs) { int retval; retval = kobject_set_name_vargs(kobj, fmt, vargs); if (retval) { pr_err("can not set name properly!\n"); return retval; } kobj->parent = parent; return kobject_add_internal(kobj); } /** * kobject_add() - The main kobject add function. * @kobj: the kobject to add * @parent: pointer to the parent of the kobject. * @fmt: format to name the kobject with. * * The kobject name is set and added to the kobject hierarchy in this * function. * * If @parent is set, then the parent of the @kobj will be set to it. * If @parent is NULL, then the parent of the @kobj will be set to the * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * * Note, no "add" uevent will be created with this call, the caller should set * up all of the necessary sysfs files for the object and then call * kobject_uevent() with the UEVENT_ADD parameter to ensure that * userspace is properly notified of this kobject's creation. * * Return: If this function returns an error, kobject_put() must be * called to properly clean up the memory associated with the * object. 
Under no instance should the kobject that is passed * to this function be directly freed with a call to kfree(), * that can leak memory. * * If this function returns success, kobject_put() must also be called * in order to properly clean up the memory associated with the object. * * In short, once this function is called, kobject_put() MUST be called * when the use of the object is finished in order to properly free * everything. */ int kobject_add(struct kobject *kobj, struct kobject *parent, const char *fmt, ...) { va_list args; int retval; if (!kobj) return -EINVAL; if (!kobj->state_initialized) { pr_err("kobject '%s' (%p): tried to add an uninitialized object, something is seriously wrong.\n", kobject_name(kobj), kobj); dump_stack_lvl(KERN_ERR); return -EINVAL; } va_start(args, fmt); retval = kobject_add_varg(kobj, parent, fmt, args); va_end(args); return retval; } EXPORT_SYMBOL(kobject_add); /** * kobject_init_and_add() - Initialize a kobject structure and add it to * the kobject hierarchy. * @kobj: pointer to the kobject to initialize * @ktype: pointer to the ktype for this kobject. * @parent: pointer to the parent of this kobject. * @fmt: the name of the kobject. * * This function combines the call to kobject_init() and kobject_add(). * * If this function returns an error, kobject_put() must be called to * properly clean up the memory associated with the object. This is the * same type of error handling after a call to kobject_add() and kobject * lifetime rules are the same here. */ int kobject_init_and_add(struct kobject *kobj, const struct kobj_type *ktype, struct kobject *parent, const char *fmt, ...) { va_list args; int retval; kobject_init(kobj, ktype); va_start(args, fmt); retval = kobject_add_varg(kobj, parent, fmt, args); va_end(args); return retval; } EXPORT_SYMBOL_GPL(kobject_init_and_add); /** * kobject_rename() - Change the name of an object. * @kobj: object in question. 
* @new_name: object's new name * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of kobject_rename * on the same kobject and to ensure that new_name is valid and * won't conflict with other kobjects. */ int kobject_rename(struct kobject *kobj, const char *new_name) { int error = 0; const char *devpath = NULL; const char *dup_name = NULL, *name; char *devpath_string = NULL; char *envp[2]; kobj = kobject_get(kobj); if (!kobj) return -EINVAL; if (!kobj->parent) { kobject_put(kobj); return -EINVAL; } devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { error = -ENOMEM; goto out; } devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); if (!devpath_string) { error = -ENOMEM; goto out; } sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; name = dup_name = kstrdup_const(new_name, GFP_KERNEL); if (!name) { error = -ENOMEM; goto out; } error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj)); if (error) goto out; /* Install the new kobject name */ dup_name = kobj->name; kobj->name = name; /* This function is mostly/only used for network interface. * Some hotplug package track interfaces by their name and * therefore want to know when the name is changed by the user. */ kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: kfree_const(dup_name); kfree(devpath_string); kfree(devpath); kobject_put(kobj); return error; } EXPORT_SYMBOL_GPL(kobject_rename); /** * kobject_move() - Move object to another parent. * @kobj: object in question. 
* @new_parent: object's new parent (can be NULL) */ int kobject_move(struct kobject *kobj, struct kobject *new_parent) { int error; struct kobject *old_parent; const char *devpath = NULL; char *devpath_string = NULL; char *envp[2]; kobj = kobject_get(kobj); if (!kobj) return -EINVAL; new_parent = kobject_get(new_parent); if (!new_parent) { if (kobj->kset) new_parent = kobject_get(&kobj->kset->kobj); } /* old object path */ devpath = kobject_get_path(kobj, GFP_KERNEL); if (!devpath) { error = -ENOMEM; goto out; } devpath_string = kmalloc(strlen(devpath) + 15, GFP_KERNEL); if (!devpath_string) { error = -ENOMEM; goto out; } sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); envp[0] = devpath_string; envp[1] = NULL; error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj)); if (error) goto out; old_parent = kobj->parent; kobj->parent = new_parent; new_parent = NULL; kobject_put(old_parent); kobject_uevent_env(kobj, KOBJ_MOVE, envp); out: kobject_put(new_parent); kobject_put(kobj); kfree(devpath_string); kfree(devpath); return error; } EXPORT_SYMBOL_GPL(kobject_move); static void __kobject_del(struct kobject *kobj) { struct kernfs_node *sd; const struct kobj_type *ktype; sd = kobj->sd; ktype = get_ktype(kobj); if (ktype) sysfs_remove_groups(kobj, ktype->default_groups); /* send "remove" if the caller did not do it but sent "add" */ if (kobj->state_add_uevent_sent && !kobj->state_remove_uevent_sent) { pr_debug("'%s' (%p): auto cleanup 'remove' event\n", kobject_name(kobj), kobj); kobject_uevent(kobj, KOBJ_REMOVE); } sysfs_remove_dir(kobj); sysfs_put(sd); kobj->state_in_sysfs = 0; kobj_kset_leave(kobj); kobj->parent = NULL; } /** * kobject_del() - Unlink kobject from hierarchy. * @kobj: object. * * This is the function that should be called to delete an object * successfully added via kobject_add(). 
*/ void kobject_del(struct kobject *kobj) { struct kobject *parent; if (!kobj) return; parent = kobj->parent; __kobject_del(kobj); kobject_put(parent); } EXPORT_SYMBOL(kobject_del); /** * kobject_get() - Increment refcount for object. * @kobj: object. */ struct kobject *kobject_get(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) WARN(1, KERN_WARNING "kobject: '%s' (%p): is not initialized, yet kobject_get() is being called.\n", kobject_name(kobj), kobj); kref_get(&kobj->kref); } return kobj; } EXPORT_SYMBOL(kobject_get); struct kobject * __must_check kobject_get_unless_zero(struct kobject *kobj) { if (!kobj) return NULL; if (!kref_get_unless_zero(&kobj->kref)) kobj = NULL; return kobj; } EXPORT_SYMBOL(kobject_get_unless_zero); /* * kobject_cleanup - free kobject resources. * @kobj: object to cleanup */ static void kobject_cleanup(struct kobject *kobj) { struct kobject *parent = kobj->parent; const struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; pr_debug("'%s' (%p): %s, parent %p\n", kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) pr_debug("'%s' (%p): does not have a release() function, it is broken and must be fixed. 
See Documentation/core-api/kobject.rst.\n", kobject_name(kobj), kobj); /* remove from sysfs if the caller did not do it */ if (kobj->state_in_sysfs) { pr_debug("'%s' (%p): auto cleanup kobject_del\n", kobject_name(kobj), kobj); __kobject_del(kobj); } else { /* avoid dropping the parent reference unnecessarily */ parent = NULL; } if (t && t->release) { pr_debug("'%s' (%p): calling ktype release\n", kobject_name(kobj), kobj); t->release(kobj); } /* free name if we allocated it */ if (name) { pr_debug("'%s': free name\n", name); kfree_const(name); } kobject_put(parent); } #ifdef CONFIG_DEBUG_KOBJECT_RELEASE static void kobject_delayed_cleanup(struct work_struct *work) { kobject_cleanup(container_of(to_delayed_work(work), struct kobject, release)); } #endif static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE unsigned long delay = HZ + HZ * get_random_u32_below(4); pr_info("'%s' (%p): %s, parent %p (delayed %ld)\n", kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif } /** * kobject_put() - Decrement refcount for object. * @kobj: object. * * Decrement the refcount, and if 0, call kobject_cleanup(). */ void kobject_put(struct kobject *kobj) { if (kobj) { if (!kobj->state_initialized) WARN(1, KERN_WARNING "kobject: '%s' (%p): is not initialized, yet kobject_put() is being called.\n", kobject_name(kobj), kobj); kref_put(&kobj->kref, kobject_release); } } EXPORT_SYMBOL(kobject_put); static void dynamic_kobj_release(struct kobject *kobj) { pr_debug("(%p): %s\n", kobj, __func__); kfree(kobj); } static const struct kobj_type dynamic_kobj_ktype = { .release = dynamic_kobj_release, .sysfs_ops = &kobj_sysfs_ops, }; /** * kobject_create() - Create a struct kobject dynamically. 
* * This function creates a kobject structure dynamically and sets it up * to be a "dynamic" kobject with a default release function set up. * * If the kobject was not able to be created, NULL will be returned. * The kobject structure returned from here must be cleaned up with a * call to kobject_put() and not kfree(), as kobject_init() has * already been called on this structure. */ static struct kobject *kobject_create(void) { struct kobject *kobj; kobj = kzalloc(sizeof(*kobj), GFP_KERNEL); if (!kobj) return NULL; kobject_init(kobj, &dynamic_kobj_ktype); return kobj; } /** * kobject_create_and_add() - Create a struct kobject dynamically and * register it with sysfs. * @name: the name for the kobject * @parent: the parent kobject of this kobject, if any. * * This function creates a kobject structure dynamically and registers it * with sysfs. When you are finished with this structure, call * kobject_put() and the structure will be dynamically freed when * it is no longer being used. * * If the kobject was not able to be created, NULL will be returned. */ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent) { struct kobject *kobj; int retval; kobj = kobject_create(); if (!kobj) return NULL; retval = kobject_add(kobj, parent, "%s", name); if (retval) { pr_warn("%s: kobject_add error: %d\n", __func__, retval); kobject_put(kobj); kobj = NULL; } return kobj; } EXPORT_SYMBOL_GPL(kobject_create_and_add); /** * kset_init() - Initialize a kset for use. 
* @k: kset */ void kset_init(struct kset *k) { kobject_init_internal(&k->kobj); INIT_LIST_HEAD(&k->list); spin_lock_init(&k->list_lock); } /* default kobject attribute operations */ static ssize_t kobj_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct kobj_attribute *kattr; ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->show) ret = kattr->show(kobj, kattr, buf); return ret; } static ssize_t kobj_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct kobj_attribute *kattr; ssize_t ret = -EIO; kattr = container_of(attr, struct kobj_attribute, attr); if (kattr->store) ret = kattr->store(kobj, kattr, buf, count); return ret; } const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register() - Initialize and add a kset. * @k: kset. * * NOTE: On error, the kset.kobj.name allocated by() kobj_set_name() * is freed, it can not be used any more. */ int kset_register(struct kset *k) { int err; if (!k) return -EINVAL; if (!k->kobj.ktype) { pr_err("must have a ktype to be initialized properly!\n"); return -EINVAL; } kset_init(k); err = kobject_add_internal(&k->kobj); if (err) { kfree_const(k->kobj.name); /* Set it to NULL to avoid accessing bad pointer in callers. */ k->kobj.name = NULL; return err; } kobject_uevent(&k->kobj, KOBJ_ADD); return 0; } EXPORT_SYMBOL(kset_register); /** * kset_unregister() - Remove a kset. * @k: kset. */ void kset_unregister(struct kset *k) { if (!k) return; kobject_del(&k->kobj); kobject_put(&k->kobj); } EXPORT_SYMBOL(kset_unregister); /** * kset_find_obj() - Search for object in kset. * @kset: kset we're looking in. * @name: object's name. * * Lock kset via @kset->subsys, and iterate over @kset->list, * looking for a matching kobject. If matching object is found * take a reference and return the object. 
*/ struct kobject *kset_find_obj(struct kset *kset, const char *name) { struct kobject *k; struct kobject *ret = NULL; spin_lock(&kset->list_lock); list_for_each_entry(k, &kset->list, entry) { if (kobject_name(k) && !strcmp(kobject_name(k), name)) { ret = kobject_get_unless_zero(k); break; } } spin_unlock(&kset->list_lock); return ret; } EXPORT_SYMBOL_GPL(kset_find_obj); static void kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); pr_debug("'%s' (%p): %s\n", kobject_name(kobj), kobj, __func__); kfree(kset); } static void kset_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { if (kobj->parent) kobject_get_ownership(kobj->parent, uid, gid); } static const struct kobj_type kset_ktype = { .sysfs_ops = &kobj_sysfs_ops, .release = kset_release, .get_ownership = kset_get_ownership, }; /** * kset_create() - Create a struct kset dynamically. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically. This structure can * then be registered with the system and show up in sysfs with a call to * kset_register(). When you are finished with this structure, if * kset_register() has been called, call kset_unregister() and the * structure will be dynamically freed when it is no longer being used. * * If the kset was not able to be created, NULL will be returned. */ static struct kset *kset_create(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; int retval; kset = kzalloc(sizeof(*kset), GFP_KERNEL); if (!kset) return NULL; retval = kobject_set_name(&kset->kobj, "%s", name); if (retval) { kfree(kset); return NULL; } kset->uevent_ops = uevent_ops; kset->kobj.parent = parent_kobj; /* * The kobject of this kset will have a type of kset_ktype and belong to * no kset itself. 
That way we can properly free it when it is * finished being used. */ kset->kobj.ktype = &kset_ktype; kset->kobj.kset = NULL; return kset; } /** * kset_create_and_add() - Create a struct kset dynamically and add it to sysfs. * * @name: the name for the kset * @uevent_ops: a struct kset_uevent_ops for the kset * @parent_kobj: the parent kobject of this kset, if any. * * This function creates a kset structure dynamically and registers it * with sysfs. When you are finished with this structure, call * kset_unregister() and the structure will be dynamically freed when it * is no longer being used. * * If the kset was not able to be created, NULL will be returned. */ struct kset *kset_create_and_add(const char *name, const struct kset_uevent_ops *uevent_ops, struct kobject *parent_kobj) { struct kset *kset; int error; kset = kset_create(name, uevent_ops, parent_kobj); if (!kset) return NULL; error = kset_register(kset); if (error) { kfree(kset); return NULL; } return kset; } EXPORT_SYMBOL_GPL(kset_create_and_add); static DEFINE_SPINLOCK(kobj_ns_type_lock); static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES]; int kobj_ns_type_register(const struct kobj_ns_type_operations *ops) { enum kobj_ns_type type = ops->type; int error; spin_lock(&kobj_ns_type_lock); error = -EINVAL; if (!kobj_ns_type_is_valid(type)) goto out; error = -EBUSY; if (kobj_ns_ops_tbl[type]) goto out; error = 0; kobj_ns_ops_tbl[type] = ops; out: spin_unlock(&kobj_ns_type_lock); return error; } int kobj_ns_type_registered(enum kobj_ns_type type) { int registered = 0; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type)) registered = kobj_ns_ops_tbl[type] != NULL; spin_unlock(&kobj_ns_type_lock); return registered; } const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent) { const struct kobj_ns_type_operations *ops = NULL; if (parent && parent->ktype && parent->ktype->child_ns_type) ops = parent->ktype->child_ns_type(parent); return ops; } 
const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj) { return kobj_child_ns_ops(kobj->parent); } bool kobj_ns_current_may_mount(enum kobj_ns_type type) { bool may_mount = true; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); spin_unlock(&kobj_ns_type_lock); return may_mount; } void *kobj_ns_grab_current(enum kobj_ns_type type) { void *ns = NULL; spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type]) ns = kobj_ns_ops_tbl[type]->grab_current_ns(); spin_unlock(&kobj_ns_type_lock); return ns; } EXPORT_SYMBOL_GPL(kobj_ns_grab_current); void kobj_ns_drop(enum kobj_ns_type type, void *ns) { spin_lock(&kobj_ns_type_lock); if (kobj_ns_type_is_valid(type) && kobj_ns_ops_tbl[type] && kobj_ns_ops_tbl[type]->drop_ns) kobj_ns_ops_tbl[type]->drop_ns(ns); spin_unlock(&kobj_ns_type_lock); } EXPORT_SYMBOL_GPL(kobj_ns_drop);
18 33 90 90 90 90 33 33 33 33 91 18 247 247 247 33 102 90 90 90 90 90 90 90 90 90 90 90 46 42 8 8 48 44 4 47 35 35 35 35 41 5 5 5 46 35 6 6 6 40 46 19 41 41 25 18 25 18 22 19 25 16 39 39 39 39 39 25 16 18 18 18 18 227 90 90 90 90 89 90 90 90 90 89 93 93 93 93 92 11 4 8 8 4 89 89 90 90 16 74 3 3 3 3 90 260 1 7 8 8 247 247 246 247 227 21 247 247 247 76 76 76 76 6 76 7 75 62 61 75 76 82 5 79 54 1 5 24 29 5 19 3 17 19 19 19 4 16 19 18 47 247 247 247 247 247 21 21 227 18 18 18 28 21 21 7 1 7 118 247 18 18 18 21 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 
900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 
1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 
1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 
2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 
2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 
2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 
3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 
3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 
4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 
4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 
4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 
5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 
5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 /* * Resizable virtual memory filesystem for Linux. * * Copyright (C) 2000 Linus Torvalds. * 2000 Transmeta Corp. * 2000-2001 Christoph Rohland * 2000-2001 SAP AG * 2002 Red Hat Inc. * Copyright (C) 2002-2011 Hugh Dickins. * Copyright (C) 2011 Google Inc. * Copyright (C) 2002-2005 VERITAS Software Corporation. * Copyright (C) 2004 Andi Kleen, SuSE Labs * * Extended attribute support for tmpfs: * Copyright (c) 2004, Luke Kenneth Casson Leighton <lkcl@lkcl.net> * Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> * * tiny-shmem: * Copyright (c) 2004, 2008 Matt Mackall <mpm@selenic.com> * * This file is released under the GPL. 
*/ #include <linux/fs.h> #include <linux/init.h> #include <linux/vfs.h> #include <linux/mount.h> #include <linux/ramfs.h> #include <linux/pagemap.h> #include <linux/file.h> #include <linux/fileattr.h> #include <linux/mm.h> #include <linux/random.h> #include <linux/sched/signal.h> #include <linux/export.h> #include <linux/shmem_fs.h> #include <linux/swap.h> #include <linux/uio.h> #include <linux/hugetlb.h> #include <linux/fs_parser.h> #include <linux/swapfile.h> #include <linux/iversion.h> #include <linux/unicode.h> #include "swap.h" static struct vfsmount *shm_mnt __ro_after_init; #ifdef CONFIG_SHMEM /* * This virtual memory filesystem is heavily based on the ramfs. It * extends ramfs by the ability to use swap and honor resource limits * which makes it a completely usable filesystem. */ #include <linux/xattr.h> #include <linux/exportfs.h> #include <linux/posix_acl.h> #include <linux/posix_acl_xattr.h> #include <linux/mman.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/backing-dev.h> #include <linux/writeback.h> #include <linux/pagevec.h> #include <linux/percpu_counter.h> #include <linux/falloc.h> #include <linux/splice.h> #include <linux/security.h> #include <linux/swapops.h> #include <linux/mempolicy.h> #include <linux/namei.h> #include <linux/ctype.h> #include <linux/migrate.h> #include <linux/highmem.h> #include <linux/seq_file.h> #include <linux/magic.h> #include <linux/syscalls.h> #include <linux/fcntl.h> #include <uapi/linux/memfd.h> #include <linux/rmap.h> #include <linux/uuid.h> #include <linux/quotaops.h> #include <linux/rcupdate_wait.h> #include <linux/uaccess.h> #include "internal.h" #define BLOCKS_PER_PAGE (PAGE_SIZE/512) #define VM_ACCT(size) (PAGE_ALIGN(size) >> PAGE_SHIFT) /* Pretend that each entry is of this size in directory's i_size */ #define BOGO_DIRENT_SIZE 20 /* Pretend that one inode + its dentry occupy this much memory */ #define BOGO_INODE_SIZE 1024 /* Symlink up to this size is kmalloc'ed instead of using a 
swappable page */ #define SHORT_SYMLINK_LEN 128 /* * shmem_fallocate communicates with shmem_fault or shmem_writepage via * inode->i_private (with i_rwsem making sure that it has only one user at * a time): we would prefer not to enlarge the shmem inode just for that. */ struct shmem_falloc { wait_queue_head_t *waitq; /* faults into hole wait for punch to end */ pgoff_t start; /* start of range currently being fallocated */ pgoff_t next; /* the next page offset to be fallocated */ pgoff_t nr_falloced; /* how many new pages have been fallocated */ pgoff_t nr_unswapped; /* how often writepage refused to swap out */ }; struct shmem_options { unsigned long long blocks; unsigned long long inodes; struct mempolicy *mpol; kuid_t uid; kgid_t gid; umode_t mode; bool full_inums; int huge; int seen; bool noswap; unsigned short quota_types; struct shmem_quota_limits qlimits; #if IS_ENABLED(CONFIG_UNICODE) struct unicode_map *encoding; bool strict_encoding; #endif #define SHMEM_SEEN_BLOCKS 1 #define SHMEM_SEEN_INODES 2 #define SHMEM_SEEN_HUGE 4 #define SHMEM_SEEN_INUMS 8 #define SHMEM_SEEN_NOSWAP 16 #define SHMEM_SEEN_QUOTA 32 }; #ifdef CONFIG_TRANSPARENT_HUGEPAGE static unsigned long huge_shmem_orders_always __read_mostly; static unsigned long huge_shmem_orders_madvise __read_mostly; static unsigned long huge_shmem_orders_inherit __read_mostly; static unsigned long huge_shmem_orders_within_size __read_mostly; static bool shmem_orders_configured __initdata; #endif #ifdef CONFIG_TMPFS static unsigned long shmem_default_max_blocks(void) { return totalram_pages() / 2; } static unsigned long shmem_default_max_inodes(void) { unsigned long nr_pages = totalram_pages(); return min3(nr_pages - totalhigh_pages(), nr_pages / 2, ULONG_MAX / BOGO_INODE_SIZE); } #endif static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct folio **foliop, enum sgp_type sgp, gfp_t gfp, struct vm_area_struct *vma, vm_fault_t *fault_type); static inline struct shmem_sb_info *SHMEM_SB(struct 
super_block *sb) { return sb->s_fs_info; } /* * shmem_file_setup pre-accounts the whole fixed size of a VM object, * for shared memory and for shared anonymous (/dev/zero) mappings * (unless MAP_NORESERVE and sysctl_overcommit_memory <= 1), * consistent with the pre-accounting of private mappings ... */ static inline int shmem_acct_size(unsigned long flags, loff_t size) { return (flags & VM_NORESERVE) ? 0 : security_vm_enough_memory_mm(current->mm, VM_ACCT(size)); } static inline void shmem_unacct_size(unsigned long flags, loff_t size) { if (!(flags & VM_NORESERVE)) vm_unacct_memory(VM_ACCT(size)); } static inline int shmem_reacct_size(unsigned long flags, loff_t oldsize, loff_t newsize) { if (!(flags & VM_NORESERVE)) { if (VM_ACCT(newsize) > VM_ACCT(oldsize)) return security_vm_enough_memory_mm(current->mm, VM_ACCT(newsize) - VM_ACCT(oldsize)); else if (VM_ACCT(newsize) < VM_ACCT(oldsize)) vm_unacct_memory(VM_ACCT(oldsize) - VM_ACCT(newsize)); } return 0; } /* * ... whereas tmpfs objects are accounted incrementally as * pages are allocated, in order to allow large sparse files. * shmem_get_folio reports shmem_acct_blocks failure as -ENOSPC not -ENOMEM, * so that a failure on a sparse tmpfs mapping will give SIGBUS not OOM. 
*/ static inline int shmem_acct_blocks(unsigned long flags, long pages) { if (!(flags & VM_NORESERVE)) return 0; return security_vm_enough_memory_mm(current->mm, pages * VM_ACCT(PAGE_SIZE)); } static inline void shmem_unacct_blocks(unsigned long flags, long pages) { if (flags & VM_NORESERVE) vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE)); } static int shmem_inode_acct_blocks(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); int err = -ENOSPC; if (shmem_acct_blocks(info->flags, pages)) return err; might_sleep(); /* when quotas */ if (sbinfo->max_blocks) { if (!percpu_counter_limited_add(&sbinfo->used_blocks, sbinfo->max_blocks, pages)) goto unacct; err = dquot_alloc_block_nodirty(inode, pages); if (err) { percpu_counter_sub(&sbinfo->used_blocks, pages); goto unacct; } } else { err = dquot_alloc_block_nodirty(inode, pages); if (err) goto unacct; } return 0; unacct: shmem_unacct_blocks(info->flags, pages); return err; } static void shmem_inode_unacct_blocks(struct inode *inode, long pages) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); might_sleep(); /* when quotas */ dquot_free_block_nodirty(inode, pages); if (sbinfo->max_blocks) percpu_counter_sub(&sbinfo->used_blocks, pages); shmem_unacct_blocks(info->flags, pages); } static const struct super_operations shmem_ops; static const struct address_space_operations shmem_aops; static const struct file_operations shmem_file_operations; static const struct inode_operations shmem_inode_operations; static const struct inode_operations shmem_dir_inode_operations; static const struct inode_operations shmem_special_inode_operations; static const struct vm_operations_struct shmem_vm_ops; static const struct vm_operations_struct shmem_anon_vm_ops; static struct file_system_type shmem_fs_type; bool shmem_mapping(struct address_space *mapping) { return mapping->a_ops == &shmem_aops; } 
EXPORT_SYMBOL_GPL(shmem_mapping); bool vma_is_anon_shmem(struct vm_area_struct *vma) { return vma->vm_ops == &shmem_anon_vm_ops; } bool vma_is_shmem(struct vm_area_struct *vma) { return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops; } static LIST_HEAD(shmem_swaplist); static DEFINE_MUTEX(shmem_swaplist_mutex); #ifdef CONFIG_TMPFS_QUOTA static int shmem_enable_quotas(struct super_block *sb, unsigned short quota_types) { int type, err = 0; sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY; for (type = 0; type < SHMEM_MAXQUOTAS; type++) { if (!(quota_types & (1 << type))) continue; err = dquot_load_quota_sb(sb, type, QFMT_SHMEM, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); if (err) goto out_err; } return 0; out_err: pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n", type, err); for (type--; type >= 0; type--) dquot_quota_off(sb, type); return err; } static void shmem_disable_quotas(struct super_block *sb) { int type; for (type = 0; type < SHMEM_MAXQUOTAS; type++) dquot_quota_off(sb, type); } static struct dquot __rcu **shmem_get_dquots(struct inode *inode) { return SHMEM_I(inode)->i_dquot; } #endif /* CONFIG_TMPFS_QUOTA */ /* * shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and * produces a novel ino for the newly allocated inode. * * It may also be called when making a hard link to permit the space needed by * each dentry. However, in that case, no new inode number is needed since that * internally draws from another pool of inode numbers (currently global * get_next_ino()). This case is indicated by passing NULL as inop. 
*/
/* Amount by which sbinfo->next_ino advances each time a per-cpu batch is refilled */
#define SHMEM_INO_BATCH 1024
static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	ino_t ino;

	if (!(sb->s_flags & SB_KERNMOUNT)) {
		/* Non-kernel mounts: space accounting and ino allocation under stat_lock */
		raw_spin_lock(&sbinfo->stat_lock);
		if (sbinfo->max_inodes) {
			/* Inode limit in force: fail when less than one inode's worth is left */
			if (sbinfo->free_ispace < BOGO_INODE_SIZE) {
				raw_spin_unlock(&sbinfo->stat_lock);
				return -ENOSPC;
			}
			sbinfo->free_ispace -= BOGO_INODE_SIZE;
		}
		if (inop) {
			ino = sbinfo->next_ino++;
			/* Skip a value that is_zero_ino() rejects */
			if (unlikely(is_zero_ino(ino)))
				ino = sbinfo->next_ino++;
			if (unlikely(!sbinfo->full_inums &&
				     ino > UINT_MAX)) {
				/*
				 * Emulate get_next_ino uint wraparound for
				 * compatibility
				 */
				if (IS_ENABLED(CONFIG_64BIT))
					pr_warn("%s: inode number overflow on device %d, consider using inode64 mount option\n",
						__func__, MINOR(sb->s_dev));
				sbinfo->next_ino = 1;
				ino = sbinfo->next_ino++;
			}
			*inop = ino;
		}
		raw_spin_unlock(&sbinfo->stat_lock);
	} else if (inop) {
		/*
		 * __shmem_file_setup, one of our callers, is lock-free: it
		 * doesn't hold stat_lock in shmem_reserve_inode since
		 * max_inodes is always 0, and is called from potentially
		 * unknown contexts. As such, use a per-cpu batched allocator
		 * which doesn't require the per-sb stat_lock unless we are at
		 * the batch boundary.
		 *
		 * We don't need to worry about inode{32,64} since SB_KERNMOUNT
		 * shmem mounts are not exposed to userspace, so we don't need
		 * to worry about things like glibc compatibility.
		 */
		ino_t *next_ino;

		/* get_cpu()/put_cpu() bracket the use of this CPU's counter */
		next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
		ino = *next_ino;
		if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
			/* Batch boundary: take the next batch from the superblock */
			raw_spin_lock(&sbinfo->stat_lock);
			ino = sbinfo->next_ino;
			sbinfo->next_ino += SHMEM_INO_BATCH;
			raw_spin_unlock(&sbinfo->stat_lock);
			if (unlikely(is_zero_ino(ino)))
				ino++;
		}
		*inop = ino;
		*next_ino = ++ino;
		put_cpu();
	}

	return 0;
}

/*
 * Return inode space to the superblock: BOGO_INODE_SIZE plus @freed_ispace is
 * added back to free_ispace.  Does nothing when max_inodes is 0.
 */
static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	if (sbinfo->max_inodes) {
		raw_spin_lock(&sbinfo->stat_lock);
		sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace;
		raw_spin_unlock(&sbinfo->stat_lock);
	}
}

/**
 * shmem_recalc_inode - recalculate the block usage of an inode
 * @inode: inode to recalc
 * @alloced: the change in number of pages allocated to inode
 * @swapped: the change in number of pages swapped from inode
 *
 * We have to calculate the free blocks since the mm can drop
 * undirtied hole pages behind our back.
 *
 * But normally   info->alloced == inode->i_mapping->nrpages + info->swapped
 * So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
 */
static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	long freed;

	/* info->lock covers the updates of alloced and swapped below */
	spin_lock(&info->lock);
	info->alloced += alloced;
	info->swapped += swapped;
	freed = info->alloced - info->swapped -
		READ_ONCE(inode->i_mapping->nrpages);
	/*
	 * Special case: whereas normally shmem_recalc_inode() is called
	 * after i_mapping->nrpages has already been adjusted (up or down),
	 * shmem_writepage() has to raise swapped before nrpages is lowered -
	 * to stop a racing shmem_recalc_inode() from thinking that a page has
	 * been freed.  Compensate here, to avoid the need for a followup call.
	 */
	if (swapped > 0)
		freed += swapped;
	if (freed > 0)
		info->alloced -= freed;
	spin_unlock(&info->lock);

	/* The quota case may block */
	if (freed > 0)
		shmem_inode_unacct_blocks(inode, freed);
}

/*
 * Charge @pages to @inode and raise mapping->nrpages to match.
 * Returns false, changing nothing else, when shmem_inode_acct_blocks() fails.
 */
bool shmem_charge(struct inode *inode, long pages)
{
	struct address_space *mapping = inode->i_mapping;

	if (shmem_inode_acct_blocks(inode, pages))
		return false;

	/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
	xa_lock_irq(&mapping->i_pages);
	mapping->nrpages += pages;
	xa_unlock_irq(&mapping->i_pages);

	shmem_recalc_inode(inode, pages, 0);
	return true;
}

/*
 * Counterpart of shmem_charge(): only triggers a recalculation, which
 * un-accounts whatever shmem_recalc_inode() finds to have been freed.
 */
void shmem_uncharge(struct inode *inode, long pages)
{
	/* pages argument is currently unused: keep it to help debugging */
	/* nrpages adjustment done by __filemap_remove_folio() or caller */

	shmem_recalc_inode(inode, 0, 0);
}

/*
 * Replace item expected in xarray by a new item, while holding xa_lock.
 * Returns -ENOENT, storing nothing, when the slot does not hold @expected.
 */
static int shmem_replace_entry(struct address_space *mapping,
			pgoff_t index, void *expected, void *replacement)
{
	XA_STATE(xas, &mapping->i_pages, index);
	void *item;

	VM_BUG_ON(!expected);
	VM_BUG_ON(!replacement);
	item = xas_load(&xas);
	if (item != expected)
		return -ENOENT;
	xas_store(&xas, replacement);
	return 0;
}

/*
 * Sometimes, before we decide whether to proceed or to fail, we must check
 * that an entry was not already brought back from swap by a racing thread.
 *
 * Checking folio is not enough: by the time a swapcache folio is locked, it
 * might be reused, and again be swapcache, using the same swap as before.
*/ static bool shmem_confirm_swap(struct address_space *mapping, pgoff_t index, swp_entry_t swap) { return xa_load(&mapping->i_pages, index) == swp_to_radix_entry(swap); } /* * Definitions for "huge tmpfs": tmpfs mounted with the huge= option * * SHMEM_HUGE_NEVER: * disables huge pages for the mount; * SHMEM_HUGE_ALWAYS: * enables huge pages for the mount; * SHMEM_HUGE_WITHIN_SIZE: * only allocate huge pages if the page will be fully within i_size, * also respect fadvise()/madvise() hints; * SHMEM_HUGE_ADVISE: * only allocate huge pages if requested with fadvise()/madvise(); */ #define SHMEM_HUGE_NEVER 0 #define SHMEM_HUGE_ALWAYS 1 #define SHMEM_HUGE_WITHIN_SIZE 2 #define SHMEM_HUGE_ADVISE 3 /* * Special values. * Only can be set via /sys/kernel/mm/transparent_hugepage/shmem_enabled: * * SHMEM_HUGE_DENY: * disables huge on shm_mnt and all mounts, for emergency use; * SHMEM_HUGE_FORCE: * enables huge on shm_mnt and all mounts, w/o needing option, for testing; * */ #define SHMEM_HUGE_DENY (-1) #define SHMEM_HUGE_FORCE (-2) #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* ifdef here to avoid bloating shmem.o when not necessary */ static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; static int tmpfs_huge __read_mostly = SHMEM_HUGE_NEVER; /** * shmem_mapping_size_orders - Get allowable folio orders for the given file size. * @mapping: Target address_space. * @index: The page index. * @write_end: end of a write, could extend inode size. * * This returns huge orders for folios (when supported) based on the file size * which the mapping currently allows at the given index. The index is relevant * due to alignment considerations the mapping might have. The returned order * may be less than the size passed. * * Return: The orders. 
*/ static inline unsigned int shmem_mapping_size_orders(struct address_space *mapping, pgoff_t index, loff_t write_end) { unsigned int order; size_t size; if (!mapping_large_folio_support(mapping) || !write_end) return 0; /* Calculate the write size based on the write_end */ size = write_end - (index << PAGE_SHIFT); order = filemap_get_order(size); if (!order) return 0; /* If we're not aligned, allocate a smaller folio */ if (index & ((1UL << order) - 1)) order = __ffs(index); order = min_t(size_t, order, MAX_PAGECACHE_ORDER); return order > 0 ? BIT(order + 1) - 1 : 0; } static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index, loff_t write_end, bool shmem_huge_force, struct vm_area_struct *vma, unsigned long vm_flags) { unsigned int maybe_pmd_order = HPAGE_PMD_ORDER > MAX_PAGECACHE_ORDER ? 0 : BIT(HPAGE_PMD_ORDER); unsigned long within_size_orders; unsigned int order; pgoff_t aligned_index; loff_t i_size; if (!S_ISREG(inode->i_mode)) return 0; if (shmem_huge == SHMEM_HUGE_DENY) return 0; if (shmem_huge_force || shmem_huge == SHMEM_HUGE_FORCE) return maybe_pmd_order; /* * The huge order allocation for anon shmem is controlled through * the mTHP interface, so we still use PMD-sized huge order to * check whether global control is enabled. * * For tmpfs mmap()'s huge order, we still use PMD-sized order to * allocate huge pages due to lack of a write size hint. * * Otherwise, tmpfs will allow getting a highest order hint based on * the size of write and fallocate paths, then will try each allowable * huge orders. 
*/ switch (SHMEM_SB(inode->i_sb)->huge) { case SHMEM_HUGE_ALWAYS: if (vma) return maybe_pmd_order; return shmem_mapping_size_orders(inode->i_mapping, index, write_end); case SHMEM_HUGE_WITHIN_SIZE: if (vma) within_size_orders = maybe_pmd_order; else within_size_orders = shmem_mapping_size_orders(inode->i_mapping, index, write_end); order = highest_order(within_size_orders); while (within_size_orders) { aligned_index = round_up(index + 1, 1 << order); i_size = max(write_end, i_size_read(inode)); i_size = round_up(i_size, PAGE_SIZE); if (i_size >> PAGE_SHIFT >= aligned_index) return within_size_orders; order = next_order(&within_size_orders, order); } fallthrough; case SHMEM_HUGE_ADVISE: if (vm_flags & VM_HUGEPAGE) return maybe_pmd_order; fallthrough; default: return 0; } } static int shmem_parse_huge(const char *str) { int huge; if (!str) return -EINVAL; if (!strcmp(str, "never")) huge = SHMEM_HUGE_NEVER; else if (!strcmp(str, "always")) huge = SHMEM_HUGE_ALWAYS; else if (!strcmp(str, "within_size")) huge = SHMEM_HUGE_WITHIN_SIZE; else if (!strcmp(str, "advise")) huge = SHMEM_HUGE_ADVISE; else if (!strcmp(str, "deny")) huge = SHMEM_HUGE_DENY; else if (!strcmp(str, "force")) huge = SHMEM_HUGE_FORCE; else return -EINVAL; if (!has_transparent_hugepage() && huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY) return -EINVAL; /* Do not override huge allocation policy with non-PMD sized mTHP */ if (huge == SHMEM_HUGE_FORCE && huge_shmem_orders_inherit != BIT(HPAGE_PMD_ORDER)) return -EINVAL; return huge; } #if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) static const char *shmem_format_huge(int huge) { switch (huge) { case SHMEM_HUGE_NEVER: return "never"; case SHMEM_HUGE_ALWAYS: return "always"; case SHMEM_HUGE_WITHIN_SIZE: return "within_size"; case SHMEM_HUGE_ADVISE: return "advise"; case SHMEM_HUGE_DENY: return "deny"; case SHMEM_HUGE_FORCE: return "force"; default: VM_BUG_ON(1); return "bad_val"; } } #endif static unsigned long shmem_unused_huge_shrink(struct 
shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_free) { LIST_HEAD(list), *pos, *next; struct inode *inode; struct shmem_inode_info *info; struct folio *folio; unsigned long batch = sc ? sc->nr_to_scan : 128; unsigned long split = 0, freed = 0; if (list_empty(&sbinfo->shrinklist)) return SHRINK_STOP; spin_lock(&sbinfo->shrinklist_lock); list_for_each_safe(pos, next, &sbinfo->shrinklist) { info = list_entry(pos, struct shmem_inode_info, shrinklist); /* pin the inode */ inode = igrab(&info->vfs_inode); /* inode is about to be evicted */ if (!inode) { list_del_init(&info->shrinklist); goto next; } list_move(&info->shrinklist, &list); next: sbinfo->shrinklist_len--; if (!--batch) break; } spin_unlock(&sbinfo->shrinklist_lock); list_for_each_safe(pos, next, &list) { pgoff_t next, end; loff_t i_size; int ret; info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; if (nr_to_free && freed >= nr_to_free) goto move_back; i_size = i_size_read(inode); folio = filemap_get_entry(inode->i_mapping, i_size / PAGE_SIZE); if (!folio || xa_is_value(folio)) goto drop; /* No large folio at the end of the file: nothing to split */ if (!folio_test_large(folio)) { folio_put(folio); goto drop; } /* Check if there is anything to gain from splitting */ next = folio_next_index(folio); end = shmem_fallocend(inode, DIV_ROUND_UP(i_size, PAGE_SIZE)); if (end <= folio->index || end >= next) { folio_put(folio); goto drop; } /* * Move the inode on the list back to shrinklist if we failed * to lock the page at this time. * * Waiting for the lock may lead to deadlock in the * reclaim path. 
*/ if (!folio_trylock(folio)) { folio_put(folio); goto move_back; } ret = split_folio(folio); folio_unlock(folio); folio_put(folio); /* If split failed move the inode on the list back to shrinklist */ if (ret) goto move_back; freed += next - end; split++; drop: list_del_init(&info->shrinklist); goto put; move_back: /* * Make sure the inode is either on the global list or deleted * from any local list before iput() since it could be deleted * in another thread once we put the inode (then the local list * is corrupted). */ spin_lock(&sbinfo->shrinklist_lock); list_move(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; spin_unlock(&sbinfo->shrinklist_lock); put: iput(inode); } return split; } static long shmem_unused_huge_scan(struct super_block *sb, struct shrink_control *sc) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); if (!READ_ONCE(sbinfo->shrinklist_len)) return SHRINK_STOP; return shmem_unused_huge_shrink(sbinfo, sc, 0); } static long shmem_unused_huge_count(struct super_block *sb, struct shrink_control *sc) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); return READ_ONCE(sbinfo->shrinklist_len); } #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ #define shmem_huge SHMEM_HUGE_DENY static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_free) { return 0; } static unsigned int shmem_huge_global_enabled(struct inode *inode, pgoff_t index, loff_t write_end, bool shmem_huge_force, struct vm_area_struct *vma, unsigned long vm_flags) { return 0; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static void shmem_update_stats(struct folio *folio, int nr_pages) { if (folio_test_pmd_mappable(folio)) __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr_pages); __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_pages); __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_pages); } /* * Somewhat like filemap_add_folio, but error if expected item has gone. 
*/
static int shmem_add_to_page_cache(struct folio *folio,
				   struct address_space *mapping,
				   pgoff_t index, void *expected, gfp_t gfp)
{
	XA_STATE_ORDER(xas, &mapping->i_pages, index, folio_order(folio));
	long nr = folio_nr_pages(folio);

	/* Caller contract: aligned index, locked and swap-backed folio */
	VM_BUG_ON_FOLIO(index != round_down(index, nr), folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(!folio_test_swapbacked(folio), folio);

	/* Take nr references and set mapping/index up front; undone on error below */
	folio_ref_add(folio, nr);
	folio->mapping = mapping;
	folio->index = index;

	gfp &= GFP_RECLAIM_MASK;
	folio_throttle_swaprate(folio, gfp);

	do {
		xas_lock_irq(&xas);
		/* The first conflicting entry found must be exactly @expected */
		if (expected != xas_find_conflict(&xas)) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}
		/* With @expected matched, any further conflict is also an error */
		if (expected && xas_find_conflict(&xas)) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}
		xas_store(&xas, folio);
		if (xas_error(&xas))
			goto unlock;
		/* Stats and nrpages are updated only after a successful store */
		shmem_update_stats(folio, nr);
		mapping->nrpages += nr;
unlock:
		xas_unlock_irq(&xas);
	} while (xas_nomem(&xas, gfp));	/* retry for as long as xas_nomem() asks */

	if (xas_error(&xas)) {
		/* Failure: detach the folio again and drop the references taken above */
		folio->mapping = NULL;
		folio_ref_sub(folio, nr);
		return xas_error(&xas);
	}

	return 0;
}

/*
 * Somewhat like filemap_remove_folio, but substitutes swap for @folio.
 *
 * The slot at folio->index must still hold @folio: a failed replacement
 * ends in BUG_ON() after the lock has been dropped.
 */
static void shmem_delete_from_page_cache(struct folio *folio, void *radswap)
{
	struct address_space *mapping = folio->mapping;
	long nr = folio_nr_pages(folio);
	int error;

	xa_lock_irq(&mapping->i_pages);
	error = shmem_replace_entry(mapping, folio->index, folio, radswap);
	folio->mapping = NULL;
	mapping->nrpages -= nr;
	shmem_update_stats(folio, -nr);
	xa_unlock_irq(&mapping->i_pages);
	/* Drop nr references, the same count shmem_add_to_page_cache() takes */
	folio_put_refs(folio, nr);
	BUG_ON(error);
}

/*
 * Remove swap entry from page cache, free the swap and its page cache. Returns
 * the number of pages being freed. 0 means entry not found in XArray (0 pages
 * being freed).
*/ static long shmem_free_swap(struct address_space *mapping, pgoff_t index, void *radswap) { int order = xa_get_order(&mapping->i_pages, index); void *old; old = xa_cmpxchg_irq(&mapping->i_pages, index, radswap, NULL, 0); if (old != radswap) return 0; free_swap_and_cache_nr(radix_to_swp_entry(radswap), 1 << order); return 1 << order; } /* * Determine (in bytes) how many of the shmem object's pages mapped by the * given offsets are swapped out. * * This is safe to call without i_rwsem or the i_pages lock thanks to RCU, * as long as the inode doesn't go away and racy results are not a problem. */ unsigned long shmem_partial_swap_usage(struct address_space *mapping, pgoff_t start, pgoff_t end) { XA_STATE(xas, &mapping->i_pages, start); struct page *page; unsigned long swapped = 0; unsigned long max = end - 1; rcu_read_lock(); xas_for_each(&xas, page, max) { if (xas_retry(&xas, page)) continue; if (xa_is_value(page)) swapped += 1 << xas_get_order(&xas); if (xas.xa_index == max) break; if (need_resched()) { xas_pause(&xas); cond_resched_rcu(); } } rcu_read_unlock(); return swapped << PAGE_SHIFT; } /* * Determine (in bytes) how many of the shmem object's pages mapped by the * given vma is swapped out. * * This is safe to call without i_rwsem or the i_pages lock thanks to RCU, * as long as the inode doesn't go away and racy results are not a problem. */ unsigned long shmem_swap_usage(struct vm_area_struct *vma) { struct inode *inode = file_inode(vma->vm_file); struct shmem_inode_info *info = SHMEM_I(inode); struct address_space *mapping = inode->i_mapping; unsigned long swapped; /* Be careful as we don't hold info->lock */ swapped = READ_ONCE(info->swapped); /* * The easier cases are when the shmem object has nothing in swap, or * the vma maps it whole. Then we can simply use the stats that we * already track. 
*/ if (!swapped) return 0; if (!vma->vm_pgoff && vma->vm_end - vma->vm_start >= inode->i_size) return swapped << PAGE_SHIFT; /* Here comes the more involved part */ return shmem_partial_swap_usage(mapping, vma->vm_pgoff, vma->vm_pgoff + vma_pages(vma)); } /* * SysV IPC SHM_UNLOCK restore Unevictable pages to their evictable lists. */ void shmem_unlock_mapping(struct address_space *mapping) { struct folio_batch fbatch; pgoff_t index = 0; folio_batch_init(&fbatch); /* * Minor point, but we might as well stop if someone else SHM_LOCKs it. */ while (!mapping_unevictable(mapping) && filemap_get_folios(mapping, &index, ~0UL, &fbatch)) { check_move_unevictable_folios(&fbatch); folio_batch_release(&fbatch); cond_resched(); } } static struct folio *shmem_get_partial_folio(struct inode *inode, pgoff_t index) { struct folio *folio; /* * At first avoid shmem_get_folio(,,,SGP_READ): that fails * beyond i_size, and reports fallocated folios as holes. */ folio = filemap_get_entry(inode->i_mapping, index); if (!folio) return folio; if (!xa_is_value(folio)) { folio_lock(folio); if (folio->mapping == inode->i_mapping) return folio; /* The folio has been swapped out */ folio_unlock(folio); folio_put(folio); } /* * But read a folio back from swap if any of it is within i_size * (although in some cases this is just a waste of time). */ folio = NULL; shmem_get_folio(inode, index, 0, &folio, SGP_READ); return folio; } /* * Remove range of pages and swap entries from page cache, and free them. * If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate. 
*/
/*
 * Notes on the arguments, as used by the code below:
 *  - @lstart and @lend are byte offsets; @lend is treated as the last byte
 *    of the range (the end index is computed from lend + 1), and
 *    lend == -1 selects "up to the largest possible index".
 *  - @unfalloc selects the undo-fallocate mode: swap entries are skipped
 *    and only folios that are not uptodate are truncated.
 * The number of swap entries freed is passed (negated) to
 * shmem_recalc_inode() at the end.
 */
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
			     bool unfalloc)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	/* Index of the first page starting at or after lstart. */
	pgoff_t start = (lstart + PAGE_SIZE - 1) >> PAGE_SHIFT;
	/* Index just past the last page that ends at or before lend. */
	pgoff_t end = (lend + 1) >> PAGE_SHIFT;
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	struct folio *folio;
	bool same_folio;
	long nr_swaps_freed = 0;
	pgoff_t index;
	int i;

	if (lend == -1)
		end = -1;	/* unsigned, so actually very big */

	/* A truncate/punch that covers fallocend pulls it back to start. */
	if (info->fallocend > start && info->fallocend <= end && !unfalloc)
		info->fallocend = start;

	/* First pass: work on the entries find_lock_entries() hands back. */
	folio_batch_init(&fbatch);
	index = start;
	while (index < end && find_lock_entries(mapping, &index, end - 1,
			&fbatch, indices)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];

			if (xa_is_value(folio)) {
				if (unfalloc)
					continue;
				nr_swaps_freed += shmem_free_swap(mapping,
							indices[i], folio);
				continue;
			}

			if (!unfalloc || !folio_test_uptodate(folio))
				truncate_inode_folio(mapping, folio);
			folio_unlock(folio);
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
		cond_resched();
	}

	/*
	 * When undoing a failed fallocate, we want none of the partial folio
	 * zeroing and splitting below, but shall want to truncate the whole
	 * folio when !uptodate indicates that it was added by this fallocate,
	 * even when [lstart, lend] covers only a part of the folio.
	 */
	if (unfalloc)
		goto whole_folios;

	/* Partial folio at the head of the range (may also hold the tail). */
	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
	folio = shmem_get_partial_folio(inode, lstart >> PAGE_SHIFT);
	if (folio) {
		same_folio = lend < folio_pos(folio) + folio_size(folio);
		folio_mark_dirty(folio);
		if (!truncate_inode_partial_folio(folio, lstart, lend)) {
			start = folio_next_index(folio);
			if (same_folio)
				end = folio->index;
		}
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
	}

	/* Partial folio at the tail, unless it was the same folio as the head. */
	if (!same_folio)
		folio = shmem_get_partial_folio(inode, lend >> PAGE_SHIFT);
	if (folio) {
		folio_mark_dirty(folio);
		if (!truncate_inode_partial_folio(folio, lstart, lend))
			end = folio->index;
		folio_unlock(folio);
		folio_put(folio);
	}

whole_folios:

	/* Second pass: lock each remaining folio ourselves and retry on races. */
	index = start;
	while (index < end) {
		cond_resched();

		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
				indices)) {
			/* If all gone or hole-punch or unfalloc, we're done */
			if (index == start || end != -1)
				break;
			/* But if truncating, restart to make sure all gone */
			index = start;
			continue;
		}
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			folio = fbatch.folios[i];

			if (xa_is_value(folio)) {
				long swaps_freed;

				if (unfalloc)
					continue;
				swaps_freed = shmem_free_swap(mapping, indices[i], folio);
				if (!swaps_freed) {
					/* Swap was replaced by page: retry */
					index = indices[i];
					break;
				}
				nr_swaps_freed += swaps_freed;
				continue;
			}

			folio_lock(folio);

			if (!unfalloc || !folio_test_uptodate(folio)) {
				if (folio_mapping(folio) != mapping) {
					/* Page was replaced by swap: retry */
					folio_unlock(folio);
					index = indices[i];
					break;
				}
				VM_BUG_ON_FOLIO(folio_test_writeback(folio),
						folio);

				if (!folio_test_large(folio)) {
					truncate_inode_folio(mapping, folio);
				} else if (truncate_inode_partial_folio(folio, lstart, lend)) {
					/*
					 * If we split a page, reset the loop so
					 * that we pick up the new sub pages.
					 * Otherwise the THP was entirely
					 * dropped or the target range was
					 * zeroed, so just continue the loop as
					 * is.
					 */
					if (!folio_test_large(folio)) {
						folio_unlock(folio);
						index = start;
						break;
					}
				}
			}
			folio_unlock(folio);
		}
		folio_batch_remove_exceptionals(&fbatch);
		folio_batch_release(&fbatch);
	}

	shmem_recalc_inode(inode, 0, -nr_swaps_freed);
}

/*
 * Truncate or punch a hole over [lstart, lend]: runs shmem_undo_range()
 * with unfalloc == false, then sets mtime to the freshly updated ctime and
 * increments the inode version.
 */
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
	shmem_undo_range(inode, lstart, lend, false);
	inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
	inode_inc_iversion(inode);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);

/*
 * Fill @stat for the inode behind @path.
 *
 * Recalculates the inode accounting first if alloced - swapped disagrees
 * with the mapping's nrpages, reports the append/immutable/nodump fsflags
 * as statx attributes, lets generic_fillattr() fill the common fields,
 * reports HPAGE_PMD_SIZE as blksize when shmem_huge_global_enabled()
 * returns non-zero, and supplies the creation time when STATX_BTIME is
 * requested.  @query_flags is not used.  Always returns 0.
 */
static int shmem_getattr(struct mnt_idmap *idmap,
			 const struct path *path, struct kstat *stat,
			 u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = path->dentry->d_inode;
	struct shmem_inode_info *info = SHMEM_I(inode);

	if (info->alloced - info->swapped != inode->i_mapping->nrpages)
		shmem_recalc_inode(inode, 0, 0);

	if (info->fsflags & FS_APPEND_FL)
		stat->attributes |= STATX_ATTR_APPEND;
	if (info->fsflags & FS_IMMUTABLE_FL)
		stat->attributes |= STATX_ATTR_IMMUTABLE;
	if (info->fsflags & FS_NODUMP_FL)
		stat->attributes |= STATX_ATTR_NODUMP;
	stat->attributes_mask |= (STATX_ATTR_APPEND |
			STATX_ATTR_IMMUTABLE |
			STATX_ATTR_NODUMP);
	generic_fillattr(idmap, request_mask, inode, stat);

	if (shmem_huge_global_enabled(inode, 0, 0, false, NULL, 0))
		stat->blksize = HPAGE_PMD_SIZE;

	if (request_mask & STATX_BTIME) {
		stat->result_mask |= STATX_BTIME;
		stat->btime.tv_sec = info->i_crtime.tv_sec;
		stat->btime.tv_nsec = info->i_crtime.tv_nsec;
	}

	return 0;
}

/*
 * Apply the attribute changes in @attr to the inode of @dentry.
 *
 * Returns 0 on success, -EPERM when a seal forbids the change, or the
 * error returned by setattr_prepare(), shmem_reacct_size(),
 * dquot_initialize(), dquot_transfer() or posix_acl_chmod().
 *
 * ctime is updated unless this was a size request that left the size
 * unchanged; mtime is updated too only when the size actually changed.
 */
static int shmem_setattr(struct mnt_idmap *idmap,
			 struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int error;
	bool update_mtime = false;
	bool update_ctime = true;

	error = setattr_prepare(idmap, dentry, attr);
	if (error)
		return error;

	/* With F_SEAL_EXEC set, refuse a mode change that flips any x bit. */
	if ((info->seals & F_SEAL_EXEC) && (attr->ia_valid & ATTR_MODE)) {
		if ((inode->i_mode ^ attr->ia_mode) & 0111) {
			return -EPERM;
		}
	}

	if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
		loff_t oldsize = inode->i_size;
		loff_t newsize = attr->ia_size;

		/* protected by i_rwsem */
		if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
		    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
			return -EPERM;

		if (newsize != oldsize) {
			error = shmem_reacct_size(SHMEM_I(inode)->flags,
					oldsize, newsize);
			if (error)
				return error;
			i_size_write(inode, newsize);
			update_mtime = true;
		} else {
			update_ctime = false;
		}
		if (newsize <= oldsize) {
			loff_t holebegin = round_up(newsize, PAGE_SIZE);
			if (oldsize > holebegin)
				unmap_mapping_range(inode->i_mapping,
							holebegin, 0, 1);
			/* Nothing to truncate if nothing is accounted as allocated. */
			if (info->alloced)
				shmem_truncate_range(inode,
							newsize, (loff_t)-1);
			/* unmap again to remove racily COWed private pages */
			if (oldsize > holebegin)
				unmap_mapping_range(inode->i_mapping,
							holebegin, 0, 1);
		}
	}

	if (is_quota_modification(idmap, inode, attr)) {
		error = dquot_initialize(inode);
		if (error)
			return error;
	}

	/* Transfer quota accounting */
	if (i_uid_needs_update(idmap, attr, inode) ||
	    i_gid_needs_update(idmap, attr, inode)) {
		error = dquot_transfer(idmap, inode, attr);
		if (error)
			return error;
	}

	setattr_copy(idmap, inode, attr);
	if (attr->ia_valid & ATTR_MODE)
		error = posix_acl_chmod(idmap, dentry, inode->i_mode);
	if (!error && update_ctime) {
		inode_set_ctime_current(inode);
		if (update_mtime)
			inode_set_mtime_to_ts(inode, inode_get_ctime(inode));
		inode_inc_iversion(inode);
	}
	return error;
}

/*
 * Tear down @inode at eviction time.
 *
 * For a shmem mapping: unaccount its size, set i_size to 0, mark the
 * mapping as exiting, truncate everything, take the inode off the
 * superblock's shrinklist and off the swaplist.  The swaplist removal waits
 * until info->stop_eviction reads zero.  Then, for every inode: free the
 * xattrs, release the inode accounting, warn if i_blocks is still non-zero,
 * clear the inode and, with CONFIG_TMPFS_QUOTA, drop its quota state.
 */
static void shmem_evict_inode(struct inode *inode)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	size_t freed = 0;

	if (shmem_mapping(inode->i_mapping)) {
		shmem_unacct_size(info->flags, inode->i_size);
		inode->i_size = 0;
		mapping_set_exiting(inode->i_mapping);
		shmem_truncate_range(inode, 0, (loff_t)-1);
		/* Unlocked peek first; re-checked under shrinklist_lock. */
		if (!list_empty(&info->shrinklist)) {
			spin_lock(&sbinfo->shrinklist_lock);
			if (!list_empty(&info->shrinklist)) {
				list_del_init(&info->shrinklist);
				sbinfo->shrinklist_len--;
			}
			spin_unlock(&sbinfo->shrinklist_lock);
		}
		while (!list_empty(&info->swaplist)) {
			/* Wait while shmem_unuse() is scanning this inode... */
			wait_var_event(&info->stop_eviction,
				       !atomic_read(&info->stop_eviction));
			mutex_lock(&shmem_swaplist_mutex);
			/* ...but beware of the race if we peeked too early */
			if (!atomic_read(&info->stop_eviction))
				list_del_init(&info->swaplist);
			mutex_unlock(&shmem_swaplist_mutex);
		}
	}

	/* The freed size is only collected when max_inodes is non-zero. */
	simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
	shmem_free_inode(inode->i_sb, freed);
	WARN_ON(inode->i_blocks);
	clear_inode(inode);
#ifdef CONFIG_TMPFS_QUOTA
	dquot_free_inode(inode);
	dquot_drop(inode);
#endif
}

/*
 * Collect swap entries of swap type @type from @mapping into @fbatch,
 * walking from index @start under rcu_read_lock().  For every entry added,
 * its index is stored at the matching position of @indices.  The walk
 * stops once folio_batch_add() reports no room left, or when the xarray is
 * exhausted.
 *
 * Returns xas.xa_index at the point the walk stopped.
 * NOTE(review): the return type is int while xa_index is an unsigned long;
 * the only caller in this file ignores the value.
 */
static int shmem_find_swap_entries(struct address_space *mapping,
				   pgoff_t start, struct folio_batch *fbatch,
				   pgoff_t *indices, unsigned int type)
{
	XA_STATE(xas, &mapping->i_pages, start);
	struct folio *folio;
	swp_entry_t entry;

	rcu_read_lock();
	xas_for_each(&xas, folio, ULONG_MAX) {
		if (xas_retry(&xas, folio))
			continue;

		/* Only value (swap) entries are of interest. */
		if (!xa_is_value(folio))
			continue;

		entry = radix_to_swp_entry(folio);
		/*
		 * swapin error entries can be found in the mapping. But they're
		 * deliberately ignored here as we've done everything we can do.
		 */
		if (swp_type(entry) != type)
			continue;

		indices[folio_batch_count(fbatch)] = xas.xa_index;
		if (!folio_batch_add(fbatch, folio))
			break;

		if (need_resched()) {
			xas_pause(&xas);
			cond_resched_rcu();
		}
	}
	rcu_read_unlock();

	return xas.xa_index;
}

/*
 * Move the swapped pages for an inode to page cache. Returns the count
 * of pages swapped in, or the error in case of failure.
 *
 * Only -ENOMEM from shmem_swapin_folio() is reported as a failure; any
 * other error is dropped and the loop moves on to the next entry.
 */
static int shmem_unuse_swap_entries(struct inode *inode,
		struct folio_batch *fbatch, pgoff_t *indices)
{
	int i = 0;
	int ret = 0;
	int error = 0;
	struct address_space *mapping = inode->i_mapping;

	for (i = 0; i < folio_batch_count(fbatch); i++) {
		struct folio *folio = fbatch->folios[i];

		if (!xa_is_value(folio))
			continue;
		error = shmem_swapin_folio(inode, indices[i], &folio, SGP_CACHE,
					mapping_gfp_mask(mapping), NULL, NULL);
		if (error == 0) {
			folio_unlock(folio);
			folio_put(folio);
			ret++;
		}
		if (error == -ENOMEM)
			break;
		error = 0;
	}
	return error ? error : ret;
}

/*
 * If swap found in inode, free it and move page from swapcache to filecache.
 *
 * Loops batch by batch until shmem_find_swap_entries() finds nothing more
 * (returns 0) or shmem_unuse_swap_entries() fails (returns that negative
 * error).
 */
static int shmem_unuse_inode(struct inode *inode, unsigned int type)
{
	struct address_space *mapping = inode->i_mapping;
	pgoff_t start = 0;
	struct folio_batch fbatch;
	pgoff_t indices[PAGEVEC_SIZE];
	int ret = 0;

	do {
		folio_batch_init(&fbatch);
		shmem_find_swap_entries(mapping, start, &fbatch, indices, type);
		if (folio_batch_count(&fbatch) == 0) {
			ret = 0;
			break;
		}

		ret = shmem_unuse_swap_entries(inode, &fbatch, indices);
		if (ret < 0)
			break;

		/* Next scan starts at the index of the last entry of this batch. */
		start = indices[folio_batch_count(&fbatch) - 1];
	} while (true);

	return ret;
}

/*
 * Read all the shared memory data that resides in the swap
 * device 'type' back into memory, so the swap device can be
 * unused.
 *
 * Returns 0, or the first non-zero value returned by shmem_unuse_inode(),
 * which ends the walk of the swaplist.
 */
int shmem_unuse(unsigned int type)
{
	struct shmem_inode_info *info, *next;
	int error = 0;

	if (list_empty(&shmem_swaplist))
		return 0;

	mutex_lock(&shmem_swaplist_mutex);
	list_for_each_entry_safe(info, next, &shmem_swaplist, swaplist) {
		if (!info->swapped) {
			list_del_init(&info->swaplist);
			continue;
		}
		/*
		 * Drop the swaplist mutex while searching the inode for swap;
		 * but before doing so, make sure shmem_evict_inode() will not
		 * remove placeholder inode from swaplist, nor let it be freed
		 * (igrab() would protect from unlink, but not from unmount).
		 */
		atomic_inc(&info->stop_eviction);
		mutex_unlock(&shmem_swaplist_mutex);

		error = shmem_unuse_inode(&info->vfs_inode, type);
		cond_resched();

		mutex_lock(&shmem_swaplist_mutex);
		/* The list may have changed while unlocked: re-read the successor. */
		next = list_next_entry(info, swaplist);
		if (!info->swapped)
			list_del_init(&info->swaplist);
		if (atomic_dec_and_test(&info->stop_eviction))
			wake_up_var(&info->stop_eviction);
		if (error)
			break;
	}
	mutex_unlock(&shmem_swaplist_mutex);

	return error;
}

/*
 * Move the page from the page cache to the swap cache.
*/
/*
 * Return values, as produced by the code below:
 *  - the result of swap_writepage() once the folio has been added to the
 *    swap cache and replaced in the page cache by its swap entry;
 *  - AOP_WRITEPAGE_ACTIVATE, with the folio re-dirtied and still locked,
 *    when swap-out was not done and wbc->for_reclaim is set;
 *  - 0, with the folio re-dirtied and unlocked, otherwise.
 */
static int shmem_writepage(struct page *page, struct writeback_control *wbc)
{
	struct folio *folio = page_folio(page);
	struct address_space *mapping = folio->mapping;
	struct inode *inode = mapping->host;
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	swp_entry_t swap;
	pgoff_t index;
	int nr_pages;
	bool split = false;

	/*
	 * Our capabilities prevent regular writeback or sync from ever calling
	 * shmem_writepage; but a stacking filesystem might use ->writepage of
	 * its underlying filesystem, in which case tmpfs should write out to
	 * swap only in response to memory pressure, and not for the writeback
	 * threads or sync.
	 */
	if (WARN_ON_ONCE(!wbc->for_reclaim))
		goto redirty;

	if (WARN_ON_ONCE((info->flags & VM_LOCKED) || sbinfo->noswap))
		goto redirty;

	if (!total_swap_pages)
		goto redirty;

	/*
	 * If CONFIG_THP_SWAP is not enabled, the large folio should be
	 * split when swapping.
	 *
	 * And shrinkage of pages beyond i_size does not split swap, so
	 * swapout of a large folio crossing i_size needs to split too
	 * (unless fallocate has been used to preallocate beyond EOF).
	 */
	if (folio_test_large(folio)) {
		index = shmem_fallocend(inode,
			DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE));
		if ((index > folio->index && index < folio_next_index(folio)) ||
		    !IS_ENABLED(CONFIG_THP_SWAP))
			split = true;
	}

	if (split) {
		/* Also entered from below when no swap could be allocated. */
try_split:
		/* Ensure the subpages are still dirty */
		folio_test_set_dirty(folio);
		if (split_huge_page_to_list_to_order(page, wbc->list, 0))
			goto redirty;
		folio = page_folio(page);
		folio_clear_dirty(folio);
	}

	index = folio->index;
	nr_pages = folio_nr_pages(folio);

	/*
	 * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
	 * value into swapfile.c, the only way we can correctly account for a
	 * fallocated folio arriving here is now to initialize it and write it.
	 *
	 * That's okay for a folio already fallocated earlier, but if we have
	 * not yet completed the fallocation, then (a) we want to keep track
	 * of this folio in case we have to undo it, and (b) it may not be a
	 * good idea to continue anyway, once we're pushing into swap.  So
	 * reactivate the folio, and let shmem_fallocate() quit when too many.
	 */
	if (!folio_test_uptodate(folio)) {
		if (inode->i_private) {
			struct shmem_falloc *shmem_falloc;
			spin_lock(&inode->i_lock);
			/* Re-read i_private now that i_lock is held. */
			shmem_falloc = inode->i_private;
			if (shmem_falloc &&
			    !shmem_falloc->waitq &&
			    index >= shmem_falloc->start &&
			    index < shmem_falloc->next)
				shmem_falloc->nr_unswapped += nr_pages;
			else
				shmem_falloc = NULL;
			spin_unlock(&inode->i_lock);
			if (shmem_falloc)
				goto redirty;
		}
		folio_zero_range(folio, 0, folio_size(folio));
		flush_dcache_folio(folio);
		folio_mark_uptodate(folio);
	}

	swap = folio_alloc_swap(folio);
	if (!swap.val) {
		/* No swap for a multi-page folio: split it and try again. */
		if (nr_pages > 1)
			goto try_split;

		goto redirty;
	}

	/*
	 * Add inode to shmem_unuse()'s list of swapped-out inodes,
	 * if it's not already there.  Do it now before the folio is
	 * moved to swap cache, when its pagelock no longer protects
	 * the inode from eviction.  But don't unlock the mutex until
	 * we've incremented swapped, because shmem_unuse_inode() will
	 * prune a !swapped inode from the swaplist under this mutex.
	 */
	mutex_lock(&shmem_swaplist_mutex);
	if (list_empty(&info->swaplist))
		list_add(&info->swaplist, &shmem_swaplist);

	if (add_to_swap_cache(folio, swap,
			__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
			NULL) == 0) {
		shmem_recalc_inode(inode, 0, nr_pages);
		swap_shmem_alloc(swap, nr_pages);
		shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));

		mutex_unlock(&shmem_swaplist_mutex);
		BUG_ON(folio_mapped(folio));
		return swap_writepage(&folio->page, wbc);
	}

	/* Could not add to the swap cache: give the swap entry back. */
	mutex_unlock(&shmem_swaplist_mutex);
	put_swap_folio(folio, swap);
redirty:
	folio_mark_dirty(folio);
	if (wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;	/* Return with folio locked */
	folio_unlock(folio);
	return 0;
}

#if defined(CONFIG_NUMA) && defined(CONFIG_TMPFS)
/*
 * Append ",mpol=<policy>" to @seq for @mpol; prints nothing when @mpol is
 * NULL or its mode is MPOL_DEFAULT.  The policy text is produced by
 * mpol_to_str() into a 64-byte stack buffer.
 */
static void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
	char buffer[64];

	if (!mpol || mpol->mode == MPOL_DEFAULT)
		return;		/* show nothing */

	mpol_to_str(buffer, sizeof(buffer), mpol);

	seq_printf(seq, ",mpol=%s", buffer);
}

/*
 * Return @sbinfo's mempolicy after calling mpol_get() on it under
 * stat_lock, or NULL when the superblock has none.
 */
static struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	struct mempolicy *mpol = NULL;
	if (sbinfo->mpol) {
		raw_spin_lock(&sbinfo->stat_lock);	/* prevent replace/use races */
		mpol = sbinfo->mpol;
		mpol_get(mpol);
		raw_spin_unlock(&sbinfo->stat_lock);
	}
	return mpol;
}
#else /* !CONFIG_NUMA || !CONFIG_TMPFS */
/* Stub variants: nothing to show, and no superblock mempolicy. */
static inline void shmem_show_mpol(struct seq_file *seq, struct mempolicy *mpol)
{
}
static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
{
	return NULL;
}
#endif /* CONFIG_NUMA && CONFIG_TMPFS */

/* Defined elsewhere in this file; declared here for the users below. */
static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
			pgoff_t index, unsigned int order, pgoff_t *ilx);

/*
 * Swap in the folio for @swap through swap_cluster_readahead(), using the
 * mempolicy that shmem_get_pgoff_policy() yields for @index at order 0.
 * Returns whatever swap_cluster_readahead() returns.
 */
static struct folio *shmem_swapin_cluster(swp_entry_t swap, gfp_t gfp,
			struct shmem_inode_info *info, pgoff_t index)
{
	struct mempolicy *mpol;
	pgoff_t ilx;
	struct folio *folio;

	mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
	folio = swap_cluster_readahead(swap, gfp, mpol, ilx);
	mpol_cond_put(mpol);

	return folio;
}

/*
 * Make sure huge_gfp is always
more limited than limit_gfp.
 * Some of the flags set permissions, while others set limitations.
 */
static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
{
	gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
	gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
	gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
	/* Start from huge_gfp with the allow flags and the zone bits removed. */
	gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);

	/* Allow allocations only from the originally specified zones. */
	result |= zoneflags;

	/*
	 * Minimize the result gfp by taking the union with the deny flags,
	 * and the intersection of the allow flags.
	 */
	result |= (limit_gfp & denyflags);
	result |= (huge_gfp & limit_gfp) & allowflags;

	return result;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Report whether the HPAGE_PMD_ORDER bit is set in any of the shmem huge
 * order masks (always, madvise, within_size, or inherit).  The inherit
 * mask only counts when shmem_huge is not SHMEM_HUGE_NEVER, and the answer
 * is always false when shmem_huge is SHMEM_HUGE_DENY.
 */
bool shmem_hpage_pmd_enabled(void)
{
	if (shmem_huge == SHMEM_HUGE_DENY)
		return false;
	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_always))
		return true;
	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_madvise))
		return true;
	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_within_size))
		return true;
	if (test_bit(HPAGE_PMD_ORDER, &huge_shmem_orders_inherit) &&
	    shmem_huge != SHMEM_HUGE_NEVER)
		return true;

	return false;
}

/*
 * Compute the bitmask of folio orders that may be tried at @index of
 * @inode.
 *
 * Returns 0 when THP is disabled by hardware or for @vma.  When there is
 * no @vma, or @vma is not anonymous shmem, the result is whatever
 * shmem_huge_global_enabled() returns.  For anonymous shmem the result is
 * built from the huge_shmem_orders_* masks and finally restricted to
 * THP_ORDERS_ALL_FILE_DEFAULT.
 */
unsigned long shmem_allowable_huge_orders(struct inode *inode,
				struct vm_area_struct *vma, pgoff_t index,
				loff_t write_end, bool shmem_huge_force)
{
	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
	unsigned long vm_flags = vma ? vma->vm_flags : 0;
	pgoff_t aligned_index;
	unsigned int global_orders;
	loff_t i_size;
	int order;

	if (thp_disabled_by_hw() || (vma && vma_thp_disabled(vma, vm_flags)))
		return 0;

	global_orders = shmem_huge_global_enabled(inode, index, write_end,
						  shmem_huge_force, vma, vm_flags);
	/* Tmpfs huge pages allocation */
	if (!vma || !vma_is_anon_shmem(vma))
		return global_orders;

	/*
	 * Following the 'deny' semantics of the top level, force the huge
	 * option off from all mounts.
	 */
	if (shmem_huge == SHMEM_HUGE_DENY)
		return 0;

	/*
	 * Only allow inherit orders if the top-level value is 'force', which
	 * means non-PMD sized THP can not override 'huge' mount option now.
	 */
	if (shmem_huge == SHMEM_HUGE_FORCE)
		return READ_ONCE(huge_shmem_orders_inherit);

	/* Allow mTHP that will be fully within i_size. */
	order = highest_order(within_size_orders);
	while (within_size_orders) {
		aligned_index = round_up(index + 1, 1 << order);
		i_size = round_up(i_size_read(inode), PAGE_SIZE);
		if (i_size >> PAGE_SHIFT >= aligned_index) {
			/* This order fits: add it and every order still in the mask. */
			mask |= within_size_orders;
			break;
		}

		order = next_order(&within_size_orders, order);
	}

	if (vm_flags & VM_HUGEPAGE)
		mask |= READ_ONCE(huge_shmem_orders_madvise);

	if (global_orders > 0)
		mask |= READ_ONCE(huge_shmem_orders_inherit);

	return THP_ORDERS_ALL_FILE_DEFAULT & mask;
}

/*
 * Narrow @orders down to those usable at @index.
 *
 * With a fault (@vmf non-NULL and carrying a vma) the mask is first
 * filtered by thp_vma_suitable_orders().  Then, from the highest order
 * down, orders are dropped while the aligned index range for that order
 * already has something present in @mapping's xarray.  Returns the
 * remaining mask, or 0 if nothing is left.
 */
static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
					   struct address_space *mapping, pgoff_t index,
					   unsigned long orders)
{
	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
	pgoff_t aligned_index;
	unsigned long pages;
	int order;

	if (vma) {
		orders = thp_vma_suitable_orders(vma, vmf->address, orders);
		if (!orders)
			return 0;
	}

	/* Find the highest order that can add into the page cache */
	order = highest_order(orders);
	while (orders) {
		pages = 1UL << order;
		aligned_index = round_down(index, pages);
		/*
		 * Check for conflict before waiting on a huge allocation.
		 * Conflict might be that a huge page has just been allocated
		 * and added to page cache by a racing thread, or that there
		 * is already at least one small page in the huge extent.
		 * Be careful to retry when appropriate, but not forever!
		 * Elsewhere -EEXIST would be the right code, but not here.
		 */
		if (!xa_find(&mapping->i_pages, &aligned_index,
			     aligned_index + pages - 1, XA_PRESENT))
			break;
		order = next_order(&orders, order);
	}

	return orders;
}
#else
/* Without CONFIG_TRANSPARENT_HUGEPAGE no order is ever suitable. */
static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
					   struct address_space *mapping, pgoff_t index,
					   unsigned long orders)
{
	return 0;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

/*
 * Allocate a folio of the given @order with folio_alloc_mpol(), using the
 * mempolicy that shmem_get_pgoff_policy() yields for @index.  Returns
 * whatever folio_alloc_mpol() returns.
 */
static struct folio *shmem_alloc_folio(gfp_t gfp, int order,
		struct shmem_inode_info *info, pgoff_t index)
{
	struct mempolicy *mpol;
	pgoff_t ilx;
	struct folio *folio;

	mpol = shmem_get_pgoff_policy(info, index, order, &ilx);
	folio = folio_alloc_mpol(gfp, order, mpol, ilx, numa_node_id());
	mpol_cond_put(mpol);

	return folio;
}

/*
 * Allocate a folio for @index, charge it, add it to the page cache and
 * account its blocks.
 *
 * With @orders non-zero (and CONFIG_TRANSPARENT_HUGEPAGE enabled) only the
 * orders accepted by shmem_suitable_orders() are tried, highest first;
 * with @orders zero a single order-0 folio is allocated.
 *
 * Returns the folio, locked and on the LRU, or an ERR_PTR():
 *  -ENOMEM if no folio could be allocated,
 *  -EEXIST if the memcg charge failed and the index range is already
 *  populated, or otherwise the error from mem_cgroup_charge(),
 *  shmem_add_to_page_cache() or shmem_inode_acct_blocks().
 */
static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
		gfp_t gfp, struct inode *inode, pgoff_t index,
		struct mm_struct *fault_mm, unsigned long orders)
{
	struct address_space *mapping = inode->i_mapping;
	struct shmem_inode_info *info = SHMEM_I(inode);
	unsigned long suitable_orders = 0;
	struct folio *folio = NULL;
	long pages;
	int error, order;

	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
		orders = 0;

	if (orders > 0) {
		suitable_orders = shmem_suitable_orders(inode, vmf,
							mapping, index, orders);

		order = highest_order(suitable_orders);
		while (suitable_orders) {
			pages = 1UL << order;
			index = round_down(index, pages);
			folio = shmem_alloc_folio(gfp, order, info, index);
			if (folio)
				goto allocated;

			/* Count the fallback, then try the next lower order. */
			if (pages == HPAGE_PMD_NR)
				count_vm_event(THP_FILE_FALLBACK);
			count_mthp_stat(order, MTHP_STAT_SHMEM_FALLBACK);
			order = next_order(&suitable_orders, order);
		}
	} else {
		pages = 1;
		folio = shmem_alloc_folio(gfp, 0, info, index);
	}
	if (!folio)
		return ERR_PTR(-ENOMEM);

allocated:
	__folio_set_locked(folio);
	__folio_set_swapbacked(folio);

	gfp &= GFP_RECLAIM_MASK;
	error = mem_cgroup_charge(folio, fault_mm, gfp);
	if (error) {
		if (xa_find(&mapping->i_pages, &index,
				index + pages - 1, XA_PRESENT)) {
			error = -EEXIST;
		} else if (pages > 1) {
			if (pages == HPAGE_PMD_NR) {
				count_vm_event(THP_FILE_FALLBACK);
				count_vm_event(THP_FILE_FALLBACK_CHARGE);
			}
			count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK);
			count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_FALLBACK_CHARGE);
		}
		goto unlock;
	}

	error = shmem_add_to_page_cache(folio, mapping, index, NULL, gfp);
	if (error)
		goto unlock;

	error = shmem_inode_acct_blocks(inode, pages);
	if (error) {
		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
		long freed;
		/*
		 * Try to reclaim some space by splitting a few
		 * large folios beyond i_size on the filesystem.
		 */
		shmem_unused_huge_shrink(sbinfo, NULL, pages);
		/*
		 * And do a shmem_recalc_inode() to account for freed pages:
		 * except our folio is there in cache, so not quite balanced.
		 */
		spin_lock(&info->lock);
		freed = pages + info->alloced - info->swapped -
			READ_ONCE(mapping->nrpages);
		if (freed > 0)
			info->alloced -= freed;
		spin_unlock(&info->lock);
		if (freed > 0)
			shmem_inode_unacct_blocks(inode, freed);
		/* Second and last attempt at accounting the blocks. */
		error = shmem_inode_acct_blocks(inode, pages);
		if (error) {
			filemap_remove_folio(folio);
			goto unlock;
		}
	}

	shmem_recalc_inode(inode, pages, 0);
	folio_add_lru(folio);
	return folio;

unlock:
	folio_unlock(folio);
	folio_put(folio);
	return ERR_PTR(error);
}

/*
 * Allocate a folio of @order for @entry, charge it for swapin, claim the
 * swap entries with swapcache_prepare() and start the read with
 * swap_read_folio().
 *
 * Returns the new folio, locked and on the LRU, or ERR_PTR(-ENOMEM) when
 * the allocation or the memcg charge fails, or ERR_PTR(-EEXIST) when
 * swapcache_prepare() fails.
 */
static struct folio *shmem_swap_alloc_folio(struct inode *inode,
		struct vm_area_struct *vma, pgoff_t index,
		swp_entry_t entry, int order, gfp_t gfp)
{
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct folio *new;
	void *shadow;
	int nr_pages;

	/*
	 * We have arrived here because our zones are constrained, so don't
	 * limit chance of success with further cpuset and node constraints.
	 */
	gfp &= ~GFP_CONSTRAINT_MASK;
	if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && order > 0) {
		gfp_t huge_gfp = vma_thp_gfp_mask(vma);

		gfp = limit_gfp_mask(huge_gfp, gfp);
	}

	new = shmem_alloc_folio(gfp, order, info, index);
	if (!new)
		return ERR_PTR(-ENOMEM);

	nr_pages = folio_nr_pages(new);
	if (mem_cgroup_swapin_charge_folio(new, vma ? vma->vm_mm : NULL,
					   gfp, entry)) {
		folio_put(new);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * Prevent parallel swapin from proceeding with the swap cache flag.
	 *
	 * Of course there is another possible concurrent scenario as well,
	 * that is to say, the swap cache flag of a large folio has already
	 * been set by swapcache_prepare(), while another thread may have
	 * already split the large swap entry stored in the shmem mapping.
	 * In this case, shmem_add_to_page_cache() will help identify the
	 * concurrent swapin and return -EEXIST.
	 */
	if (swapcache_prepare(entry, nr_pages)) {
		folio_put(new);
		return ERR_PTR(-EEXIST);
	}

	__folio_set_locked(new);
	__folio_set_swapbacked(new);
	new->swap = entry;

	mem_cgroup_swapin_uncharge_swap(entry, nr_pages);
	shadow = get_shadow_from_swap_cache(entry);
	if (shadow)
		workingset_refault(new, shadow);
	folio_add_lru(new);
	swap_read_folio(new, NULL);
	return new;
}

/*
 * When a page is moved from swapcache to shmem filecache (either by the
 * usual swapin of shmem_get_folio_gfp(), or by the less common swapoff of
 * shmem_unuse_inode()), it may have been read in earlier from swap, in
 * ignorance of the mapping it belongs to.  If that mapping has special
 * constraints (like the gma500 GEM driver, which requires RAM below 4GB),
 * we may need to copy to a suitable page before moving to filecache.
 *
 * In a future release, this may well be extended to respect cpuset and
 * NUMA mempolicy, and applied also to anonymous pages in do_swap_page();
 * but for now it is a simple matter of zone.
*/
/* True when @folio sits in a higher zone than @gfp allows via gfp_zone(). */
static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp)
{
	return folio_zonenum(folio) > gfp_zone(gfp);
}

/*
 * Replace the swap-cache folio *@foliop with a newly allocated copy of the
 * same order, allocated under @gfp after dropping the cpuset/node
 * constraint bits.
 *
 * On success the new folio takes over the old one's swap cache slots, is
 * put on the LRU and stored in *@foliop, and the old folio is released.
 * Returns 0 on success, -ENOMEM if the allocation fails, or -ENOENT if a
 * swap cache slot did not hold the old folio; in that last case the new
 * folio is the one released and *@foliop is left unchanged.
 */
static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
				struct shmem_inode_info *info, pgoff_t index,
				struct vm_area_struct *vma)
{
	struct folio *new, *old = *foliop;
	swp_entry_t entry = old->swap;
	struct address_space *swap_mapping = swap_address_space(entry);
	pgoff_t swap_index = swap_cache_index(entry);
	XA_STATE(xas, &swap_mapping->i_pages, swap_index);
	int nr_pages = folio_nr_pages(old);
	int error = 0, i;

	/*
	 * We have arrived here because our zones are constrained, so don't
	 * limit chance of success by further cpuset and node constraints.
	 */
	gfp &= ~GFP_CONSTRAINT_MASK;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (nr_pages > 1) {
		gfp_t huge_gfp = vma_thp_gfp_mask(vma);

		gfp = limit_gfp_mask(huge_gfp, gfp);
	}
#endif

	new = shmem_alloc_folio(gfp, folio_order(old), info, index);
	if (!new)
		return -ENOMEM;

	/* One extra reference per page, matching the put at the end. */
	folio_ref_add(new, nr_pages);
	folio_copy(new, old);
	flush_dcache_folio(new);

	__folio_set_locked(new);
	__folio_set_swapbacked(new);
	folio_mark_uptodate(new);
	new->swap = entry;
	folio_set_swapcache(new);

	/* Swap cache still stores N entries instead of a high-order entry */
	xa_lock_irq(&swap_mapping->i_pages);
	for (i = 0; i < nr_pages; i++) {
		void *item = xas_load(&xas);

		if (item != old) {
			error = -ENOENT;
			break;
		}

		xas_store(&xas, new);
		xas_next(&xas);
	}
	if (!error) {
		mem_cgroup_replace_folio(old, new);
		shmem_update_stats(new, nr_pages);
		shmem_update_stats(old, -nr_pages);
	}
	xa_unlock_irq(&swap_mapping->i_pages);

	if (unlikely(error)) {
		/*
		 * Is this possible?  I think not, now that our callers
		 * check both the swapcache flag and folio->private
		 * after getting the folio lock; but be defensive.
		 * Reverse old to newpage for clear and free.
		 */
		old = new;
	} else {
		folio_add_lru(new);
		*foliop = new;
	}

	/* From here on "old" is whichever folio is being discarded. */
	folio_clear_swapcache(old);
	old->private = NULL;

	folio_unlock(old);
	/*
	 * The old folio is removed from the swap cache: drop the 'nr_pages'
	 * references, as well as the one temporary reference obtained from
	 * the swap cache.
	 */
	folio_put_refs(old, nr_pages + 1);
	return error;
}

/*
 * Record a swapin failure for @index: atomically replace the swap entry
 * @swap in the mapping with a poisoned swap entry.  If the mapping no
 * longer holds @swap, nothing else is done.  Otherwise wait for writeback
 * on @folio, remove it from the swap cache unless @skip_swapcache is set,
 * adjust the inode accounting and free the swap entries.
 */
static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
					 struct folio *folio, swp_entry_t swap,
					 bool skip_swapcache)
{
	struct address_space *mapping = inode->i_mapping;
	swp_entry_t swapin_error;
	void *old;
	int nr_pages;

	swapin_error = make_poisoned_swp_entry();
	old = xa_cmpxchg_irq(&mapping->i_pages, index,
			     swp_to_radix_entry(swap),
			     swp_to_radix_entry(swapin_error), 0);
	if (old != swp_to_radix_entry(swap))
		return;

	nr_pages = folio_nr_pages(folio);
	folio_wait_writeback(folio);
	if (!skip_swapcache)
		delete_from_swap_cache(folio);
	/*
	 * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
	 * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
	 * in shmem_evict_inode().
	 */
	shmem_recalc_inode(inode, -nr_pages, -nr_pages);
	swap_free_nr(swap, nr_pages);
}

/*
 * If the swap entry @swap stored at @index in the mapping is a multi-index
 * (order > 0) entry, split it into order-0 entries whose swap offsets run
 * consecutively from swp_offset(@swap).
 *
 * The split nodes are allocated with the lock dropped, so the loop
 * re-validates the entry and its order each time the lock is retaken.
 *
 * Returns a negative error from the xa_state (-EEXIST when the entry at
 * @index is no longer @swap), otherwise alloced_order: 0 when no split was
 * needed, else the order that was split.
 */
static int shmem_split_large_entry(struct inode *inode, pgoff_t index,
				   swp_entry_t swap, gfp_t gfp)
{
	struct address_space *mapping = inode->i_mapping;
	XA_STATE_ORDER(xas, &mapping->i_pages, index, 0);
	void *alloced_shadow = NULL;
	int alloced_order = 0, i;

	/* Convert user data gfp flags to xarray node gfp flags */
	gfp &= GFP_RECLAIM_MASK;

	for (;;) {
		int order = -1, split_order = 0;
		void *old = NULL;

		xas_lock_irq(&xas);
		old = xas_load(&xas);
		if (!xa_is_value(old) || swp_to_radix_entry(swap) != old) {
			xas_set_err(&xas, -EEXIST);
			goto unlock;
		}

		order = xas_get_order(&xas);

		/* Swap entry may have changed before we re-acquire the lock */
		if (alloced_order &&
		    (old != alloced_shadow || order != alloced_order)) {
			xas_destroy(&xas);
			alloced_order = 0;
		}

		/* Try to split large swap entry in pagecache */
		if (order > 0) {
			/* Nothing preallocated yet: unlock, allocate, retry. */
			if (!alloced_order) {
				split_order = order;
				goto unlock;
			}
			xas_split(&xas, old, order);

			/*
			 * Re-set the swap entry after splitting, and the swap
			 * offset of the original large entry must be continuous.
			 */
			for (i = 0; i < 1 << order; i++) {
				pgoff_t aligned_index = round_down(index, 1 << order);
				swp_entry_t tmp;

				tmp = swp_entry(swp_type(swap), swp_offset(swap) + i);
				__xa_store(&mapping->i_pages, aligned_index + i,
					   swp_to_radix_entry(tmp), 0);
			}
		}

unlock:
		xas_unlock_irq(&xas);

		/* split needed, alloc here and retry. */
		if (split_order) {
			xas_split_alloc(&xas, old, split_order, gfp);
			if (xas_error(&xas))
				goto error;
			alloced_shadow = old;
			alloced_order = split_order;
			xas_reset(&xas);
			continue;
		}

		if (!xas_nomem(&xas, gfp))
			break;
	}

error:
	if (xas_error(&xas))
		return xas_error(&xas);

	return alloced_order;
}

/*
 * Swap in the folio pointed to by *foliop.
 * Caller has to make sure that *foliop contains a valid swapped folio.
 * Returns 0 and the folio in foliop if success. On failure, returns the
 * error code and NULL in *foliop.
*/
/*
 * Error codes produced directly by the code below:
 *  -EIO     the entry is a poisoned swap entry, or the folio read from
 *           swap is not uptodate (the entry is then marked as a swapin
 *           error);
 *  -EEXIST  the mapping no longer holds the expected swap entry;
 *  -EINVAL  the swap device could not be obtained although the mapping
 *           still holds the entry;
 *  -ENOMEM  no folio could be obtained from shmem_swapin_cluster().
 * Errors from shmem_split_large_entry(), shmem_replace_folio() and
 * shmem_add_to_page_cache() are passed on, unless the final
 * shmem_confirm_swap() check turns them into -EEXIST.
 */
static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
			     struct folio **foliop, enum sgp_type sgp,
			     gfp_t gfp, struct vm_area_struct *vma,
			     vm_fault_t *fault_type)
{
	struct address_space *mapping = inode->i_mapping;
	struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL;
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct swap_info_struct *si;
	struct folio *folio = NULL;
	bool skip_swapcache = false;
	swp_entry_t swap;
	int error, nr_pages;

	/* On entry *foliop holds the swap entry as an xarray value. */
	VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
	swap = radix_to_swp_entry(*foliop);
	*foliop = NULL;

	if (is_poisoned_swp_entry(swap))
		return -EIO;

	si = get_swap_device(swap);
	if (!si) {
		if (!shmem_confirm_swap(mapping, index, swap))
			return -EEXIST;
		else
			return -EINVAL;
	}

	/* Look it up and read it in.. */
	folio = swap_cache_get_folio(swap, NULL, 0);
	if (!folio) {
		int order = xa_get_order(&mapping->i_pages, index);
		bool fallback_order0 = false;
		int split_order;

		/* Or update major stats only when swapin succeeds?? */
		if (fault_type) {
			*fault_type |= VM_FAULT_MAJOR;
			count_vm_event(PGMAJFAULT);
			count_memcg_event_mm(fault_mm, PGMAJFAULT);
		}

		/*
		 * If uffd is active for the vma, we need per-page fault
		 * fidelity to maintain the uffd semantics, then fallback
		 * to swapin order-0 folio, as well as for zswap case.
		 */
		if (order > 0 && ((vma && unlikely(userfaultfd_armed(vma))) ||
				  !zswap_never_enabled()))
			fallback_order0 = true;

		/* Skip swapcache for synchronous device. */
		if (!fallback_order0 && data_race(si->flags & SWP_SYNCHRONOUS_IO)) {
			folio = shmem_swap_alloc_folio(inode, vma, index, swap, order, gfp);
			if (!IS_ERR(folio)) {
				skip_swapcache = true;
				goto alloced;
			}

			/*
			 * Fallback to swapin order-0 folio unless the swap entry
			 * already exists.
			 */
			error = PTR_ERR(folio);
			folio = NULL;
			if (error == -EEXIST)
				goto failed;
		}

		/*
		 * Now swap device can only swap in order 0 folio, then we
		 * should split the large swap entry stored in the pagecache
		 * if necessary.
		 */
		split_order = shmem_split_large_entry(inode, index, swap, gfp);
		if (split_order < 0) {
			error = split_order;
			goto failed;
		}

		/*
		 * If the large swap entry has already been split, it is
		 * necessary to recalculate the new swap entry based on
		 * the old order alignment.
		 */
		if (split_order > 0) {
			pgoff_t offset = index - round_down(index, 1 << split_order);

			swap = swp_entry(swp_type(swap), swp_offset(swap) + offset);
		}

		/* Here we actually start the io */
		folio = shmem_swapin_cluster(swap, gfp, info, index);
		if (!folio) {
			error = -ENOMEM;
			goto failed;
		}
	}

alloced:
	/* We have to do this with folio locked to prevent races */
	folio_lock(folio);
	if ((!skip_swapcache && !folio_test_swapcache(folio)) ||
	    folio->swap.val != swap.val ||
	    !shmem_confirm_swap(mapping, index, swap)) {
		error = -EEXIST;
		goto unlock;
	}
	if (!folio_test_uptodate(folio)) {
		error = -EIO;
		goto failed;
	}
	folio_wait_writeback(folio);
	nr_pages = folio_nr_pages(folio);

	/*
	 * Some architectures may have to restore extra metadata to the
	 * folio after reading from swap.
	 */
	arch_swap_restore(folio_swap(swap, folio), folio);

	if (shmem_should_replace_folio(folio, gfp)) {
		error = shmem_replace_folio(&folio, gfp, info, index, vma);
		if (error)
			goto failed;
	}

	/* Replace the swap entry in the mapping with the folio. */
	error = shmem_add_to_page_cache(folio, mapping,
					round_down(index, nr_pages),
					swp_to_radix_entry(swap), gfp);
	if (error)
		goto failed;

	shmem_recalc_inode(inode, 0, -nr_pages);

	if (sgp == SGP_WRITE)
		folio_mark_accessed(folio);

	if (skip_swapcache) {
		folio->swap.val = 0;
		swapcache_clear(si, swap, nr_pages);
	} else {
		delete_from_swap_cache(folio);
	}
	folio_mark_dirty(folio);
	swap_free_nr(swap, nr_pages);
	put_swap_device(si);

	*foliop = folio;
	return 0;
failed:
	/* If the entry changed under us, report -EEXIST instead. */
	if (!shmem_confirm_swap(mapping, index, swap))
		error = -EEXIST;
	if (error == -EIO)
		shmem_set_folio_swapin_error(inode, index, folio, swap,
					     skip_swapcache);
unlock:
	/* skip_swapcache is only set after a folio was allocated above. */
	if (skip_swapcache)
		swapcache_clear(si, swap, folio_nr_pages(folio));
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	put_swap_device(si);

	return error;
}

/*
 * shmem_get_folio_gfp - find page in cache, or get from swap, or allocate
 *
 * If we allocate a new one we do not mark it dirty. That's up to the
 * vm. If we swap it in we mark it dirty since we also free the swap
 * entry since a page cannot live in both the swap and page cache.
 *
 * vmf and fault_type are only supplied by shmem_fault: otherwise they are NULL.
 */
static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
		loff_t write_end, struct folio **foliop, enum sgp_type sgp,
		gfp_t gfp, struct vm_fault *vmf, vm_fault_t *fault_type)
{
	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
	struct mm_struct *fault_mm;
	struct folio *folio;
	int error;
	bool alloced;
	unsigned long orders = 0;

	if (WARN_ON_ONCE(!shmem_mapping(inode->i_mapping)))
		return -EINVAL;

	if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
		return -EFBIG;
repeat:
	if (sgp <= SGP_CACHE &&
	    ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode))
		return -EINVAL;

	alloced = false;
	fault_mm = vma ?
vma->vm_mm : NULL; folio = filemap_get_entry(inode->i_mapping, index); if (folio && vma && userfaultfd_minor(vma)) { if (!xa_is_value(folio)) folio_put(folio); *fault_type = handle_userfault(vmf, VM_UFFD_MINOR); return 0; } if (xa_is_value(folio)) { error = shmem_swapin_folio(inode, index, &folio, sgp, gfp, vma, fault_type); if (error == -EEXIST) goto repeat; *foliop = folio; return error; } if (folio) { folio_lock(folio); /* Has the folio been truncated or swapped out? */ if (unlikely(folio->mapping != inode->i_mapping)) { folio_unlock(folio); folio_put(folio); goto repeat; } if (sgp == SGP_WRITE) folio_mark_accessed(folio); if (folio_test_uptodate(folio)) goto out; /* fallocated folio */ if (sgp != SGP_READ) goto clear; folio_unlock(folio); folio_put(folio); } /* * SGP_READ: succeed on hole, with NULL folio, letting caller zero. * SGP_NOALLOC: fail on hole, with NULL folio, letting caller fail. */ *foliop = NULL; if (sgp == SGP_READ) return 0; if (sgp == SGP_NOALLOC) return -ENOENT; /* * Fast cache lookup and swap lookup did not find it: allocate. */ if (vma && userfaultfd_missing(vma)) { *fault_type = handle_userfault(vmf, VM_UFFD_MISSING); return 0; } /* Find hugepage orders that are allowed for anonymous shmem and tmpfs. 
*/ orders = shmem_allowable_huge_orders(inode, vma, index, write_end, false); if (orders > 0) { gfp_t huge_gfp; huge_gfp = vma_thp_gfp_mask(vma); huge_gfp = limit_gfp_mask(huge_gfp, gfp); folio = shmem_alloc_and_add_folio(vmf, huge_gfp, inode, index, fault_mm, orders); if (!IS_ERR(folio)) { if (folio_test_pmd_mappable(folio)) count_vm_event(THP_FILE_ALLOC); count_mthp_stat(folio_order(folio), MTHP_STAT_SHMEM_ALLOC); goto alloced; } if (PTR_ERR(folio) == -EEXIST) goto repeat; } folio = shmem_alloc_and_add_folio(vmf, gfp, inode, index, fault_mm, 0); if (IS_ERR(folio)) { error = PTR_ERR(folio); if (error == -EEXIST) goto repeat; folio = NULL; goto unlock; } alloced: alloced = true; if (folio_test_large(folio) && DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) < folio_next_index(folio)) { struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); struct shmem_inode_info *info = SHMEM_I(inode); /* * Part of the large folio is beyond i_size: subject * to shrink under memory pressure. */ spin_lock(&sbinfo->shrinklist_lock); /* * _careful to defend against unlocked access to * ->shrink_list in shmem_unused_huge_shrink() */ if (list_empty_careful(&info->shrinklist)) { list_add_tail(&info->shrinklist, &sbinfo->shrinklist); sbinfo->shrinklist_len++; } spin_unlock(&sbinfo->shrinklist_lock); } if (sgp == SGP_WRITE) folio_set_referenced(folio); /* * Let SGP_FALLOC use the SGP_WRITE optimization on a new folio. */ if (sgp == SGP_FALLOC) sgp = SGP_WRITE; clear: /* * Let SGP_WRITE caller clear ends if write does not fill folio; * but SGP_FALLOC on a folio fallocated earlier must initialize * it now, lest undo on failure cancel our earlier guarantee. 
*/ if (sgp != SGP_WRITE && !folio_test_uptodate(folio)) { long i, n = folio_nr_pages(folio); for (i = 0; i < n; i++) clear_highpage(folio_page(folio, i)); flush_dcache_folio(folio); folio_mark_uptodate(folio); } /* Perhaps the file has been truncated since we checked */ if (sgp <= SGP_CACHE && ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) { error = -EINVAL; goto unlock; } out: *foliop = folio; return 0; /* * Error recovery. */ unlock: if (alloced) filemap_remove_folio(folio); shmem_recalc_inode(inode, 0, 0); if (folio) { folio_unlock(folio); folio_put(folio); } return error; } /** * shmem_get_folio - find, and lock a shmem folio. * @inode: inode to search * @index: the page index. * @write_end: end of a write, could extend inode size * @foliop: pointer to the folio if found * @sgp: SGP_* flags to control behavior * * Looks up the page cache entry at @inode & @index. If a folio is * present, it is returned locked with an increased refcount. * * If the caller modifies data in the folio, it must call folio_mark_dirty() * before unlocking the folio to ensure that the folio is not reclaimed. * There is no need to reserve space before calling folio_mark_dirty(). * * When no folio is found, the behavior depends on @sgp: * - for SGP_READ, *@foliop is %NULL and 0 is returned * - for SGP_NOALLOC, *@foliop is %NULL and -ENOENT is returned * - for all other flags a new folio is allocated, inserted into the * page cache and returned locked in @foliop. * * Context: May sleep. * Return: 0 if successful, else a negative error code. */ int shmem_get_folio(struct inode *inode, pgoff_t index, loff_t write_end, struct folio **foliop, enum sgp_type sgp) { return shmem_get_folio_gfp(inode, index, write_end, foliop, sgp, mapping_gfp_mask(inode->i_mapping), NULL, NULL); } EXPORT_SYMBOL_GPL(shmem_get_folio); /* * This is like autoremove_wake_function, but it removes the wait queue * entry unconditionally - even if something else had already woken the * target. 
*/
static int synchronous_wake_function(wait_queue_entry_t *wait,
			unsigned int mode, int sync, void *key)
{
	int woken;

	/* Attempt the ordinary wakeup and remember what it reported ... */
	woken = default_wake_function(wait, mode, sync, key);

	/* ... then unlink the entry whatever that result was. */
	list_del_init(&wait->entry);

	return woken;
}

/*
 * Trinity finds that probing a hole which tmpfs is punching can
 * prevent the hole-punch from ever completing: which in turn
 * locks writers out with its hold on i_rwsem.  So refrain from
 * faulting pages into the hole while it's being punched.  Although
 * shmem_undo_range() does remove the additions, it may be unable to
 * keep up, as each new page needs its own unmap_mapping_range() call,
 * and the i_mmap tree grows ever slower to scan if new vmas are added.
 *
 * It does not matter if we sometimes reach this check just before the
 * hole-punch begins, so that one fault then races with the punch:
 * we just need to make racing faults a rare case.
 *
 * The implementation below would be much simpler if we just used a
 * standard mutex or completion: but we cannot take i_rwsem in fault,
 * and bloating every shmem inode for this unlikely case would be sad.
*/
/*
 * Returns 0 when the fault does not fall inside a hole being punched.
 * Otherwise waits for the punch and returns VM_FAULT_NOPAGE, or
 * VM_FAULT_RETRY when maybe_unlock_mmap_for_io() handed back a file
 * (which is released here with fput()).
 */
static vm_fault_t shmem_falloc_wait(struct vm_fault *vmf, struct inode *inode)
{
	struct shmem_falloc *shmem_falloc;
	struct file *fpin = NULL;
	vm_fault_t ret = 0;

	spin_lock(&inode->i_lock);
	shmem_falloc = inode->i_private;
	/* A NULL waitq means a non-punching fallocate: nothing to wait for */
	if (shmem_falloc &&
	    shmem_falloc->waitq &&
	    vmf->pgoff >= shmem_falloc->start &&
	    vmf->pgoff < shmem_falloc->next) {
		wait_queue_head_t *shmem_falloc_waitq;
		DEFINE_WAIT_FUNC(shmem_fault_wait, synchronous_wake_function);

		ret = VM_FAULT_NOPAGE;
		fpin = maybe_unlock_mmap_for_io(vmf, NULL);
		/* Copy the pointer while i_lock still protects *shmem_falloc */
		shmem_falloc_waitq = shmem_falloc->waitq;
		prepare_to_wait(shmem_falloc_waitq, &shmem_fault_wait,
				TASK_UNINTERRUPTIBLE);
		spin_unlock(&inode->i_lock);
		schedule();

		/*
		 * shmem_falloc_waitq points into the shmem_fallocate()
		 * stack of the hole-punching task: shmem_falloc_waitq
		 * is usually invalid by the time we reach here, but
		 * finish_wait() does not dereference it in that case;
		 * though i_lock needed lest racing with wake_up_all().
		 */
		spin_lock(&inode->i_lock);
		finish_wait(shmem_falloc_waitq, &shmem_fault_wait);
	}
	spin_unlock(&inode->i_lock);
	if (fpin) {
		fput(fpin);
		ret = VM_FAULT_RETRY;
	}
	return ret;
}

/*
 * Page fault handler: look up (or swap in, or allocate) the folio backing
 * vmf->pgoff with SGP_CACHE.  On success vmf->page is set and
 * VM_FAULT_LOCKED is or-ed into the result; an errno from
 * shmem_get_folio_gfp() is converted with vmf_error().
 */
static vm_fault_t shmem_fault(struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vmf->vma->vm_file);
	gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
	struct folio *folio = NULL;
	vm_fault_t ret = 0;
	int err;

	/*
	 * Trinity finds that probing a hole which tmpfs is punching can
	 * prevent the hole-punch from ever completing: noted in i_private.
	 */
	if (unlikely(inode->i_private)) {
		ret = shmem_falloc_wait(vmf, inode);
		if (ret)
			return ret;
	}

	WARN_ON_ONCE(vmf->page != NULL);
	err = shmem_get_folio_gfp(inode, vmf->pgoff, 0, &folio, SGP_CACHE,
				  gfp, vmf, &ret);
	if (err)
		return vmf_error(err);
	/* folio stays NULL when the fault was handed to userfaultfd */
	if (folio) {
		vmf->page = folio_file_page(folio, vmf->pgoff);
		ret |= VM_FAULT_LOCKED;
	}
	return ret;
}

/*
 * Choose an unmapped address range for a shmem mapping.
 *
 * The address found by mm_get_unmapped_area() is returned unchanged unless
 * every check below passes.  In that case a second, larger search is made
 * and an address inside it is returned whose offset within hpage_size
 * equals that of the file offset (@pgoff).
 *
 * Returns the chosen address, or -ENOMEM when @len exceeds TASK_SIZE.
 */
unsigned long shmem_get_unmapped_area(struct file *file,
				      unsigned long uaddr, unsigned long len,
				      unsigned long pgoff, unsigned long flags)
{
	unsigned long addr;
	unsigned long offset;
	unsigned long inflated_len;
	unsigned long inflated_addr;
	unsigned long inflated_offset;
	unsigned long hpage_size;

	if (len > TASK_SIZE)
		return -ENOMEM;

	addr = mm_get_unmapped_area(current->mm, file, uaddr, len, pgoff,
				    flags);

	/* From here on every early return keeps the plain answer above */
	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
		return addr;
	if (IS_ERR_VALUE(addr))
		return addr;
	if (addr & ~PAGE_MASK)
		return addr;
	if (addr > TASK_SIZE - len)
		return addr;

	if (shmem_huge == SHMEM_HUGE_DENY)
		return addr;
	if (flags & MAP_FIXED)
		return addr;
	/*
	 * Our priority is to support MAP_SHARED mapped hugely;
	 * and support MAP_PRIVATE mapped hugely too, until it is COWed.
	 * But if caller specified an address hint and we allocated area there
	 * successfully, respect that as before.
	 */
	if (uaddr == addr)
		return addr;

	hpage_size = HPAGE_PMD_SIZE;
	if (shmem_huge != SHMEM_HUGE_FORCE) {
		struct super_block *sb;
		unsigned long __maybe_unused hpage_orders;
		int order = 0;

		if (file) {
			VM_BUG_ON(file->f_op != &shmem_file_operations);
			sb = file_inode(file)->i_sb;
		} else {
			/*
			 * Called directly from mm/mmap.c, or drivers/char/mem.c
			 * for "/dev/zero", to create a shared anonymous object.
			 */
			if (IS_ERR(shm_mnt))
				return addr;
			sb = shm_mnt->mnt_sb;

			/*
			 * Find the highest mTHP order used for anonymous shmem to
			 * provide a suitable alignment address.
			 */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
			hpage_orders = READ_ONCE(huge_shmem_orders_always);
			hpage_orders |= READ_ONCE(huge_shmem_orders_within_size);
			hpage_orders |= READ_ONCE(huge_shmem_orders_madvise);
			if (SHMEM_SB(sb)->huge != SHMEM_HUGE_NEVER)
				hpage_orders |= READ_ONCE(huge_shmem_orders_inherit);

			if (hpage_orders > 0) {
				order = highest_order(hpage_orders);
				hpage_size = PAGE_SIZE << order;
			}
#endif
		}
		if (SHMEM_SB(sb)->huge == SHMEM_HUGE_NEVER && !order)
			return addr;
	}

	if (len < hpage_size)
		return addr;

	/* Byte offset of the mapping's start within a huge page of the file */
	offset = (pgoff << PAGE_SHIFT) & (hpage_size - 1);
	if (offset && offset + len < 2 * hpage_size)
		return addr;
	/* Already at the wanted offset: nothing to improve */
	if ((addr & (hpage_size - 1)) == offset)
		return addr;

	inflated_len = len + hpage_size - PAGE_SIZE;
	if (inflated_len > TASK_SIZE)
		return addr;
	/* Unsigned wraparound of the addition above */
	if (inflated_len < len)
		return addr;

	inflated_addr = mm_get_unmapped_area(current->mm, NULL, uaddr,
					     inflated_len, 0, flags);
	if (IS_ERR_VALUE(inflated_addr))
		return addr;
	if (inflated_addr & ~PAGE_MASK)
		return addr;

	/* Move to the first address in the inflated range at 'offset' */
	inflated_offset = inflated_addr & (hpage_size - 1);
	inflated_addr += offset - inflated_offset;
	if (inflated_offset > offset)
		inflated_addr += hpage_size;

	if (inflated_addr > TASK_SIZE - len)
		return addr;
	return inflated_addr;
}

#ifdef CONFIG_NUMA
/* Record @mpol for @vma's range in the inode's shared policy */
static int shmem_set_policy(struct vm_area_struct *vma, struct mempolicy *mpol)
{
	struct inode *inode = file_inode(vma->vm_file);

	return mpol_set_shared_policy(&SHMEM_I(inode)->policy, vma, mpol);
}

/*
 * Look up the shared policy covering @addr in @vma; *ilx receives the
 * interleave bias (the inode number).
 */
static struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
					  unsigned long addr, pgoff_t *ilx)
{
	struct inode *inode = file_inode(vma->vm_file);
	pgoff_t index;

	/*
	 * Bias interleave by inode number to distribute better across nodes;
	 * but this interface is independent of which page order is used, so
	 * supplies only that bias, letting caller apply the offset (adjusted
	 * by page order, as in shmem_get_pgoff_policy() and get_vma_policy()).
	 */
	*ilx = inode->i_ino;
	/* Page index within the file that @addr maps */
	index = ((addr - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
	return mpol_shared_policy_lookup(&SHMEM_I(inode)->policy, index);
}

/*
 * Policy for file page @index at the given @order: the shared policy if one
 * is found, otherwise the current task's policy.  *ilx receives the inode
 * number plus the order-adjusted index.
 */
static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
			pgoff_t index, unsigned int order, pgoff_t *ilx)
{
	struct mempolicy *mpol;

	/* Bias interleave by inode number to distribute better across nodes */
	*ilx = info->vfs_inode.i_ino + (index >> order);

	mpol = mpol_shared_policy_lookup(&info->policy, index);
	return mpol ? mpol : get_task_policy(current);
}
#else
/* !CONFIG_NUMA: no policy and no interleave bias */
static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
			pgoff_t index, unsigned int order, pgoff_t *ilx)
{
	*ilx = 0;
	return NULL;
}
#endif /* CONFIG_NUMA */

/*
 * Lock (@lock non-zero) or unlock (@lock zero) the shmem file, keeping
 * VM_LOCKED in info->flags and the mapping's unevictable state in step.
 * Unlocking only takes effect when @ucounts is non-NULL.
 *
 * Returns 0, or -ENOMEM when user_shm_lock() fails.
 */
int shmem_lock(struct file *file, int lock, struct ucounts *ucounts)
{
	struct inode *inode = file_inode(file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int retval = -ENOMEM;

	/*
	 * What serializes the accesses to info->flags?
	 * ipc_lock_object() when called from shmctl_do_lock(),
	 * no serialization needed when called from shm_destroy().
	 */
	if (lock && !(info->flags & VM_LOCKED)) {
		if (!user_shm_lock(inode->i_size, ucounts))
			goto out_nomem;
		info->flags |= VM_LOCKED;
		mapping_set_unevictable(file->f_mapping);
	}
	if (!lock && (info->flags & VM_LOCKED) && ucounts) {
		user_shm_unlock(inode->i_size, ucounts);
		info->flags &= ~VM_LOCKED;
		mapping_clear_unevictable(file->f_mapping);
	}
	retval = 0;

out_nomem:
	return retval;
}

/* mmap: mark the file accessed and pick vm_ops by link count; returns 0 */
static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file_inode(file);

	file_accessed(file);
	/* This is anonymous shared memory if it is unlinked at the time of mmap */
	if (inode->i_nlink)
		vma->vm_ops = &shmem_vm_ops;
	else
		vma->vm_ops = &shmem_anon_vm_ops;
	return 0;
}

/* open: set FMODE_CAN_ODIRECT on the file, then run the generic open */
static int shmem_file_open(struct inode *inode, struct file *file)
{
	file->f_mode |= FMODE_CAN_ODIRECT;
	return generic_file_open(inode, file);
}

#ifdef CONFIG_TMPFS_XATTR
static int shmem_initxattrs(struct inode *, const struct xattr *, void *);

#if IS_ENABLED(CONFIG_UNICODE)
/*
 * shmem_inode_casefold_flags - Deal with casefold file attribute flag
 *
 * The casefold file attribute needs some special checks. It can just be added to
 * an empty dir, and can't be removed from a non-empty dir.
*/
static int shmem_inode_casefold_flags(struct inode *inode, unsigned int fsflags,
				      struct dentry *dentry, unsigned int *i_flags)
{
	unsigned int old = inode->i_flags;
	struct super_block *sb = inode->i_sb;

	if (fsflags & FS_CASEFOLD_FL) {
		/* The checks only apply when the flag is newly being set */
		if (!(old & S_CASEFOLD)) {
			if (!sb->s_encoding)
				return -EOPNOTSUPP;

			if (!S_ISDIR(inode->i_mode))
				return -ENOTDIR;

			if (dentry && !simple_empty(dentry))
				return -ENOTEMPTY;
		}

		*i_flags = *i_flags | S_CASEFOLD;
	} else if (old & S_CASEFOLD) {
		/* Flag is being dropped: refused for a non-empty directory */
		if (dentry && !simple_empty(dentry))
			return -ENOTEMPTY;
	}

	return 0;
}
#else
/* !CONFIG_UNICODE: asking for casefold is rejected with -EOPNOTSUPP */
static int shmem_inode_casefold_flags(struct inode *inode, unsigned int fsflags,
				      struct dentry *dentry, unsigned int *i_flags)
{
	if (fsflags & FS_CASEFOLD_FL)
		return -EOPNOTSUPP;

	return 0;
}
#endif /* IS_ENABLED(CONFIG_UNICODE) */

/*
 * chattr's fsflags are unrelated to extended attributes,
 * but tmpfs has chosen to enable them under the same config option.
 *
 * Translates @fsflags into inode i_flags and applies them.  Returns 0, or
 * the error from shmem_inode_casefold_flags(), in which case the inode
 * flags are left untouched.
 */
static int shmem_set_inode_flags(struct inode *inode, unsigned int fsflags, struct dentry *dentry)
{
	unsigned int i_flags = 0;
	int ret;

	ret = shmem_inode_casefold_flags(inode, fsflags, dentry, &i_flags);
	if (ret)
		return ret;

	if (fsflags & FS_NOATIME_FL)
		i_flags |= S_NOATIME;
	if (fsflags & FS_APPEND_FL)
		i_flags |= S_APPEND;
	if (fsflags & FS_IMMUTABLE_FL)
		i_flags |= S_IMMUTABLE;
	/*
	 * But FS_NODUMP_FL does not require any action in i_flags.
	 */
	inode_set_flags(inode, i_flags, S_NOATIME | S_APPEND | S_IMMUTABLE | S_CASEFOLD);

	return 0;
}
#else
/* !CONFIG_TMPFS_XATTR: fsflags are ignored; note this variant returns void */
static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags, struct dentry *dentry)
{
}
#define shmem_initxattrs NULL
#endif /* CONFIG_TMPFS_XATTR */

/* Directory offset context embedded in the shmem inode info */
static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
{
	return &SHMEM_I(inode)->dir_offsets;
}

/*
 * Allocate and initialise a new shmem inode of type @mode on @sb.
 *
 * Returns the inode, or an ERR_PTR(): the error from shmem_reserve_inode(),
 * or -ENOSPC when new_inode() fails (shmem_free_inode() is then called).
 */
static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
					     struct super_block *sb,
					     struct inode *dir, umode_t mode,
					     dev_t dev, unsigned long flags)
{
	struct inode *inode;
	struct shmem_inode_info *info;
	struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
	ino_t ino;
	int err;

	err = shmem_reserve_inode(sb, &ino);
	if (err)
		return ERR_PTR(err);

	inode = new_inode(sb);
	if (!inode) {
		shmem_free_inode(sb, 0);
		return ERR_PTR(-ENOSPC);
	}

	inode->i_ino = ino;
	inode_init_owner(idmap, inode, dir, mode);
	inode->i_blocks = 0;
	simple_inode_init_ts(inode);
	inode->i_generation = get_random_u32();
	info = SHMEM_I(inode);
	/* Zero the bytes of *info that lie before the inode itself */
	memset(info, 0, (char *)inode - (char *)info);
	spin_lock_init(&info->lock);
	atomic_set(&info->stop_eviction, 0);
	info->seals = F_SEAL_SEAL;
	info->flags = flags & VM_NORESERVE;
	info->i_crtime = inode_get_mtime(inode);
	/* Only the SHMEM_FL_INHERITED bits are taken from the parent dir */
	info->fsflags = (dir == NULL) ? 0 :
		SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
	if (info->fsflags)
		shmem_set_inode_flags(inode, info->fsflags, NULL);
	INIT_LIST_HEAD(&info->shrinklist);
	INIT_LIST_HEAD(&info->swaplist);
	simple_xattrs_init(&info->xattrs);
	cache_no_acl(inode);
	if (sbinfo->noswap)
		mapping_set_unevictable(inode->i_mapping);

	/* Don't consider 'deny' for emergencies and 'force' for testing */
	if (sbinfo->huge)
		mapping_set_large_folios(inode->i_mapping);

	switch (mode & S_IFMT) {
	default:
		inode->i_op = &shmem_special_inode_operations;
		init_special_inode(inode, mode, dev);
		break;
	case S_IFREG:
		inode->i_mapping->a_ops = &shmem_aops;
		inode->i_op = &shmem_inode_operations;
		inode->i_fop = &shmem_file_operations;
		mpol_shared_policy_init(&info->policy,
					 shmem_get_sbmpol(sbinfo));
		break;
	case S_IFDIR:
		inc_nlink(inode);
		/* Some things misbehave if size == 0 on a directory */
		inode->i_size = 2 * BOGO_DIRENT_SIZE;
		inode->i_op = &shmem_dir_inode_operations;
		inode->i_fop = &simple_offset_dir_operations;
		simple_offset_init(shmem_get_offset_ctx(inode));
		break;
	case S_IFLNK:
		/*
		 * Must not load anything in the rbtree,
		 * mpol_free_shared_policy will not be called.
		 */
		mpol_shared_policy_init(&info->policy, NULL);
		break;
	}

	lockdep_annotate_inode_mutex_key(inode);
	return inode;
}

#ifdef CONFIG_TMPFS_QUOTA
/*
 * Quota-aware wrapper around __shmem_get_inode(): also initialises quota
 * for the inode and charges it.  On quota failure the inode is marked
 * S_NOQUOTA, released with iput(), and ERR_PTR(err) is returned.
 */
static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
				     struct super_block *sb, struct inode *dir,
				     umode_t mode, dev_t dev, unsigned long flags)
{
	int err;
	struct inode *inode;

	inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
	if (IS_ERR(inode))
		return inode;

	err = dquot_initialize(inode);
	if (err)
		goto errout;

	err = dquot_alloc_inode(inode);
	if (err) {
		dquot_drop(inode);
		goto errout;
	}
	return inode;

errout:
	inode->i_flags |= S_NOQUOTA;
	iput(inode);
	return ERR_PTR(err);
}
#else
/* !CONFIG_TMPFS_QUOTA: plain pass-through to __shmem_get_inode() */
static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
				     struct super_block *sb, struct inode *dir,
				     umode_t mode, dev_t dev, unsigned long flags)
{
	return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
}
#endif /* CONFIG_TMPFS_QUOTA */

#ifdef CONFIG_USERFAULTFD
/*
 * Install one page at @dst_addr of @dst_vma's shmem file: either a copy of
 * the user page at @src_addr (MFILL_ATOMIC_COPY) or a zeroed page.
 *
 * Returns 0 on success.  -ENOENT means the in-place copy from user space
 * failed: the allocated folio is handed back in *@foliop (see the
 * "fallback" comment below), and a later call with *@foliop set uses that
 * folio instead of allocating.  -ENOMEM is returned when block accounting
 * or folio allocation fails, -EFAULT when the page offset is at or beyond
 * i_size; other errors come from charging, page cache insertion or pte
 * installation.
 */
int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
			   struct vm_area_struct *dst_vma,
			   unsigned long dst_addr,
			   unsigned long src_addr,
			   uffd_flags_t flags,
			   struct folio **foliop)
{
	struct inode *inode = file_inode(dst_vma->vm_file);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct address_space *mapping = inode->i_mapping;
	gfp_t gfp = mapping_gfp_mask(mapping);
	pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
	void *page_kaddr;
	struct folio *folio;
	int ret;
	pgoff_t max_off;

	if (shmem_inode_acct_blocks(inode, 1)) {
		/*
		 * We may have got a page, returned -ENOENT triggering a retry,
		 * and now we find ourselves with -ENOMEM. Release the page, to
		 * avoid a BUG_ON in our caller.
		 */
		if (unlikely(*foliop)) {
			folio_put(*foliop);
			*foliop = NULL;
		}
		return -ENOMEM;
	}

	if (!*foliop) {
		ret = -ENOMEM;
		folio = shmem_alloc_folio(gfp, 0, info, pgoff);
		if (!folio)
			goto out_unacct_blocks;

		if (uffd_flags_mode_is(flags, MFILL_ATOMIC_COPY)) {
			page_kaddr = kmap_local_folio(folio, 0);
			/*
			 * The read mmap_lock is held here.  Despite the
			 * mmap_lock being read recursive a deadlock is still
			 * possible if a writer has taken a lock.  For example:
			 *
			 * process A thread 1 takes read lock on own mmap_lock
			 * process A thread 2 calls mmap, blocks taking write lock
			 * process B thread 1 takes page fault, read lock on own mmap lock
			 * process B thread 2 calls mmap, blocks taking write lock
			 * process A thread 1 blocks taking read lock on process B
			 * process B thread 1 blocks taking read lock on process A
			 *
			 * Disable page faults to prevent potential deadlock
			 * and retry the copy outside the mmap_lock.
			 */
			pagefault_disable();
			ret = copy_from_user(page_kaddr,
					     (const void __user *)src_addr,
					     PAGE_SIZE);
			pagefault_enable();
			kunmap_local(page_kaddr);

			/* fallback to copy_from_user outside mmap_lock */
			if (unlikely(ret)) {
				*foliop = folio;
				ret = -ENOENT;
				/* don't free the page */
				goto out_unacct_blocks;
			}

			flush_dcache_folio(folio);
		} else {		/* ZEROPAGE */
			clear_user_highpage(&folio->page, dst_addr);
		}
	} else {
		/* Retry: take over the folio handed back by the earlier call */
		folio = *foliop;
		VM_BUG_ON_FOLIO(folio_test_large(folio), folio);
		*foliop = NULL;
	}

	VM_BUG_ON(folio_test_locked(folio));
	VM_BUG_ON(folio_test_swapbacked(folio));
	__folio_set_locked(folio);
	__folio_set_swapbacked(folio);
	__folio_mark_uptodate(folio);

	ret = -EFAULT;
	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
	if (unlikely(pgoff >= max_off))
		goto out_release;

	ret = mem_cgroup_charge(folio, dst_vma->vm_mm, gfp);
	if (ret)
		goto out_release;
	ret = shmem_add_to_page_cache(folio, mapping, pgoff, NULL, gfp);
	if (ret)
		goto out_release;

	ret = mfill_atomic_install_pte(dst_pmd, dst_vma, dst_addr,
				       &folio->page, true, flags);
	if (ret)
		goto out_delete_from_cache;

	shmem_recalc_inode(inode, 1, 0);
	folio_unlock(folio);
	return 0;
	/* Unwind in reverse order of the steps taken above */
out_delete_from_cache:
	filemap_remove_folio(folio);
out_release:
	folio_unlock(folio);
	folio_put(folio);
out_unacct_blocks:
	shmem_inode_unacct_blocks(inode, 1);
	return ret;
}
#endif /* CONFIG_USERFAULTFD */

#ifdef CONFIG_TMPFS
static const struct inode_operations shmem_symlink_inode_operations;
static const struct inode_operations shmem_short_symlink_operations;

/*
 * Prepare the folio covering @pos for a write of @len bytes.
 *
 * Returns 0 with the folio (locked, referenced) in *@foliop.  -EPERM is
 * returned when write seals forbid the write, or F_SEAL_GROW forbids
 * extending past i_size; -EIO when the folio is hwpoisoned; otherwise the
 * error from shmem_get_folio().
 */
static int
shmem_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len,
			struct folio **foliop, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct shmem_inode_info *info = SHMEM_I(inode);
	pgoff_t index = pos >> PAGE_SHIFT;
	struct folio *folio;
	int ret = 0;

	/* i_rwsem is held by caller */
	if (unlikely(info->seals & (F_SEAL_GROW |
				   F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
			return -EPERM;
		if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
			return -EPERM;
	}

	ret = shmem_get_folio(inode, index, pos + len, &folio, SGP_WRITE);
	if (ret)
		return ret;

	if (folio_test_hwpoison(folio) ||
	    (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) {
		folio_unlock(folio);
		folio_put(folio);
		return -EIO;
	}

	*foliop = folio;
	return 0;
}

/*
 * Finish a write of @copied bytes at @pos: extend i_size if needed, make
 * the folio uptodate and dirty, then unlock and release it.  Returns @copied.
 */
static int
shmem_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct folio *folio, void *fsdata)
{
	struct inode *inode = mapping->host;

	if (pos + copied > inode->i_size)
		i_size_write(inode, pos + copied);

	if (!folio_test_uptodate(folio)) {
		if (copied < folio_size(folio)) {
			size_t from = offset_in_folio(folio, pos);
			/* Zero everything except the bytes just written */
			folio_zero_segments(folio, 0, from,
					from + copied, folio_size(folio));
		}
		folio_mark_uptodate(folio);
	}
	folio_mark_dirty(folio);
	folio_unlock(folio);
	folio_put(folio);

	return copied;
}

/*
 * read_iter: copy file data, or zeroes for holes, into @to, starting at
 * iocb->ki_pos and stopping at i_size.  Returns the number of bytes copied
 * or, if none were, the error (0 when there is nothing to read).
 */
static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	struct address_space *mapping = inode->i_mapping;
	pgoff_t index;
	unsigned long offset;
	int error = 0;
	ssize_t retval = 0;

	for (;;) {
		struct folio *folio = NULL;
		struct page *page = NULL;
		unsigned long nr, ret;
		loff_t end_offset, i_size = i_size_read(inode);
		bool fallback_page_copy = false;
		size_t fsize;

		if (unlikely(iocb->ki_pos >= i_size))
			break;

		index = iocb->ki_pos >> PAGE_SHIFT;
		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
		if (error) {
			/* -EINVAL is not reported to the reader */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			page = folio_file_page(folio, index);
			if (PageHWPoison(page)) {
				folio_put(folio);
				error = -EIO;
				break;
			}

			/* Copy page by page if another page is poisoned */
			if (folio_test_large(folio) &&
			    folio_test_has_hwpoisoned(folio))
				fallback_page_copy = true;
		}

		/*
		 * We must evaluate after, since reads (unlike writes)
		 * are called without i_rwsem protection against truncate
		 */
		i_size = i_size_read(inode);
		if (unlikely(iocb->ki_pos >= i_size)) {
			if (folio)
				folio_put(folio);
			break;
		}
		end_offset = min_t(loff_t, i_size, iocb->ki_pos + to->count);
		if (folio && likely(!fallback_page_copy))
			fsize = folio_size(folio);
		else
			fsize = PAGE_SIZE;
		offset = iocb->ki_pos & (fsize - 1);
		nr = min_t(loff_t, end_offset - iocb->ki_pos, fsize - offset);

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping)) {
				if (likely(!fallback_page_copy))
					flush_dcache_folio(folio);
				else
					flush_dcache_page(page);
			}

			/*
			 * Mark the folio accessed if we read the beginning.
			 */
			if (!offset)
				folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so
			 * now we can copy it to user space...
			 */
			if (likely(!fallback_page_copy))
				ret = copy_folio_to_iter(folio, offset, nr, to);
			else
				ret = copy_page_to_iter(page, offset, nr, to);
			folio_put(folio);
		} else if (user_backed_iter(to)) {
			/*
			 * Copy to user tends to be so well optimized, but
			 * clear_user() not so much, that it is noticeably
			 * faster to copy the zero page instead of clearing.
			 */
			ret = copy_page_to_iter(ZERO_PAGE(0), offset, nr, to);
		} else {
			/*
			 * But submitting the same page twice in a row to
			 * splice() - or others? - can result in confusion:
			 * so don't attempt that optimization on pipes etc.
			 */
			ret = iov_iter_zero(nr, to);
		}

		retval += ret;
		iocb->ki_pos += ret;

		if (!iov_iter_count(to))
			break;
		/* Short copy with room left in the iterator */
		if (ret < nr) {
			error = -EFAULT;
			break;
		}
		cond_resched();
	}

	file_accessed(file);
	return retval ? retval : error;
}

/*
 * write_iter: run the generic checks, privilege removal, timestamp update
 * and buffered write, all under the inode lock.
 */
static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	ssize_t ret;

	inode_lock(inode);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto unlock;
	ret = file_remove_privs(file);
	if (ret)
		goto unlock;
	ret = file_update_time(file);
	if (ret)
		goto unlock;
	ret = generic_perform_write(iocb, from);
unlock:
	inode_unlock(inode);
	return ret;
}

/*
 * pipe_buf operations for buffers that reference ZERO_PAGE(0): get always
 * succeeds, release does nothing, and stealing is always refused.
 */
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	return true;
}

static void zero_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
}

static bool zero_pipe_buf_try_steal(struct pipe_inode_info *pipe,
				    struct pipe_buffer *buf)
{
	return false;
}

static const struct pipe_buf_operations zero_pipe_buf_ops = {
	.release	= zero_pipe_buf_release,
	.try_steal	= zero_pipe_buf_try_steal,
	.get		= zero_pipe_buf_get,
};

/*
 * Queue up to one page of zeroes (ZERO_PAGE(0)) for file position @fpos.
 * @size is clamped to the remainder of the page.
 *
 * NOTE: the clamped size is returned whether or not a buffer was queued;
 * the caller, shmem_file_splice_read(), bounds its length by the free pipe
 * slots beforehand and stops once the pipe is full.
 */
static size_t splice_zeropage_into_pipe(struct pipe_inode_info *pipe,
					loff_t fpos, size_t size)
{
	size_t offset = fpos & ~PAGE_MASK;

	size = min_t(size_t, size, PAGE_SIZE - offset);

	if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
		struct pipe_buffer *buf = pipe_head_buf(pipe);

		*buf = (struct pipe_buffer) {
			.ops	= &zero_pipe_buf_ops,
			.page	= ZERO_PAGE(0),
			.offset	= offset,
			.len	= size,
		};
		pipe->head++;
	}

	return size;
}

/*
 * splice_read: move up to @len bytes from the file at *@ppos into @pipe,
 * using zero pages for holes.  *@ppos is advanced by the amount spliced.
 * Returns the number of bytes spliced or, if none were, the error.
 */
static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
				      struct pipe_inode_info *pipe,
				      size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	struct address_space *mapping = inode->i_mapping;
	struct folio *folio = NULL;
	size_t total_spliced = 0, used, npages, n, part;
	loff_t isize;
	int error = 0;

	/* Work out how much data we can actually add into the pipe */
	used = pipe_occupancy(pipe->head, pipe->tail);
	npages = max_t(ssize_t, pipe->max_usage - used, 0);
	len = min_t(size_t, len, npages * PAGE_SIZE);

	do {
		bool fallback_page_splice = false;
		struct page *page = NULL;
		pgoff_t index;
		size_t size;

		if (*ppos >= i_size_read(inode))
			break;

		index = *ppos >> PAGE_SHIFT;
		error = shmem_get_folio(inode, index, 0, &folio, SGP_READ);
		if (error) {
			/* -EINVAL is not reported to the caller */
			if (error == -EINVAL)
				error = 0;
			break;
		}
		if (folio) {
			folio_unlock(folio);

			page = folio_file_page(folio, index);
			/* The folio reference is dropped after the loop */
			if (PageHWPoison(page)) {
				error = -EIO;
				break;
			}

			if (folio_test_large(folio) &&
			    folio_test_has_hwpoisoned(folio))
				fallback_page_splice = true;
		}

		/*
		 * i_size must be checked after we know the pages are Uptodate.
		 *
		 * Checking i_size after the check allows us to calculate
		 * the correct value for "nr", which means the zero-filled
		 * part of the page is not copied back to userspace (unless
		 * another truncate extends the file - this is desired though).
		 */
		isize = i_size_read(inode);
		if (unlikely(*ppos >= isize))
			break;
		/*
		 * Fallback to PAGE_SIZE splice if the large folio has hwpoisoned
		 * pages.
		 */
		size = len;
		if (unlikely(fallback_page_splice)) {
			size_t offset = *ppos & ~PAGE_MASK;

			size = umin(size, PAGE_SIZE - offset);
		}
		part = min_t(loff_t, isize - *ppos, size);

		if (folio) {
			/*
			 * If users can be writing to this page using arbitrary
			 * virtual addresses, take care about potential aliasing
			 * before reading the page on the kernel side.
			 */
			if (mapping_writably_mapped(mapping)) {
				if (likely(!fallback_page_splice))
					flush_dcache_folio(folio);
				else
					flush_dcache_page(page);
			}
			folio_mark_accessed(folio);
			/*
			 * Ok, we have the page, and it's up-to-date, so we can
			 * now splice it into the pipe.
			 */
			n = splice_folio_into_pipe(pipe, folio, *ppos, part);
			folio_put(folio);
			/* Avoid a second put in the cleanup after the loop */
			folio = NULL;
		} else {
			n = splice_zeropage_into_pipe(pipe, *ppos, part);
		}

		if (!n)
			break;
		len -= n;
		total_spliced += n;
		*ppos += n;
		in->f_ra.prev_pos = *ppos;
		if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
			break;

		cond_resched();
	} while (len);

	/* Set only when the loop was left while still holding a folio */
	if (folio)
		folio_put(folio);

	file_accessed(in);
	return total_spliced ? total_spliced : error;
}

/*
 * llseek: SEEK_DATA and SEEK_HOLE are answered from the mapping under the
 * inode lock (-ENXIO for a negative @offset); every other @whence goes to
 * generic_file_llseek_size().
 */
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;

	if (whence != SEEK_DATA && whence != SEEK_HOLE)
		return generic_file_llseek_size(file, offset, whence,
					MAX_LFS_FILESIZE, i_size_read(inode));
	if (offset < 0)
		return -ENXIO;

	inode_lock(inode);
	/* We're holding i_rwsem so we can access i_size directly */
	offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
	if (offset >= 0)
		offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
	inode_unlock(inode);
	return offset;
}

/*
 * fallocate: supports plain preallocation, FALLOC_FL_KEEP_SIZE and
 * FALLOC_FL_PUNCH_HOLE; any other mode bit yields -EOPNOTSUPP.
 *
 * While the operation runs, inode->i_private points at an on-stack
 * struct shmem_falloc describing the range being worked on.
 * Returns 0 or a negative errno; file_modified() is called on success.
 */
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
							 loff_t len)
{
	struct inode *inode = file_inode(file);
	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
	struct shmem_inode_info *info = SHMEM_I(inode);
	struct shmem_falloc shmem_falloc;
	pgoff_t start, index, end, undo_fallocend;
	int error;

	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
		return -EOPNOTSUPP;

	inode_lock(inode);

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		struct address_space *mapping = file->f_mapping;
		/* Only whole pages inside the range are unmapped */
		loff_t unmap_start = round_up(offset, PAGE_SIZE);
		loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);

		/* protected by i_rwsem */
		if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
			error = -EPERM;
			goto out;
		}

		/* A non-NULL waitq is what shmem_falloc_wait() tests for */
		shmem_falloc.waitq = &shmem_falloc_waitq;
		shmem_falloc.start = (u64)unmap_start >> PAGE_SHIFT;
		shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
		spin_lock(&inode->i_lock);
		inode->i_private = &shmem_falloc;
		spin_unlock(&inode->i_lock);

		if ((u64)unmap_end > (u64)unmap_start)
			unmap_mapping_range(mapping, unmap_start,
					    1 + unmap_end - unmap_start, 0);
		shmem_truncate_range(inode, offset, offset + len - 1);
		/* No need to unmap again: hole-punching leaves COWed pages */

		spin_lock(&inode->i_lock);
		inode->i_private = NULL;
		wake_up_all(&shmem_falloc_waitq);
		WARN_ON_ONCE(!list_empty(&shmem_falloc_waitq.head));
		spin_unlock(&inode->i_lock);
		error = 0;
		goto out;
	}

	/* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */
	error = inode_newsize_ok(inode, offset + len);
	if (error)
		goto out;

	if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
		error = -EPERM;
		goto out;
	}

	start = offset >> PAGE_SHIFT;
	end = (offset + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	/* Try to avoid a swapstorm if len is impossible to satisfy */
	if (sbinfo->max_blocks && end - start > sbinfo->max_blocks) {
		error = -ENOSPC;
		goto out;
	}

	/* waitq == NULL: faults do not wait on a preallocating fallocate */
	shmem_falloc.waitq = NULL;
	shmem_falloc.start = start;
	shmem_falloc.next  = start;
	shmem_falloc.nr_falloced = 0;
	shmem_falloc.nr_unswapped = 0;
	spin_lock(&inode->i_lock);
	inode->i_private = &shmem_falloc;
	spin_unlock(&inode->i_lock);

	/*
	 * info->fallocend is only relevant when huge pages might be
	 * involved: to prevent split_huge_page() freeing fallocated
	 * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size.
	 */
	undo_fallocend = info->fallocend;
	if (info->fallocend < end)
		info->fallocend = end;

	for (index = start; index < end; ) {
		struct folio *folio;

		/*
		 * Check for fatal signal so that we abort early in OOM
		 * situations. We don't want to abort in case of non-fatal
		 * signals as large fallocate can take noticeable time and
		 * e.g. periodic timers may result in fallocate constantly
		 * restarting.
		 */
		if (fatal_signal_pending(current))
			error = -EINTR;
		else if (shmem_falloc.nr_unswapped > shmem_falloc.nr_falloced)
			error = -ENOMEM;
		else
			error = shmem_get_folio(inode, index, offset + len,
						&folio, SGP_FALLOC);
		if (error) {
			info->fallocend = undo_fallocend;
			/* Remove the !uptodate folios we added */
			if (index > start) {
				shmem_undo_range(inode,
				    (loff_t)start << PAGE_SHIFT,
				    ((loff_t)index << PAGE_SHIFT) - 1, true);
			}
			goto undone;
		}

		/*
		 * Here is a more important optimization than it appears:
		 * a second SGP_FALLOC on the same large folio will clear it,
		 * making it uptodate and un-undoable if we fail later.
		 */
		index = folio_next_index(folio);
		/* Beware 32-bit wraparound */
		if (!index)
			index--;

		/*
		 * Inform shmem_writepage() how far we have reached.
		 * No need for lock or barrier: we have the page lock.
		 */
		if (!folio_test_uptodate(folio))
			shmem_falloc.nr_falloced += index - shmem_falloc.next;
		shmem_falloc.next = index;

		/*
		 * If !uptodate, leave it that way so that freeable folios
		 * can be recognized if we need to rollback on error later.
		 * But mark it dirty so that memory pressure will swap rather
		 * than free the folios we are allocating (and SGP_CACHE folios
		 * might still be clean: we now need to mark those dirty too).
		 */
		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);
		cond_resched();
	}

	if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
		i_size_write(inode, offset + len);
undone:
	spin_lock(&inode->i_lock);
	inode->i_private = NULL;
	spin_unlock(&inode->i_lock);
out:
	if (!error)
		file_modified(file);
	inode_unlock(inode);
	return error;
}

/*
 * statfs: block figures are filled in only when the mount has a block
 * limit, inode figures only when it has an inode limit.  Always returns 0.
 */
static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb);

	buf->f_type = TMPFS_MAGIC;
	buf->f_bsize = PAGE_SIZE;
	buf->f_namelen = NAME_MAX;
	if (sbinfo->max_blocks) {
		buf->f_blocks = sbinfo->max_blocks;
		buf->f_bavail =
		buf->f_bfree  = sbinfo->max_blocks -
				percpu_counter_sum(&sbinfo->used_blocks);
	}
	if (sbinfo->max_inodes) {
		buf->f_files = sbinfo->max_inodes;
		buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE;
	}
	/* else leave those fields 0 like simple_statfs */

	buf->f_fsid = uuid_to_fsid(dentry->d_sb->s_uuid.b);

	return 0;
}

/*
 * File creation. Allocate an inode, and we're done..
*/ static int shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev) { struct inode *inode; int error; if (!generic_ci_validate_strict_name(dir, &dentry->d_name)) return -EINVAL; inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE); if (IS_ERR(inode)) return PTR_ERR(inode); error = simple_acl_create(dir, inode); if (error) goto out_iput; error = security_inode_init_security(inode, dir, &dentry->d_name, shmem_initxattrs, NULL); if (error && error != -EOPNOTSUPP) goto out_iput; error = simple_offset_add(shmem_get_offset_ctx(dir), dentry); if (error) goto out_iput; dir->i_size += BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) d_add(dentry, inode); else d_instantiate(dentry, inode); dget(dentry); /* Extra count - pin the dentry in core */ return error; out_iput: iput(inode); return error; } static int shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { struct inode *inode; int error; inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE); if (IS_ERR(inode)) { error = PTR_ERR(inode); goto err_out; } error = security_inode_init_security(inode, dir, NULL, shmem_initxattrs, NULL); if (error && error != -EOPNOTSUPP) goto out_iput; error = simple_acl_create(dir, inode); if (error) goto out_iput; d_tmpfile(file, inode); err_out: return finish_open_simple(file, error); out_iput: iput(inode); return error; } static int shmem_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { int error; error = shmem_mknod(idmap, dir, dentry, mode | S_IFDIR, 0); if (error) return error; inc_nlink(dir); return 0; } static int shmem_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return shmem_mknod(idmap, dir, dentry, mode | S_IFREG, 0); } /* * Link a file.. 
*/
/*
 * Add @dentry in @dir as another name for the inode behind @old_dentry.
 *
 * Returns 0 on success, or the error from shmem_reserve_inode() or
 * simple_offset_add().  A reservation taken here is given back with
 * shmem_free_inode() if the offset allocation fails.
 */
static int shmem_link(struct dentry *old_dentry, struct inode *dir,
		      struct dentry *dentry)
{
	struct inode *inode = d_inode(old_dentry);
	int ret = 0;

	/*
	 * No ordinary (disk based) filesystem counts links as inodes;
	 * but each new link needs a new dentry, pinning lowmem, and
	 * tmpfs dentries cannot be pruned until they are unlinked.
	 * But if an O_TMPFILE file is linked into the tmpfs, the
	 * first link must skip that, to get the accounting right.
	 */
	if (inode->i_nlink) {
		ret = shmem_reserve_inode(inode->i_sb, NULL);
		if (ret)
			goto out;
	}

	ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
	if (ret) {
		if (inode->i_nlink)
			shmem_free_inode(inode->i_sb, 0);
		goto out;
	}

	dir->i_size += BOGO_DIRENT_SIZE;
	inode_set_mtime_to_ts(dir,
			      inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
	inode_inc_iversion(dir);
	inc_nlink(inode);
	ihold(inode);	/* New dentry reference */
	dget(dentry);	/* Extra pinning count for the created dentry */
	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
		d_add(dentry, inode);
	else
		d_instantiate(dentry, inode);
out:
	return ret;
}

/*
 * Remove the name @dentry from @dir.
 *
 * For a non-directory inode with more than one link, shmem_free_inode() is
 * called first.  The directory offset is released, the parent's size,
 * timestamps and version are updated, the inode's link count is dropped
 * and the pinning dentry reference is put.  Always returns 0.
 */
static int shmem_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
		shmem_free_inode(inode->i_sb, 0);

	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);

	dir->i_size -= BOGO_DIRENT_SIZE;
	inode_set_mtime_to_ts(dir,
			      inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
	inode_inc_iversion(dir);
	drop_nlink(inode);
	dput(dentry);	/* Undo the count from "create" - does all the work */

	/*
	 * For now, VFS can't deal with case-insensitive negative dentries, so
	 * we invalidate them
	 */
	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
		d_invalidate(dentry);

	return 0;
}

/*
 * Remove the directory @dentry from @dir.
 *
 * Returns -ENOTEMPTY when simple_empty() says the directory still has
 * entries.  Otherwise drops one link on the directory itself and one on
 * the parent, then finishes through shmem_unlink().
 */
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	drop_nlink(d_inode(dentry));
	drop_nlink(dir);
	return shmem_unlink(dir, dentry);
}

/*
 * Create a whiteout entry carrying the same parent and name as
 * @old_dentry: a character device made with WHITEOUT_MODE / WHITEOUT_DEV
 * through shmem_mknod().
 *
 * Returns 0 on success, -ENOMEM if d_alloc() fails, or the shmem_mknod()
 * error.
 */
static int shmem_whiteout(struct mnt_idmap *idmap,
			  struct inode *old_dir, struct dentry *old_dentry)
{
	struct dentry *whiteout;
	int error;

	whiteout = d_alloc(old_dentry->d_parent, &old_dentry->d_name);
	if (!whiteout)
		return -ENOMEM;

	error = shmem_mknod(idmap, old_dir, whiteout,
			    S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
	/* Drop the d_alloc() reference whether or not mknod succeeded. */
	dput(whiteout);
	if (error)
		return error;

	/*
	 * Cheat and hash the whiteout while the old dentry is still in
	 * place, instead of playing games with FS_RENAME_DOES_D_MOVE.
	 *
	 * d_lookup() will consistently find one of them at this point,
	 * not sure which one, but that isn't even important.
	 */
	d_rehash(whiteout);
	return 0;
}

/*
 * The VFS layer already does all the dentry stuff for rename,
 * we just have to decrement the usage count for the target if
 * it exists so that the VFS layer correctly frees it when it
 * gets overwritten.
 *
 * Accepted @flags are RENAME_NOREPLACE, RENAME_EXCHANGE and
 * RENAME_WHITEOUT; anything else returns -EINVAL.  RENAME_EXCHANGE is
 * handed entirely to simple_offset_rename_exchange().  Returns -ENOTEMPTY
 * when simple_empty() rejects @new_dentry, the error from
 * shmem_whiteout() or simple_offset_rename(), or 0 on success.
 */
static int shmem_rename2(struct mnt_idmap *idmap,
			 struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry,
			 unsigned int flags)
{
	struct inode *inode = d_inode(old_dentry);
	int they_are_dirs = S_ISDIR(inode->i_mode);
	int error;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return simple_offset_rename_exchange(old_dir, old_dentry,
						     new_dir, new_dentry);

	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (flags & RENAME_WHITEOUT) {
		error = shmem_whiteout(idmap, old_dir, old_dentry);
		if (error)
			return error;
	}

	error = simple_offset_rename(old_dir, old_dentry, new_dir, new_dentry);
	if (error)
		return error;

	if (d_really_is_positive(new_dentry)) {
		/* Existing target: unlink it; its return value is ignored. */
		(void) shmem_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			drop_nlink(d_inode(new_dentry));
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		/* Directory moved to a new parent: shift the link count. */
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	old_dir->i_size -= BOGO_DIRENT_SIZE;
	new_dir->i_size += BOGO_DIRENT_SIZE;
	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
	inode_inc_iversion(old_dir);
	inode_inc_iversion(new_dir);
	return 0;
}

/*
 * Create the symbolic link @dentry in @dir pointing at @symname.
 *
 * Targets whose length including the NUL is at most SHORT_SYMLINK_LEN are
 * duplicated with kmemdup() and attached via inode_set_cached_link();
 * longer ones are copied into folio 0 of the inode's mapping.
 *
 * Returns 0 on success, -ENAMETOOLONG when the target with its NUL exceeds
 * PAGE_SIZE, -ENOMEM when kmemdup() fails, or the error from inode
 * allocation, security initialisation (-EOPNOTSUPP tolerated),
 * simple_offset_add() or shmem_get_folio().
 */
static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
			 struct dentry *dentry, const char *symname)
{
	int error;
	int len;
	struct inode *inode;
	struct folio *folio;
	char *link;

	/* @len counts the terminating NUL. */
	len = strlen(symname) + 1;
	if (len > PAGE_SIZE)
		return -ENAMETOOLONG;

	inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
				VM_NORESERVE);
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	error = security_inode_init_security(inode, dir, &dentry->d_name,
					     shmem_initxattrs, NULL);
	if (error && error != -EOPNOTSUPP)
		goto out_iput;

	error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
	if (error)
		goto out_iput;

	inode->i_size = len-1;
	if (len <= SHORT_SYMLINK_LEN) {
		link = kmemdup(symname, len, GFP_KERNEL);
		if (!link) {
			error = -ENOMEM;
			goto out_remove_offset;
		}
		inode->i_op = &shmem_short_symlink_operations;
		inode_set_cached_link(inode, link, len - 1);
	} else {
		inode_nohighmem(inode);
		inode->i_mapping->a_ops = &shmem_aops;
		error = shmem_get_folio(inode, 0, 0, &folio, SGP_WRITE);
		if (error)
			goto out_remove_offset;
		inode->i_op = &shmem_symlink_inode_operations;
		memcpy(folio_address(folio), symname, len);
		folio_mark_uptodate(folio);
		folio_mark_dirty(folio);
		folio_unlock(folio);
		folio_put(folio);
	}
	dir->i_size += BOGO_DIRENT_SIZE;
	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
	inode_inc_iversion(dir);
	if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir))
		d_add(dentry, inode);
	else
		d_instantiate(dentry, inode);
	dget(dentry);
	return 0;

out_remove_offset:
	simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
out_iput:
	iput(inode);
	return error;
}

/*
 * Delayed-call callback installed by shmem_get_link(): mark the folio
 * passed as @arg accessed and drop the reference on it.
 */
static void shmem_put_link(void *arg)
{
	folio_mark_accessed(arg);
	folio_put(arg);
}

/*
 * get_link handler for symlinks whose target is stored in folio 0.
 *
 * With a NULL @dentry only a folio already present in the mapping is
 * used, and it must be uptodate and not hardware-poisoned; otherwise
 * -ECHILD is returned.  NOTE(review): a NULL dentry presumably denotes an
 * RCU path walk - confirm against the VFS get_link contract.
 * With a non-NULL @dentry the folio is obtained through shmem_get_folio()
 * with SGP_READ and unlocked before use.
 *
 * On success the folio reference is handed to @done (released later by
 * shmem_put_link()) and the folio's address is returned as the link text.
 * On failure an ERR_PTR() is returned.
 */
static const char *shmem_get_link(struct dentry *dentry, struct inode *inode,
				  struct delayed_call *done)
{
	struct folio *folio = NULL;
	int error;

	if (!dentry) {
		folio = filemap_get_folio(inode->i_mapping, 0);
		if (IS_ERR(folio))
			return ERR_PTR(-ECHILD);
		if (PageHWPoison(folio_page(folio, 0)) ||
		    !folio_test_uptodate(folio)) {
			folio_put(folio);
			return ERR_PTR(-ECHILD);
		}
	} else {
		error = shmem_get_folio(inode, 0, 0, &folio, SGP_READ);
		if (error)
			return ERR_PTR(error);
		if (!folio)
			return ERR_PTR(-ECHILD);
		if (PageHWPoison(folio_page(folio, 0))) {
			folio_unlock(folio);
			folio_put(folio);
			return ERR_PTR(-ECHILD);
		}
		folio_unlock(folio);
	}
	set_delayed_call(done, shmem_put_link, folio);
	return folio_address(folio);
}

#ifdef CONFIG_TMPFS_XATTR

/*
 * fileattr_get handler: report the inode's fsflags, masked with
 * SHMEM_FL_USER_VISIBLE, through fileattr_fill_flags().  Always returns 0.
 */
static int shmem_fileattr_get(struct dentry *dentry, struct fileattr *fa)
{
	struct shmem_inode_info *info = SHMEM_I(d_inode(dentry));

	fileattr_fill_flags(fa, info->fsflags & SHMEM_FL_USER_VISIBLE);

	return 0;
}

/*
 * fileattr_set handler.
 *
 * Returns -EOPNOTSUPP when fileattr_has_fsx() is true for @fa or when a
 * flag outside SHMEM_FL_USER_MODIFIABLE is requested.  Otherwise the
 * modifiable bits are replaced with the requested ones, applied through
 * shmem_set_inode_flags(), and stored in info->fsflags once that call
 * succeeds; ctime and the inode version are then updated.
 */
static int shmem_fileattr_set(struct mnt_idmap *idmap,
			      struct dentry *dentry, struct fileattr *fa)
{
	struct inode *inode = d_inode(dentry);
	struct shmem_inode_info *info = SHMEM_I(inode);
	int ret, flags;

	if (fileattr_has_fsx(fa))
		return -EOPNOTSUPP;
	if (fa->flags & ~SHMEM_FL_USER_MODIFIABLE)
		return -EOPNOTSUPP;

	/* Keep the non-modifiable bits, take the modifiable ones from @fa. */
	flags = (info->fsflags & ~SHMEM_FL_USER_MODIFIABLE) |
		(fa->flags & SHMEM_FL_USER_MODIFIABLE);

	ret = shmem_set_inode_flags(inode, flags, dentry);

	if (ret)
		return ret;

	info->fsflags = flags;

	inode_set_ctime_current(inode);
	inode_inc_iversion(inode);
	return 0;
}

/*
 * Superblocks without xattr inode operations may get some security.* xattr
 * support from the LSM "for free". As soon as we have any other xattrs
 * like ACLs, we also need to implement the security.* handlers at
 * filesystem level, though.
 */

/*
 * Callback for security_inode_init_security() for acquiring xattrs.
*/ static int shmem_initxattrs(struct inode *inode, const struct xattr *xattr_array, void *fs_info) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb); const struct xattr *xattr; struct simple_xattr *new_xattr; size_t ispace = 0; size_t len; if (sbinfo->max_inodes) { for (xattr = xattr_array; xattr->name != NULL; xattr++) { ispace += simple_xattr_space(xattr->name, xattr->value_len + XATTR_SECURITY_PREFIX_LEN); } if (ispace) { raw_spin_lock(&sbinfo->stat_lock); if (sbinfo->free_ispace < ispace) ispace = 0; else sbinfo->free_ispace -= ispace; raw_spin_unlock(&sbinfo->stat_lock); if (!ispace) return -ENOSPC; } } for (xattr = xattr_array; xattr->name != NULL; xattr++) { new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len); if (!new_xattr) break; len = strlen(xattr->name) + 1; new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len, GFP_KERNEL_ACCOUNT); if (!new_xattr->name) { kvfree(new_xattr); break; } memcpy(new_xattr->name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); memcpy(new_xattr->name + XATTR_SECURITY_PREFIX_LEN, xattr->name, len); simple_xattr_add(&info->xattrs, new_xattr); } if (xattr->name != NULL) { if (ispace) { raw_spin_lock(&sbinfo->stat_lock); sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } simple_xattrs_free(&info->xattrs, NULL); return -ENOMEM; } return 0; } static int shmem_xattr_handler_get(const struct xattr_handler *handler, struct dentry *unused, struct inode *inode, const char *name, void *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(inode); name = xattr_full_name(handler, name); return simple_xattr_get(&info->xattrs, name, buffer, size); } static int shmem_xattr_handler_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo = 
SHMEM_SB(inode->i_sb); struct simple_xattr *old_xattr; size_t ispace = 0; name = xattr_full_name(handler, name); if (value && sbinfo->max_inodes) { ispace = simple_xattr_space(name, size); raw_spin_lock(&sbinfo->stat_lock); if (sbinfo->free_ispace < ispace) ispace = 0; else sbinfo->free_ispace -= ispace; raw_spin_unlock(&sbinfo->stat_lock); if (!ispace) return -ENOSPC; } old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags); if (!IS_ERR(old_xattr)) { ispace = 0; if (old_xattr && sbinfo->max_inodes) ispace = simple_xattr_space(old_xattr->name, old_xattr->size); simple_xattr_free(old_xattr); old_xattr = NULL; inode_set_ctime_current(inode); inode_inc_iversion(inode); } if (ispace) { raw_spin_lock(&sbinfo->stat_lock); sbinfo->free_ispace += ispace; raw_spin_unlock(&sbinfo->stat_lock); } return PTR_ERR(old_xattr); } static const struct xattr_handler shmem_security_xattr_handler = { .prefix = XATTR_SECURITY_PREFIX, .get = shmem_xattr_handler_get, .set = shmem_xattr_handler_set, }; static const struct xattr_handler shmem_trusted_xattr_handler = { .prefix = XATTR_TRUSTED_PREFIX, .get = shmem_xattr_handler_get, .set = shmem_xattr_handler_set, }; static const struct xattr_handler shmem_user_xattr_handler = { .prefix = XATTR_USER_PREFIX, .get = shmem_xattr_handler_get, .set = shmem_xattr_handler_set, }; static const struct xattr_handler * const shmem_xattr_handlers[] = { &shmem_security_xattr_handler, &shmem_trusted_xattr_handler, &shmem_user_xattr_handler, NULL }; static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size) { struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); return simple_xattr_list(d_inode(dentry), &info->xattrs, buffer, size); } #endif /* CONFIG_TMPFS_XATTR */ static const struct inode_operations shmem_short_symlink_operations = { .getattr = shmem_getattr, .setattr = shmem_setattr, .get_link = simple_get_link, #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, #endif }; static const struct inode_operations 
shmem_symlink_inode_operations = { .getattr = shmem_getattr, .setattr = shmem_setattr, .get_link = shmem_get_link, #ifdef CONFIG_TMPFS_XATTR .listxattr = shmem_listxattr, #endif }; static struct dentry *shmem_get_parent(struct dentry *child) { return ERR_PTR(-ESTALE); } static int shmem_match(struct inode *ino, void *vfh) { __u32 *fh = vfh; __u64 inum = fh[2]; inum = (inum << 32) | fh[1]; return ino->i_ino == inum && fh[0] == ino->i_generation; } /* Find any alias of inode, but prefer a hashed alias */ static struct dentry *shmem_find_alias(struct inode *inode) { struct dentry *alias = d_find_alias(inode); return alias ?: d_find_any_alias(inode); } static struct dentry *shmem_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type) { struct inode *inode; struct dentry *dentry = NULL; u64 inum; if (fh_len < 3) return NULL; inum = fid->raw[2]; inum = (inum << 32) | fid->raw[1]; inode = ilookup5(sb, (unsigned long)(inum + fid->raw[0]), shmem_match, fid->raw); if (inode) { dentry = shmem_find_alias(inode); iput(inode); } return dentry; } static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, struct inode *parent) { if (*len < 3) { *len = 3; return FILEID_INVALID; } if (inode_unhashed(inode)) { /* Unfortunately insert_inode_hash is not idempotent, * so as we hash inodes here rather than at creation * time, we need a lock to ensure we only try * to do it once */ static DEFINE_SPINLOCK(lock); spin_lock(&lock); if (inode_unhashed(inode)) __insert_inode_hash(inode, inode->i_ino + inode->i_generation); spin_unlock(&lock); } fh[0] = inode->i_generation; fh[1] = inode->i_ino; fh[2] = ((__u64)inode->i_ino) >> 32; *len = 3; return 1; } static const struct export_operations shmem_export_ops = { .get_parent = shmem_get_parent, .encode_fh = shmem_encode_fh, .fh_to_dentry = shmem_fh_to_dentry, }; enum shmem_param { Opt_gid, Opt_huge, Opt_mode, Opt_mpol, Opt_nr_blocks, Opt_nr_inodes, Opt_size, Opt_uid, Opt_inode32, Opt_inode64, Opt_noswap, 
Opt_quota, Opt_usrquota, Opt_grpquota, Opt_usrquota_block_hardlimit, Opt_usrquota_inode_hardlimit, Opt_grpquota_block_hardlimit, Opt_grpquota_inode_hardlimit, Opt_casefold_version, Opt_casefold, Opt_strict_encoding, }; static const struct constant_table shmem_param_enums_huge[] = { {"never", SHMEM_HUGE_NEVER }, {"always", SHMEM_HUGE_ALWAYS }, {"within_size", SHMEM_HUGE_WITHIN_SIZE }, {"advise", SHMEM_HUGE_ADVISE }, {} }; const struct fs_parameter_spec shmem_fs_parameters[] = { fsparam_gid ("gid", Opt_gid), fsparam_enum ("huge", Opt_huge, shmem_param_enums_huge), fsparam_u32oct("mode", Opt_mode), fsparam_string("mpol", Opt_mpol), fsparam_string("nr_blocks", Opt_nr_blocks), fsparam_string("nr_inodes", Opt_nr_inodes), fsparam_string("size", Opt_size), fsparam_uid ("uid", Opt_uid), fsparam_flag ("inode32", Opt_inode32), fsparam_flag ("inode64", Opt_inode64), fsparam_flag ("noswap", Opt_noswap), #ifdef CONFIG_TMPFS_QUOTA fsparam_flag ("quota", Opt_quota), fsparam_flag ("usrquota", Opt_usrquota), fsparam_flag ("grpquota", Opt_grpquota), fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit), fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit), fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit), fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit), #endif fsparam_string("casefold", Opt_casefold_version), fsparam_flag ("casefold", Opt_casefold), fsparam_flag ("strict_encoding", Opt_strict_encoding), {} }; #if IS_ENABLED(CONFIG_UNICODE) static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param, bool latest_version) { struct shmem_options *ctx = fc->fs_private; int version = UTF8_LATEST; struct unicode_map *encoding; char *version_str = param->string + 5; if (!latest_version) { if (strncmp(param->string, "utf8-", 5)) return invalfc(fc, "Only UTF-8 encodings are supported " "in the format: utf8-<version number>"); version = utf8_parse_version(version_str); if (version 
< 0) return invalfc(fc, "Invalid UTF-8 version: %s", version_str); } encoding = utf8_load(version); if (IS_ERR(encoding)) { return invalfc(fc, "Failed loading UTF-8 version: utf8-%u.%u.%u\n", unicode_major(version), unicode_minor(version), unicode_rev(version)); } pr_info("tmpfs: Using encoding : utf8-%u.%u.%u\n", unicode_major(version), unicode_minor(version), unicode_rev(version)); ctx->encoding = encoding; return 0; } #else static int shmem_parse_opt_casefold(struct fs_context *fc, struct fs_parameter *param, bool latest_version) { return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n"); } #endif static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param) { struct shmem_options *ctx = fc->fs_private; struct fs_parse_result result; unsigned long long size; char *rest; int opt; kuid_t kuid; kgid_t kgid; opt = fs_parse(fc, shmem_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_size: size = memparse(param->string, &rest); if (*rest == '%') { size <<= PAGE_SHIFT; size *= totalram_pages(); do_div(size, 100); rest++; } if (*rest) goto bad_value; ctx->blocks = DIV_ROUND_UP(size, PAGE_SIZE); ctx->seen |= SHMEM_SEEN_BLOCKS; break; case Opt_nr_blocks: ctx->blocks = memparse(param->string, &rest); if (*rest || ctx->blocks > LONG_MAX) goto bad_value; ctx->seen |= SHMEM_SEEN_BLOCKS; break; case Opt_nr_inodes: ctx->inodes = memparse(param->string, &rest); if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE) goto bad_value; ctx->seen |= SHMEM_SEEN_INODES; break; case Opt_mode: ctx->mode = result.uint_32 & 07777; break; case Opt_uid: kuid = result.uid; /* * The requested uid must be representable in the * filesystem's idmapping. */ if (!kuid_has_mapping(fc->user_ns, kuid)) goto bad_value; ctx->uid = kuid; break; case Opt_gid: kgid = result.gid; /* * The requested gid must be representable in the * filesystem's idmapping. 
*/ if (!kgid_has_mapping(fc->user_ns, kgid)) goto bad_value; ctx->gid = kgid; break; case Opt_huge: ctx->huge = result.uint_32; if (ctx->huge != SHMEM_HUGE_NEVER && !(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && has_transparent_hugepage())) goto unsupported_parameter; ctx->seen |= SHMEM_SEEN_HUGE; break; case Opt_mpol: if (IS_ENABLED(CONFIG_NUMA)) { mpol_put(ctx->mpol); ctx->mpol = NULL; if (mpol_parse_str(param->string, &ctx->mpol)) goto bad_value; break; } goto unsupported_parameter; case Opt_inode32: ctx->full_inums = false; ctx->seen |= SHMEM_SEEN_INUMS; break; case Opt_inode64: if (sizeof(ino_t) < 8) { return invalfc(fc, "Cannot use inode64 with <64bit inums in kernel\n"); } ctx->full_inums = true; ctx->seen |= SHMEM_SEEN_INUMS; break; case Opt_noswap: if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) { return invalfc(fc, "Turning off swap in unprivileged tmpfs mounts unsupported"); } ctx->noswap = true; ctx->seen |= SHMEM_SEEN_NOSWAP; break; case Opt_quota: if (fc->user_ns != &init_user_ns) return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); ctx->seen |= SHMEM_SEEN_QUOTA; ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP); break; case Opt_usrquota: if (fc->user_ns != &init_user_ns) return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); ctx->seen |= SHMEM_SEEN_QUOTA; ctx->quota_types |= QTYPE_MASK_USR; break; case Opt_grpquota: if (fc->user_ns != &init_user_ns) return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported"); ctx->seen |= SHMEM_SEEN_QUOTA; ctx->quota_types |= QTYPE_MASK_GRP; break; case Opt_usrquota_block_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) return invalfc(fc, "User quota block hardlimit too large."); ctx->qlimits.usrquota_bhardlimit = size; break; case Opt_grpquota_block_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_SPC_LIMIT) 
return invalfc(fc, "Group quota block hardlimit too large."); ctx->qlimits.grpquota_bhardlimit = size; break; case Opt_usrquota_inode_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_INO_LIMIT) return invalfc(fc, "User quota inode hardlimit too large."); ctx->qlimits.usrquota_ihardlimit = size; break; case Opt_grpquota_inode_hardlimit: size = memparse(param->string, &rest); if (*rest || !size) goto bad_value; if (size > SHMEM_QUOTA_MAX_INO_LIMIT) return invalfc(fc, "Group quota inode hardlimit too large."); ctx->qlimits.grpquota_ihardlimit = size; break; case Opt_casefold_version: return shmem_parse_opt_casefold(fc, param, false); case Opt_casefold: return shmem_parse_opt_casefold(fc, param, true); case Opt_strict_encoding: #if IS_ENABLED(CONFIG_UNICODE) ctx->strict_encoding = true; break; #else return invalfc(fc, "tmpfs: Kernel not built with CONFIG_UNICODE\n"); #endif } return 0; unsupported_parameter: return invalfc(fc, "Unsupported parameter '%s'", param->key); bad_value: return invalfc(fc, "Bad value for '%s'", param->key); } static char *shmem_next_opt(char **s) { char *sbegin = *s; char *p; if (sbegin == NULL) return NULL; /* * NUL-terminate this option: unfortunately, * mount options form a comma-separated list, * but mpol's nodelist may also contain commas. */ for (;;) { p = strchr(*s, ','); if (p == NULL) break; *s = p + 1; if (!isdigit(*(p+1))) { *p = '\0'; return sbegin; } } *s = NULL; return sbegin; } static int shmem_parse_monolithic(struct fs_context *fc, void *data) { return vfs_parse_monolithic_sep(fc, data, shmem_next_opt); } /* * Reconfigure a shmem filesystem. 
*/ static int shmem_reconfigure(struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb); unsigned long used_isp; struct mempolicy *mpol = NULL; const char *err; raw_spin_lock(&sbinfo->stat_lock); used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace; if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) { if (!sbinfo->max_blocks) { err = "Cannot retroactively limit size"; goto out; } if (percpu_counter_compare(&sbinfo->used_blocks, ctx->blocks) > 0) { err = "Too small a size for current use"; goto out; } } if ((ctx->seen & SHMEM_SEEN_INODES) && ctx->inodes) { if (!sbinfo->max_inodes) { err = "Cannot retroactively limit inodes"; goto out; } if (ctx->inodes * BOGO_INODE_SIZE < used_isp) { err = "Too few inodes for current use"; goto out; } } if ((ctx->seen & SHMEM_SEEN_INUMS) && !ctx->full_inums && sbinfo->next_ino > UINT_MAX) { err = "Current inum too high to switch to 32-bit inums"; goto out; } if ((ctx->seen & SHMEM_SEEN_NOSWAP) && ctx->noswap && !sbinfo->noswap) { err = "Cannot disable swap on remount"; goto out; } if (!(ctx->seen & SHMEM_SEEN_NOSWAP) && !ctx->noswap && sbinfo->noswap) { err = "Cannot enable swap on remount if it was disabled on first mount"; goto out; } if (ctx->seen & SHMEM_SEEN_QUOTA && !sb_any_quota_loaded(fc->root->d_sb)) { err = "Cannot enable quota on remount"; goto out; } #ifdef CONFIG_TMPFS_QUOTA #define CHANGED_LIMIT(name) \ (ctx->qlimits.name## hardlimit && \ (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit)) if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) || CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) { err = "Cannot change global quota limit on remount"; goto out; } #endif /* CONFIG_TMPFS_QUOTA */ if (ctx->seen & SHMEM_SEEN_HUGE) sbinfo->huge = ctx->huge; if (ctx->seen & SHMEM_SEEN_INUMS) sbinfo->full_inums = ctx->full_inums; if (ctx->seen & SHMEM_SEEN_BLOCKS) sbinfo->max_blocks = ctx->blocks; if (ctx->seen & 
SHMEM_SEEN_INODES) { sbinfo->max_inodes = ctx->inodes; sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp; } /* * Preserve previous mempolicy unless mpol remount option was specified. */ if (ctx->mpol) { mpol = sbinfo->mpol; sbinfo->mpol = ctx->mpol; /* transfers initial ref */ ctx->mpol = NULL; } if (ctx->noswap) sbinfo->noswap = true; raw_spin_unlock(&sbinfo->stat_lock); mpol_put(mpol); return 0; out: raw_spin_unlock(&sbinfo->stat_lock); return invalfc(fc, "%s", err); } static int shmem_show_options(struct seq_file *seq, struct dentry *root) { struct shmem_sb_info *sbinfo = SHMEM_SB(root->d_sb); struct mempolicy *mpol; if (sbinfo->max_blocks != shmem_default_max_blocks()) seq_printf(seq, ",size=%luk", K(sbinfo->max_blocks)); if (sbinfo->max_inodes != shmem_default_max_inodes()) seq_printf(seq, ",nr_inodes=%lu", sbinfo->max_inodes); if (sbinfo->mode != (0777 | S_ISVTX)) seq_printf(seq, ",mode=%03ho", sbinfo->mode); if (!uid_eq(sbinfo->uid, GLOBAL_ROOT_UID)) seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbinfo->uid)); if (!gid_eq(sbinfo->gid, GLOBAL_ROOT_GID)) seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbinfo->gid)); /* * Showing inode{64,32} might be useful even if it's the system default, * since then people don't have to resort to checking both here and * /proc/config.gz to confirm 64-bit inums were successfully applied * (which may not even exist if IKCONFIG_PROC isn't enabled). * * We hide it when inode64 isn't the default and we are using 32-bit * inodes, since that probably just means the feature isn't even under * consideration. 
* * As such: * * +-----------------+-----------------+ * | TMPFS_INODE64=y | TMPFS_INODE64=n | * +------------------+-----------------+-----------------+ * | full_inums=true | show | show | * | full_inums=false | show | hide | * +------------------+-----------------+-----------------+ * */ if (IS_ENABLED(CONFIG_TMPFS_INODE64) || sbinfo->full_inums) seq_printf(seq, ",inode%d", (sbinfo->full_inums ? 64 : 32)); #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* Rightly or wrongly, show huge mount option unmasked by shmem_huge */ if (sbinfo->huge) seq_printf(seq, ",huge=%s", shmem_format_huge(sbinfo->huge)); #endif mpol = shmem_get_sbmpol(sbinfo); shmem_show_mpol(seq, mpol); mpol_put(mpol); if (sbinfo->noswap) seq_printf(seq, ",noswap"); #ifdef CONFIG_TMPFS_QUOTA if (sb_has_quota_active(root->d_sb, USRQUOTA)) seq_printf(seq, ",usrquota"); if (sb_has_quota_active(root->d_sb, GRPQUOTA)) seq_printf(seq, ",grpquota"); if (sbinfo->qlimits.usrquota_bhardlimit) seq_printf(seq, ",usrquota_block_hardlimit=%lld", sbinfo->qlimits.usrquota_bhardlimit); if (sbinfo->qlimits.grpquota_bhardlimit) seq_printf(seq, ",grpquota_block_hardlimit=%lld", sbinfo->qlimits.grpquota_bhardlimit); if (sbinfo->qlimits.usrquota_ihardlimit) seq_printf(seq, ",usrquota_inode_hardlimit=%lld", sbinfo->qlimits.usrquota_ihardlimit); if (sbinfo->qlimits.grpquota_ihardlimit) seq_printf(seq, ",grpquota_inode_hardlimit=%lld", sbinfo->qlimits.grpquota_ihardlimit); #endif return 0; } #endif /* CONFIG_TMPFS */ static void shmem_put_super(struct super_block *sb) { struct shmem_sb_info *sbinfo = SHMEM_SB(sb); #if IS_ENABLED(CONFIG_UNICODE) if (sb->s_encoding) utf8_unload(sb->s_encoding); #endif #ifdef CONFIG_TMPFS_QUOTA shmem_disable_quotas(sb); #endif free_percpu(sbinfo->ino_batch); percpu_counter_destroy(&sbinfo->used_blocks); mpol_put(sbinfo->mpol); kfree(sbinfo); sb->s_fs_info = NULL; } #if IS_ENABLED(CONFIG_UNICODE) && defined(CONFIG_TMPFS) static const struct dentry_operations shmem_ci_dentry_ops = { .d_hash = 
generic_ci_d_hash, .d_compare = generic_ci_d_compare, .d_delete = always_delete_dentry, }; #endif static int shmem_fill_super(struct super_block *sb, struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; struct inode *inode; struct shmem_sb_info *sbinfo; int error = -ENOMEM; /* Round up to L1_CACHE_BYTES to resist false sharing */ sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info), L1_CACHE_BYTES), GFP_KERNEL); if (!sbinfo) return error; sb->s_fs_info = sbinfo; #ifdef CONFIG_TMPFS /* * Per default we only allow half of the physical ram per * tmpfs instance, limiting inodes to one per page of lowmem; * but the internal instance is left unlimited. */ if (!(sb->s_flags & SB_KERNMOUNT)) { if (!(ctx->seen & SHMEM_SEEN_BLOCKS)) ctx->blocks = shmem_default_max_blocks(); if (!(ctx->seen & SHMEM_SEEN_INODES)) ctx->inodes = shmem_default_max_inodes(); if (!(ctx->seen & SHMEM_SEEN_INUMS)) ctx->full_inums = IS_ENABLED(CONFIG_TMPFS_INODE64); sbinfo->noswap = ctx->noswap; } else { sb->s_flags |= SB_NOUSER; } sb->s_export_op = &shmem_export_ops; sb->s_flags |= SB_NOSEC | SB_I_VERSION; #if IS_ENABLED(CONFIG_UNICODE) if (!ctx->encoding && ctx->strict_encoding) { pr_err("tmpfs: strict_encoding option without encoding is forbidden\n"); error = -EINVAL; goto failed; } if (ctx->encoding) { sb->s_encoding = ctx->encoding; sb->s_d_op = &shmem_ci_dentry_ops; if (ctx->strict_encoding) sb->s_encoding_flags = SB_ENC_STRICT_MODE_FL; } #endif #else sb->s_flags |= SB_NOUSER; #endif /* CONFIG_TMPFS */ sbinfo->max_blocks = ctx->blocks; sbinfo->max_inodes = ctx->inodes; sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE; if (sb->s_flags & SB_KERNMOUNT) { sbinfo->ino_batch = alloc_percpu(ino_t); if (!sbinfo->ino_batch) goto failed; } sbinfo->uid = ctx->uid; sbinfo->gid = ctx->gid; sbinfo->full_inums = ctx->full_inums; sbinfo->mode = ctx->mode; #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (ctx->seen & SHMEM_SEEN_HUGE) sbinfo->huge = ctx->huge; else sbinfo->huge = tmpfs_huge; 
#endif sbinfo->mpol = ctx->mpol; ctx->mpol = NULL; raw_spin_lock_init(&sbinfo->stat_lock); if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; spin_lock_init(&sbinfo->shrinklist_lock); INIT_LIST_HEAD(&sbinfo->shrinklist); sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_SIZE; sb->s_blocksize_bits = PAGE_SHIFT; sb->s_magic = TMPFS_MAGIC; sb->s_op = &shmem_ops; sb->s_time_gran = 1; #ifdef CONFIG_TMPFS_XATTR sb->s_xattr = shmem_xattr_handlers; #endif #ifdef CONFIG_TMPFS_POSIX_ACL sb->s_flags |= SB_POSIXACL; #endif uuid_t uuid; uuid_gen(&uuid); super_set_uuid(sb, uuid.b, sizeof(uuid)); #ifdef CONFIG_TMPFS_QUOTA if (ctx->seen & SHMEM_SEEN_QUOTA) { sb->dq_op = &shmem_quota_operations; sb->s_qcop = &dquot_quotactl_sysfile_ops; sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP; /* Copy the default limits from ctx into sbinfo */ memcpy(&sbinfo->qlimits, &ctx->qlimits, sizeof(struct shmem_quota_limits)); if (shmem_enable_quotas(sb, ctx->quota_types)) goto failed; } #endif /* CONFIG_TMPFS_QUOTA */ inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0, VM_NORESERVE); if (IS_ERR(inode)) { error = PTR_ERR(inode); goto failed; } inode->i_uid = sbinfo->uid; inode->i_gid = sbinfo->gid; sb->s_root = d_make_root(inode); if (!sb->s_root) goto failed; return 0; failed: shmem_put_super(sb); return error; } static int shmem_get_tree(struct fs_context *fc) { return get_tree_nodev(fc, shmem_fill_super); } static void shmem_free_fc(struct fs_context *fc) { struct shmem_options *ctx = fc->fs_private; if (ctx) { mpol_put(ctx->mpol); kfree(ctx); } } static const struct fs_context_operations shmem_fs_context_ops = { .free = shmem_free_fc, .get_tree = shmem_get_tree, #ifdef CONFIG_TMPFS .parse_monolithic = shmem_parse_monolithic, .parse_param = shmem_parse_one, .reconfigure = shmem_reconfigure, #endif }; static struct kmem_cache *shmem_inode_cachep __ro_after_init; static struct inode *shmem_alloc_inode(struct super_block *sb) { struct 
shmem_inode_info *info;

	info = alloc_inode_sb(sb, shmem_inode_cachep, GFP_KERNEL);
	if (!info)
		return NULL;
	return &info->vfs_inode;
}

/*
 * ->free_inode: free the symlink target stored in i_link (symlinks only)
 * and return the shmem_inode_info to its slab cache.
 */
static void shmem_free_in_core_inode(struct inode *inode)
{
	if (S_ISLNK(inode->i_mode))
		kfree(inode->i_link);
	kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}

/*
 * ->destroy_inode: release type-specific state - the shared mempolicy of a
 * regular file, the offset context of a directory.
 */
static void shmem_destroy_inode(struct inode *inode)
{
	if (S_ISREG(inode->i_mode))
		mpol_free_shared_policy(&SHMEM_I(inode)->policy);
	if (S_ISDIR(inode->i_mode))
		simple_offset_destroy(shmem_get_offset_ctx(inode));
}

/* Slab constructor: one-time initialisation of the embedded vfs_inode. */
static void shmem_init_inode(void *foo)
{
	struct shmem_inode_info *info = foo;

	inode_init_once(&info->vfs_inode);
}

/* Create the inode slab cache; SLAB_PANIC is passed in the flags. */
static void __init shmem_init_inodecache(void)
{
	shmem_inode_cachep = kmem_cache_create("shmem_inode_cache",
				sizeof(struct shmem_inode_info),
				0, SLAB_PANIC|SLAB_ACCOUNT, shmem_init_inode);
}

/* Undo shmem_init_inodecache(); used on the shmem_init() error path. */
static void __init shmem_destroy_inodecache(void)
{
	kmem_cache_destroy(shmem_inode_cachep);
}

/* Keep the page in page cache instead of truncating it */
static int shmem_error_remove_folio(struct address_space *mapping,
				   struct folio *folio)
{
	return 0;
}

/* Address-space operations; write_begin/write_end need CONFIG_TMPFS. */
static const struct address_space_operations shmem_aops = {
	.writepage	= shmem_writepage,
	.dirty_folio	= noop_dirty_folio,
#ifdef CONFIG_TMPFS
	.write_begin	= shmem_write_begin,
	.write_end	= shmem_write_end,
#endif
#ifdef CONFIG_MIGRATION
	.migrate_folio	= migrate_folio,
#endif
	.error_remove_folio = shmem_error_remove_folio,
};

/*
 * File operations for regular shmem files.  Without CONFIG_TMPFS only
 * mmap, open and get_unmapped_area are provided.
 */
static const struct file_operations shmem_file_operations = {
	.mmap		= shmem_mmap,
	.open		= shmem_file_open,
	.get_unmapped_area = shmem_get_unmapped_area,
#ifdef CONFIG_TMPFS
	.llseek		= shmem_file_llseek,
	.read_iter	= shmem_file_read_iter,
	.write_iter	= shmem_file_write_iter,
	.fsync		= noop_fsync,
	.splice_read	= shmem_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= shmem_fallocate,
#endif
};

/* Inode operations for regular files. */
static const struct inode_operations shmem_inode_operations = {
	.getattr	= shmem_getattr,
	.setattr	= shmem_setattr,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
	.set_acl	= simple_set_acl,
	.fileattr_get	= shmem_fileattr_get,
	.fileattr_set	= shmem_fileattr_set,
#endif
};

/* Inode operations for directories. */
static const struct inode_operations shmem_dir_inode_operations = {
#ifdef CONFIG_TMPFS
	.getattr	= shmem_getattr,
	.create		= shmem_create,
	.lookup		= simple_lookup,
	.link		= shmem_link,
	.unlink		= shmem_unlink,
	.symlink	= shmem_symlink,
	.mkdir		= shmem_mkdir,
	.rmdir		= shmem_rmdir,
	.mknod		= shmem_mknod,
	.rename		= shmem_rename2,
	.tmpfile	= shmem_tmpfile,
	.get_offset_ctx	= shmem_get_offset_ctx,
#endif
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
	.fileattr_get	= shmem_fileattr_get,
	.fileattr_set	= shmem_fileattr_set,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};

/* Inode operations for the remaining ("special") inode types. */
static const struct inode_operations shmem_special_inode_operations = {
	.getattr	= shmem_getattr,
#ifdef CONFIG_TMPFS_XATTR
	.listxattr	= shmem_listxattr,
#endif
#ifdef CONFIG_TMPFS_POSIX_ACL
	.setattr	= shmem_setattr,
	.set_acl	= simple_set_acl,
#endif
};

/* Superblock operations; installed as sb->s_op by shmem_fill_super(). */
static const struct super_operations shmem_ops = {
	.alloc_inode	= shmem_alloc_inode,
	.free_inode	= shmem_free_in_core_inode,
	.destroy_inode	= shmem_destroy_inode,
#ifdef CONFIG_TMPFS
	.statfs		= shmem_statfs,
	.show_options	= shmem_show_options,
#endif
#ifdef CONFIG_TMPFS_QUOTA
	.get_dquots	= shmem_get_dquots,
#endif
	.evict_inode	= shmem_evict_inode,
	.drop_inode	= generic_delete_inode,
	.put_super	= shmem_put_super,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	.nr_cached_objects	= shmem_unused_huge_count,
	.free_cached_objects	= shmem_unused_huge_scan,
#endif
};

/* VM operations for mappings of shmem files. */
static const struct vm_operations_struct shmem_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};

/*
 * VM operations installed by shmem_zero_setup(); currently the same
 * callbacks as shmem_vm_ops, kept as a distinct object.
 */
static const struct vm_operations_struct shmem_anon_vm_ops = {
	.fault		= shmem_fault,
	.map_pages	= filemap_map_pages,
#ifdef CONFIG_NUMA
	.set_policy     = shmem_set_policy,
	.get_policy     = shmem_get_policy,
#endif
};

/*
 * shmem_init_fs_context - allocate the shmem_options for a new context.
 *
 * Defaults: mode 01777, owner taken from the caller's fsuid/fsgid.
 * Returns 0 or -ENOMEM.
 */
int shmem_init_fs_context(struct fs_context *fc)
{
struct shmem_options *ctx;

	ctx = kzalloc(sizeof(struct shmem_options), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->mode = 0777 | S_ISVTX;
	ctx->uid = current_fsuid();
	ctx->gid = current_fsgid();

#if IS_ENABLED(CONFIG_UNICODE)
	/* Already NULL after kzalloc(); spelled out for clarity. */
	ctx->encoding = NULL;
#endif

	fc->fs_private = ctx;
	fc->ops = &shmem_fs_context_ops;
	return 0;
}

/* The "tmpfs" filesystem type (full CONFIG_SHMEM implementation). */
static struct file_system_type shmem_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "tmpfs",
	.init_fs_context = shmem_init_fs_context,
#ifdef CONFIG_TMPFS
	.parameters	= shmem_fs_parameters,
#endif
	.kill_sb	= kill_litter_super,
	.fs_flags	= FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME,
};

#if defined(CONFIG_SYSFS) && defined(CONFIG_TMPFS)

/* Initialiser for a kobj_attribute named after the stringified _name. */
#define __INIT_KOBJ_ATTR(_name, _mode, _show, _store)			\
{									\
	.attr	= { .name = __stringify(_name), .mode = _mode },	\
	.show	= _show,						\
	.store	= _store,						\
}

/* Write-only (0200) tmpfs sysfs attribute. */
#define TMPFS_ATTR_W(_name, _store)				\
	static struct kobj_attribute tmpfs_attr_##_name =	\
			__INIT_KOBJ_ATTR(_name, 0200, NULL, _store)

/* Read-write (0644) tmpfs sysfs attribute. */
#define TMPFS_ATTR_RW(_name, _show, _store)			\
	static struct kobj_attribute tmpfs_attr_##_name =	\
			__INIT_KOBJ_ATTR(_name, 0644, _show, _store)

/* Read-only (0444) tmpfs sysfs attribute. */
#define TMPFS_ATTR_RO(_name, _show)				\
	static struct kobj_attribute tmpfs_attr_##_name =	\
			__INIT_KOBJ_ATTR(_name, 0444, _show, NULL)

#if IS_ENABLED(CONFIG_UNICODE)
/* "casefold" feature file: its presence plus "supported" is the signal. */
static ssize_t casefold_show(struct kobject *kobj, struct kobj_attribute *a,
			char *buf)
{
	return sysfs_emit(buf, "supported\n");
}
TMPFS_ATTR_RO(casefold, casefold_show);
#endif

/* NULL-terminated list of attributes in the "features" group. */
static struct attribute *tmpfs_attributes[] = {
#if IS_ENABLED(CONFIG_UNICODE)
	&tmpfs_attr_casefold.attr,
#endif
	NULL
};

static const struct attribute_group tmpfs_attribute_group = {
	.attrs = tmpfs_attributes,
	.name = "features"
};

static struct kobject *tmpfs_kobj;

/*
 * tmpfs_sysfs_init - create the "tmpfs" kobject under fs_kobj and its
 * "features" attribute group.
 *
 * Returns 0 on success, -ENOMEM if the kobject cannot be created, or the
 * error from sysfs_create_group() (the kobject is dropped in that case).
 */
static int __init tmpfs_sysfs_init(void)
{
	int ret;

	tmpfs_kobj = kobject_create_and_add("tmpfs", fs_kobj);
	if (!tmpfs_kobj)
		return -ENOMEM;

	ret = sysfs_create_group(tmpfs_kobj, &tmpfs_attribute_group);
	if (ret)
		kobject_put(tmpfs_kobj);

	return ret;
}
#endif /* CONFIG_SYSFS && CONFIG_TMPFS */

/*
 * shmem_init - boot-time setup of the inode cache, the filesystem type and
 * the internal mount.  On failure shm_mnt is left holding an ERR_PTR().
 */
void __init
shmem_init(void)
{
	int error;

	shmem_init_inodecache();

#ifdef CONFIG_TMPFS_QUOTA
	register_quota_format(&shmem_quota_format);
#endif

	error = register_filesystem(&shmem_fs_type);
	if (error) {
		pr_err("Could not register tmpfs\n");
		goto out2;
	}

	shm_mnt = kern_mount(&shmem_fs_type);
	if (IS_ERR(shm_mnt)) {
		error = PTR_ERR(shm_mnt);
		pr_err("Could not kern_mount tmpfs\n");
		goto out1;
	}

#if defined(CONFIG_SYSFS) && defined(CONFIG_TMPFS)
	/*
	 * NOTE(review): if this fails, the mount obtained from kern_mount()
	 * above is not unmounted before shm_mnt is overwritten at out2 -
	 * confirm whether that is intentional.
	 */
	error = tmpfs_sysfs_init();
	if (error) {
		pr_err("Could not init tmpfs sysfs\n");
		goto out1;
	}
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
	else
		shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */

	/*
	 * Default to setting PMD-sized THP to inherit the global setting and
	 * disable all other multi-size THPs.
	 */
	if (!shmem_orders_configured)
		huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
#endif
	return;

	/* Unwind in reverse order of setup. */
out1:
	unregister_filesystem(&shmem_fs_type);
out2:
#ifdef CONFIG_TMPFS_QUOTA
	unregister_quota_format(&shmem_quota_format);
#endif
	shmem_destroy_inodecache();
	shm_mnt = ERR_PTR(error);
}

#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && defined(CONFIG_SYSFS)
/*
 * Show every known shmem_huge policy on one line, space separated, with the
 * currently selected one wrapped in square brackets.
 */
static ssize_t shmem_enabled_show(struct kobject *kobj,
				  struct kobj_attribute *attr, char *buf)
{
	static const int values[] = {
		SHMEM_HUGE_ALWAYS,
		SHMEM_HUGE_WITHIN_SIZE,
		SHMEM_HUGE_ADVISE,
		SHMEM_HUGE_NEVER,
		SHMEM_HUGE_DENY,
		SHMEM_HUGE_FORCE,
	};
	int len = 0;
	int i;

	for (i = 0; i < ARRAY_SIZE(values); i++) {
		/* First "%s" is the separator: empty for the first entry. */
		len += sysfs_emit_at(buf, len,
				shmem_huge == values[i] ? "%s[%s]" : "%s%s",
				i ? " " : "",
				shmem_format_huge(values[i]));
	}
	len += sysfs_emit_at(buf, len, "\n");

	return len;
}

/*
 * Parse a policy name written to the attribute and make it the global
 * shmem_huge setting.
 *
 * Input of 16 bytes or more is rejected with -EINVAL; one trailing newline
 * is stripped.  Returns @count on success, -EINVAL for an unparsable name,
 * or the error from start_stop_khugepaged().
 */
static ssize_t shmem_enabled_store(struct kobject *kobj,
		struct kobj_attribute *attr, const char *buf, size_t count)
{
	char tmp[16];
	int huge, err;

	/* Need room for the terminating NUL added below. */
	if (count + 1 > sizeof(tmp))
		return -EINVAL;
	memcpy(tmp, buf, count);
	tmp[count] = '\0';
	if (count && tmp[count - 1] == '\n')
		tmp[count - 1] = '\0';

	huge = shmem_parse_huge(tmp);
	if (huge == -EINVAL)
		return huge;

	shmem_huge = huge;
	/* Values above SHMEM_HUGE_DENY are also applied to the internal mount. */
	if (shmem_huge > SHMEM_HUGE_DENY)
		SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;

	err = start_stop_khugepaged();
	return err ? err : count;
}

struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
/* Taken around the per-order bitmap updates in the store handler below. */
static DEFINE_SPINLOCK(huge_shmem_orders_lock);

/*
 * Show the policy for this THP size.  The bitmaps are tested in the order
 * always, inherit, within_size, madvise; if the order's bit is set in none
 * of them the policy shown is "never".
 */
static ssize_t thpsize_shmem_enabled_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	int order = to_thpsize(kobj)->order;
	const char *output;

	if (test_bit(order, &huge_shmem_orders_always))
		output = "[always] inherit within_size advise never";
	else if (test_bit(order, &huge_shmem_orders_inherit))
		output = "always [inherit] within_size advise never";
	else if (test_bit(order, &huge_shmem_orders_within_size))
		output = "always inherit [within_size] advise never";
	else if (test_bit(order, &huge_shmem_orders_madvise))
		output = "always inherit within_size [advise] never";
	else
		output = "always inherit within_size advise [never]";

	return sysfs_emit(buf, "%s\n", output);
}

/*
 * Set the policy for this THP size.  Each branch clears the order's bit in
 * the other three bitmaps and sets it in the selected one ("never" clears
 * all four), under huge_shmem_orders_lock.
 *
 * Returns @count on success, -EINVAL for an unknown policy or a rejected
 * "inherit", or the error from start_stop_khugepaged().
 */
static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
					   struct kobj_attribute *attr,
					   const char *buf, size_t count)
{
	int order = to_thpsize(kobj)->order;
	ssize_t ret = count;

	if (sysfs_streq(buf, "always")) {
		spin_lock(&huge_shmem_orders_lock);
		clear_bit(order, &huge_shmem_orders_inherit);
		clear_bit(order, &huge_shmem_orders_madvise);
		clear_bit(order, &huge_shmem_orders_within_size);
		set_bit(order, &huge_shmem_orders_always);
		spin_unlock(&huge_shmem_orders_lock);
	} else if (sysfs_streq(buf, "inherit")) {
		/* Do not override huge allocation policy with non-PMD sized mTHP */
		if (shmem_huge == SHMEM_HUGE_FORCE &&
		    order != HPAGE_PMD_ORDER)
			return -EINVAL;

		spin_lock(&huge_shmem_orders_lock);
		clear_bit(order, &huge_shmem_orders_always);
		clear_bit(order, &huge_shmem_orders_madvise);
		clear_bit(order, &huge_shmem_orders_within_size);
		set_bit(order, &huge_shmem_orders_inherit);
		spin_unlock(&huge_shmem_orders_lock);
	} else if (sysfs_streq(buf, "within_size")) {
		spin_lock(&huge_shmem_orders_lock);
		clear_bit(order, &huge_shmem_orders_always);
		clear_bit(order, &huge_shmem_orders_inherit);
		clear_bit(order, &huge_shmem_orders_madvise);
		set_bit(order, &huge_shmem_orders_within_size);
		spin_unlock(&huge_shmem_orders_lock);
	} else if (sysfs_streq(buf, "advise")) {
		spin_lock(&huge_shmem_orders_lock);
		clear_bit(order, &huge_shmem_orders_always);
		clear_bit(order, &huge_shmem_orders_inherit);
		clear_bit(order, &huge_shmem_orders_within_size);
		set_bit(order, &huge_shmem_orders_madvise);
		spin_unlock(&huge_shmem_orders_lock);
	} else if (sysfs_streq(buf, "never")) {
		spin_lock(&huge_shmem_orders_lock);
		clear_bit(order, &huge_shmem_orders_always);
		clear_bit(order, &huge_shmem_orders_inherit);
		clear_bit(order, &huge_shmem_orders_within_size);
		clear_bit(order, &huge_shmem_orders_madvise);
		spin_unlock(&huge_shmem_orders_lock);
	} else {
		ret = -EINVAL;
	}

	/* Only re-evaluate khugepaged when a policy was actually applied. */
	if (ret > 0) {
		int err = start_stop_khugepaged();

		if (err)
			ret = err;
	}
	return ret;
}

struct kobj_attribute thpsize_shmem_enabled_attr =
	__ATTR(shmem_enabled, 0644, thpsize_shmem_enabled_show, thpsize_shmem_enabled_store);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */

#if defined(CONFIG_TRANSPARENT_HUGEPAGE)

/*
 * "transparent_hugepage_shmem=" boot parameter: sets the initial shmem_huge.
 * A value that shmem_parse_huge() rejects with -EINVAL is warned about and
 * ignored.
 */
static int __init setup_transparent_hugepage_shmem(char *str)
{
	int huge;

	huge = shmem_parse_huge(str);
	if (huge == -EINVAL) {
		pr_warn("transparent_hugepage_shmem= cannot parse, ignored\n");
		return huge;
	}

	shmem_huge = huge;
	return 1;
}
__setup("transparent_hugepage_shmem=", setup_transparent_hugepage_shmem);

/*
 * "transparent_hugepage_tmpfs=" boot parameter: sets tmpfs_huge, the value
 * shmem_fill_super() uses when a mount gives no huge= option.
 */
static int __init setup_transparent_hugepage_tmpfs(char *str)
{
	int huge;

	huge =
shmem_parse_huge(str);
	if (huge < 0) {
		pr_warn("transparent_hugepage_tmpfs= cannot parse, ignored\n");
		return huge;
	}

	tmpfs_huge = huge;
	return 1;
}
__setup("transparent_hugepage_tmpfs=", setup_transparent_hugepage_tmpfs);

/* Scratch copy of the boot string; strsep() below modifies it in place. */
static char str_dup[PAGE_SIZE] __initdata;
/*
 * "thp_shmem=" boot parameter.
 *
 * Syntax, as parsed below: ';'-separated entries of the form
 * "<sizes>:<policy>", where <sizes> is a ','-separated list of single sizes
 * or "<start>-<end>" ranges and <policy> is one of always, advise, inherit,
 * within_size or never.
 *
 * Changes are accumulated in local copies of the four order bitmaps and
 * only stored back once the whole string has parsed, so a malformed string
 * leaves the current settings untouched.
 *
 * Returns 1 when the setting was applied, 0 when it was ignored.
 */
static int __init setup_thp_shmem(char *str)
{
	char *token, *range, *policy, *subtoken;
	unsigned long always, inherit, madvise, within_size;
	char *start_size, *end_size;
	int start, end, nr;
	char *p;

	if (!str || strlen(str) + 1 > PAGE_SIZE)
		goto err;
	strscpy(str_dup, str);

	always = huge_shmem_orders_always;
	inherit = huge_shmem_orders_inherit;
	madvise = huge_shmem_orders_madvise;
	within_size = huge_shmem_orders_within_size;
	p = str_dup;
	while ((token = strsep(&p, ";")) != NULL) {
		/* After this strsep() token points past ':' or is NULL. */
		range = strsep(&token, ":");
		policy = token;

		if (!policy)
			goto err;

		while ((subtoken = strsep(&range, ",")) != NULL) {
			if (strchr(subtoken, '-')) {
				start_size = strsep(&subtoken, "-");
				end_size = subtoken;

				start = get_order_from_str(start_size,
							   THP_ORDERS_ALL_FILE_DEFAULT);
				end = get_order_from_str(end_size,
							 THP_ORDERS_ALL_FILE_DEFAULT);
			} else {
				start_size = end_size = subtoken;
				start = end = get_order_from_str(subtoken,
								 THP_ORDERS_ALL_FILE_DEFAULT);
			}

			if (start == -EINVAL) {
				pr_err("invalid size %s in thp_shmem boot parameter\n",
				       start_size);
				goto err;
			}

			if (end == -EINVAL) {
				pr_err("invalid size %s in thp_shmem boot parameter\n",
				       end_size);
				goto err;
			}

			if (start < 0 || end < 0 || start > end)
				goto err;

			/* Orders start..end inclusive. */
			nr = end - start + 1;
			if (!strcmp(policy, "always")) {
				bitmap_set(&always, start, nr);
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
				bitmap_clear(&within_size, start, nr);
			} else if (!strcmp(policy, "advise")) {
				bitmap_set(&madvise, start, nr);
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&always, start, nr);
				bitmap_clear(&within_size, start, nr);
			} else if (!strcmp(policy, "inherit")) {
				bitmap_set(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
				bitmap_clear(&always, start, nr);
				bitmap_clear(&within_size, start, nr);
			} else if (!strcmp(policy, "within_size")) {
				bitmap_set(&within_size, start, nr);
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
				bitmap_clear(&always, start, nr);
			} else if (!strcmp(policy, "never")) {
				bitmap_clear(&inherit, start, nr);
				bitmap_clear(&madvise, start, nr);
				bitmap_clear(&always, start, nr);
				bitmap_clear(&within_size, start, nr);
			} else {
				pr_err("invalid policy %s in thp_shmem boot parameter\n", policy);
				goto err;
			}
		}
	}

	/* Whole string parsed: commit. */
	huge_shmem_orders_always = always;
	huge_shmem_orders_madvise = madvise;
	huge_shmem_orders_inherit = inherit;
	huge_shmem_orders_within_size = within_size;
	shmem_orders_configured = true;
	return 1;

err:
	pr_warn("thp_shmem=%s: error parsing string, ignoring setting\n", str);
	return 0;
}
__setup("thp_shmem=", setup_thp_shmem);

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

#else /* !CONFIG_SHMEM */

/*
 * tiny-shmem: simple shmemfs and tmpfs using ramfs code
 *
 * This is intended for small system where the benefits of the full
 * shmem code (swap-backed and resource-limited) are outweighed by
 * their complexity. On systems without swap this code should be
 * effectively equivalent, but much lighter weight.
*/ static struct file_system_type shmem_fs_type = { .name = "tmpfs", .init_fs_context = ramfs_init_fs_context, .parameters = ramfs_fs_parameters, .kill_sb = ramfs_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; void __init shmem_init(void) { BUG_ON(register_filesystem(&shmem_fs_type) != 0); shm_mnt = kern_mount(&shmem_fs_type); BUG_ON(IS_ERR(shm_mnt)); } int shmem_unuse(unsigned int type) { return 0; } int shmem_lock(struct file *file, int lock, struct ucounts *ucounts) { return 0; } void shmem_unlock_mapping(struct address_space *mapping) { } #ifdef CONFIG_MMU unsigned long shmem_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags); } #endif void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) { truncate_inode_pages_range(inode->i_mapping, lstart, lend); } EXPORT_SYMBOL_GPL(shmem_truncate_range); #define shmem_vm_ops generic_file_vm_ops #define shmem_anon_vm_ops generic_file_vm_ops #define shmem_file_operations ramfs_file_operations #define shmem_acct_size(flags, size) 0 #define shmem_unacct_size(flags, size) do {} while (0) static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir, umode_t mode, dev_t dev, unsigned long flags) { struct inode *inode = ramfs_get_inode(sb, dir, mode, dev); return inode ? 
inode : ERR_PTR(-ENOSPC); } #endif /* CONFIG_SHMEM */ /* common code */ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags, unsigned int i_flags) { struct inode *inode; struct file *res; if (IS_ERR(mnt)) return ERR_CAST(mnt); if (size < 0 || size > MAX_LFS_FILESIZE) return ERR_PTR(-EINVAL); if (shmem_acct_size(flags, size)) return ERR_PTR(-ENOMEM); if (is_idmapped_mnt(mnt)) return ERR_PTR(-EINVAL); inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL, S_IFREG | S_IRWXUGO, 0, flags); if (IS_ERR(inode)) { shmem_unacct_size(flags, size); return ERR_CAST(inode); } inode->i_flags |= i_flags; inode->i_size = size; clear_nlink(inode); /* It is unlinked */ res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size)); if (!IS_ERR(res)) res = alloc_file_pseudo(inode, mnt, name, O_RDWR, &shmem_file_operations); if (IS_ERR(res)) iput(inode); return res; } /** * shmem_kernel_file_setup - get an unlinked file living in tmpfs which must be * kernel internal. There will be NO LSM permission checks against the * underlying inode. So users of this interface must do LSM checks at a * higher layer. The users are the big_key and shm implementations. LSM * checks are provided at the key or shm level rather than the inode. 
* @name: name for dentry (to be seen in /proc/<pid>/maps * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags) { return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE); } EXPORT_SYMBOL_GPL(shmem_kernel_file_setup); /** * shmem_file_setup - get an unlinked file living in tmpfs * @name: name for dentry (to be seen in /proc/<pid>/maps * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags) { return __shmem_file_setup(shm_mnt, name, size, flags, 0); } EXPORT_SYMBOL_GPL(shmem_file_setup); /** * shmem_file_setup_with_mnt - get an unlinked file living in tmpfs * @mnt: the tmpfs mount where the file will be created * @name: name for dentry (to be seen in /proc/<pid>/maps * @size: size to be set for the file * @flags: VM_NORESERVE suppresses pre-accounting of the entire object size */ struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, const char *name, loff_t size, unsigned long flags) { return __shmem_file_setup(mnt, name, size, flags, 0); } EXPORT_SYMBOL_GPL(shmem_file_setup_with_mnt); /** * shmem_zero_setup - setup a shared anonymous mapping * @vma: the vma to be mmapped is prepared by do_mmap */ int shmem_zero_setup(struct vm_area_struct *vma) { struct file *file; loff_t size = vma->vm_end - vma->vm_start; /* * Cloning a new file under mmap_lock leads to a lock ordering conflict * between XFS directory reading and selinux: since this file is only * accessible to the user through its mapping, use S_PRIVATE flag to * bypass file security, in the same way as shmem_kernel_file_setup(). 
*/ file = shmem_kernel_file_setup("dev/zero", size, vma->vm_flags); if (IS_ERR(file)) return PTR_ERR(file); if (vma->vm_file) fput(vma->vm_file); vma->vm_file = file; vma->vm_ops = &shmem_anon_vm_ops; return 0; } /** * shmem_read_folio_gfp - read into page cache, using specified page allocation flags. * @mapping: the folio's address_space * @index: the folio index * @gfp: the page allocator flags to use if allocating * * This behaves as a tmpfs "read_cache_page_gfp(mapping, index, gfp)", * with any new page allocations done using the specified allocation flags. * But read_cache_page_gfp() uses the ->read_folio() method: which does not * suit tmpfs, since it may have pages in swapcache, and needs to find those * for itself; although drivers/gpu/drm i915 and ttm rely upon this support. * * i915_gem_object_get_pages_gtt() mixes __GFP_NORETRY | __GFP_NOWARN in * with the mapping_gfp_mask(), to avoid OOMing the machine unnecessarily. */ struct folio *shmem_read_folio_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { #ifdef CONFIG_SHMEM struct inode *inode = mapping->host; struct folio *folio; int error; error = shmem_get_folio_gfp(inode, index, 0, &folio, SGP_CACHE, gfp, NULL, NULL); if (error) return ERR_PTR(error); folio_unlock(folio); return folio; #else /* * The tiny !SHMEM case uses ramfs without swap */ return mapping_read_folio_gfp(mapping, index, gfp); #endif } EXPORT_SYMBOL_GPL(shmem_read_folio_gfp); struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp) { struct folio *folio = shmem_read_folio_gfp(mapping, index, gfp); struct page *page; if (IS_ERR(folio)) return &folio->page; page = folio_file_page(folio, index); if (PageHWPoison(page)) { folio_put(folio); return ERR_PTR(-EIO); } return page; } EXPORT_SYMBOL_GPL(shmem_read_mapping_page_gfp);
2446 384 74 4105 4104 4095 2441 2128 3 3641 311 313 307 306 305 304 305 2015 2017 1586 449 2018 5 5 5 5 5 994 969 81 151 995 185 315 812 994 995 10 10 10 2 5 10 10 10 1067 1067 1066 1064 1064 213 213 211 213 1094 97 1 179 15 45 7 1123 344 1 946 80 1135 1 1138 1137 100 6 42 1296 1343 557 538 23 23 3 19 5 428 411 18 424 17 18 11 8 1 3 3 1 3 8 121 139 1066 6 1065 1061 1062 1065 1053 30 1065 1066 1065 1067 1067 1064 1195 1193 1194 1192 1193 1179 25 1196 1181 25 1192 29 12 1193 1194 1192 1 1 1 1194 1193 1194 1192 2 1193 1196 1196 2 1192 1 1196 1193 1193 1 1193 6 1 1193 1194 1157 253 1192 1195 1193 1193 1192 769 692 1195 1191 2 1196 1189 2 358 41 1170 1194 1193 1194 1195 1195 5 1194 1194 1183 25 1195 1 1193 1194 4 1193 2 3 1193 1 1 1 2 2 1 2 1 2 3 2 1 1 1 1 1 1 2 1 5 1171 132 250 1004 33 2 1195 1135 1193 1192 1196 1194 2 1195 1194 43 4 1148 83 81 1 1193 138 2 137 1 138 138 138 4 4 139 138 139 13 13 126 126 1 71 1 2 72 124 138 138 138 138 139 139 5 5 306 5 316 316 5 306 5 5 310 285 284 285 283 1 282 1 1 281 1 280 1 280 1 265 14 2 10 10 2 6 6 6 6 6 6 6 6 6 6 140 28 26 2 65 2 132 6 1 3 2 3 3 285 11 17 256 274 273 255 8 12 243 5 21 262 2 246 3 14 210 51 256 6 262 257 259 106 148 201 2 4 193 196 1 3 197 1 196 2 197 1 197 1 191 8 190 171 10 8 4 6 6 172 7 170 3 161 10 5 171 6 153 1 1 10 2 1 7 4 141 127 63 142 63 63 1 3 59 4 55 55 55 51 1 1 6 52 55 3 3 3 3 1 102 117 1 2 4 109 1 103 4 2 39 103 39 1 3 109 1 1 232 232 1 2 2 1 236 60 2 2 1 10 263 1 5 256 9 5 6 252 250 1 249 251 250 256 1 11 12 12 11 1 7 1 5 2 4 3 3 2 7 11 1 6 3 1 2 1 1 10 58 58 29 6 51 6 6 6 36 36 30 36 35 36 29 25 1056 1060 6 1058 159 929 34 1038 911 907 16 7 2 16 2 9 9 10 1 7 2 9 5 26 1 26 1 1 10 2 2 22 19 4 1 10 15 3 1 2 12 6 6 2 1 2 2 1 1 5 7 2 2 5 1 7 1 7 9 9 7 7 6 1 3 10 10 1 9 9 9 8 6 6 7 9 7 1 4 4 1 1 1 1 8 8 1 12 3 8 5 1 8 12 12 4 12 12 12 1 12 11 1 12 12 41 1 5 18 23 19 1 19 1 2 21 14 23 23 2 21 22 1 2 2 18 1 1 12 10 7 7 1 6 31 18 18 18 1 1 1 18 2 18 1 18 3 18 18 18 18 7 7 31 31 3 27 4 11 31 16 1 14 31 14 
8 5 9 31 1 18 31 1 24 24 24 1 9 24 24 16 16 9 12 2 6 4 7 4 1 5 7 5 7 7 7 7 7 35 4 31 3 47 19 19 2 3 2 1 12 24 23 1 3 19 19 19 8 8 1 1 2 2 1 10 10 10 1 10 6 1 1 2 2 13 10 1 1 1 8 1 1 2 1 1 1 1 1 2 10 1 5 1 1 1 1 5 1 1 1 1 1 16 7 9 10 1 1 1 1 402 384 17 245 1 150 7 62 310 2015 2 1 2015 2012 1720 312 30 253 58 311 250 310 1718 283 1711 1 753 968 6 3 2042 1093 1085 416 75 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 
934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 
1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 
1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 
2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 
2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 
2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 
3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 
3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 
4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 
4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 
4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 
5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 
5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 
6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 
6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 
6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Routing netlink socket interface: protocol independent part. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Fixes: * Vitaly E. Lavrov RTA_OK arithmetic was wrong. */ #include <linux/bitops.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/capability.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/security.h> #include <linux/mutex.h> #include <linux/if_addr.h> #include <linux/if_bridge.h> #include <linux/if_vlan.h> #include <linux/pci.h> #include <linux/etherdevice.h> #include <linux/bpf.h> #include <linux/uaccess.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/protocol.h> #include <net/arp.h> #include <net/route.h> #include <net/udp.h> #include <net/tcp.h> #include <net/sock.h> #include <net/pkt_sched.h> #include <net/fib_rules.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/devlink.h> #if 
IS_ENABLED(CONFIG_IPV6)
#include <net/addrconf.h>
#endif
#include <linux/dpll.h>

#include "dev.h"

#define RTNL_MAX_TYPE		50
#define RTNL_SLAVE_MAX_TYPE	44

/* Handler record for one protocol:msgtype slot.
 *
 * rtnl_register_internal() fills in doit/dumpit/owner/flags; @rcu is the
 * head handed to kfree_rcu() when a record is replaced or removed.
 */
struct rtnl_link {
	rtnl_doit_func		doit;
	rtnl_dumpit_func	dumpit;
	struct module		*owner;
	unsigned int		flags;
	struct rcu_head		rcu;
};

/* The global RTNL mutex wrapped by the helpers below. */
static DEFINE_MUTEX(rtnl_mutex);

/* Take the global RTNL mutex. */
void rtnl_lock(void)
{
	mutex_lock(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_lock);

/* Take the global RTNL mutex with mutex_lock_killable(); its return value
 * is passed through to the caller unchanged.
 */
int rtnl_lock_killable(void)
{
	return mutex_lock_killable(&rtnl_mutex);
}

/* Head of the chain of skbs queued by rtnl_kfree_skbs(); the chain is
 * detached and freed in __rtnl_unlock().
 */
static struct sk_buff *defer_kfree_skb_list;

/* Queue the skb chain @head..@tail (linked through ->next) so that it is
 * freed by __rtnl_unlock(). Nothing happens unless both pointers are
 * non-NULL.
 *
 * NOTE(review): the list is updated without taking any lock here;
 * presumably the caller already holds RTNL - confirm.
 */
void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail)
{
	if (head && tail) {
		/* Splice the new chain in front of what is already queued. */
		tail->next = defer_kfree_skb_list;
		defer_kfree_skb_list = head;
	}
}
EXPORT_SYMBOL(rtnl_kfree_skbs);

/* Release the RTNL mutex without going through netdev_run_todo(), then
 * free the skbs that were deferred via rtnl_kfree_skbs().
 */
void __rtnl_unlock(void)
{
	/* Detach the deferred chain before the mutex is dropped. */
	struct sk_buff *head = defer_kfree_skb_list;

	defer_kfree_skb_list = NULL;

	/* Ensure that we didn't actually add any TODO item when __rtnl_unlock()
	 * is used. In some places, e.g. in cfg80211, we have code that will do
	 * something like
	 *   rtnl_lock()
	 *   wiphy_lock()
	 *   ...
	 *   rtnl_unlock()
	 *
	 * and because netdev_run_todo() acquires the RTNL for items on the list
	 * we could cause a situation such as this:
	 * Thread 1                       Thread 2
	 *                                  rtnl_lock()
	 *                                  unregister_netdevice()
	 *                                  __rtnl_unlock()
	 * rtnl_lock()
	 * wiphy_lock()
	 * rtnl_unlock()
	 *   netdev_run_todo()
	 *     __rtnl_unlock()
	 *
	 *     // list not empty now
	 *     // because of thread 2
	 *                                  rtnl_lock()
	 *     while (!list_empty(...))
	 *       rtnl_lock()
	 *                                  wiphy_lock()
	 * **** DEADLOCK ****
	 *
	 * However, usage of __rtnl_unlock() is rare, and so we can ensure that
	 * it's not used in cases where something is added to the todo list.
	 */
	WARN_ON(!list_empty(&net_todo_list));

	mutex_unlock(&rtnl_mutex);

	/* Free the detached chain after the unlock, calling cond_resched()
	 * between skbs.
	 */
	while (head) {
		/* Read ->next first: head is freed on the next line. */
		struct sk_buff *next = head->next;

		kfree_skb(head);
		cond_resched();
		head = next;
	}
}

/* Release the RTNL mutex by way of netdev_run_todo(). */
void rtnl_unlock(void)
{
	/* This fellow will unlock it for us.
*/
	netdev_run_todo();
}
EXPORT_SYMBOL(rtnl_unlock);

/* Attempt to take the global RTNL mutex with mutex_trylock(); the result
 * of that call is returned as-is.
 */
int rtnl_trylock(void)
{
	return mutex_trylock(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_trylock);

/* Return mutex_is_locked() for the global RTNL mutex. */
int rtnl_is_locked(void)
{
	return mutex_is_locked(&rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_is_locked);

/* refcount_dec_and_mutex_lock() bound to the global RTNL mutex; the
 * return value is passed through.
 */
bool refcount_dec_and_rtnl_lock(refcount_t *r)
{
	return refcount_dec_and_mutex_lock(r, &rtnl_mutex);
}
EXPORT_SYMBOL(refcount_dec_and_rtnl_lock);

#ifdef CONFIG_PROVE_LOCKING
/* Return lockdep_is_held() for the global RTNL mutex. */
bool lockdep_rtnl_is_held(void)
{
	return lockdep_is_held(&rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */

#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
/* Take @net's own rtnl_mutex. ASSERT_RTNL() runs first, so this is meant
 * to be called with the global RTNL already held.
 */
void __rtnl_net_lock(struct net *net)
{
	ASSERT_RTNL();

	mutex_lock(&net->rtnl_mutex);
}
EXPORT_SYMBOL(__rtnl_net_lock);

/* Release @net's own rtnl_mutex; the global RTNL is asserted, not
 * released, here.
 */
void __rtnl_net_unlock(struct net *net)
{
	ASSERT_RTNL();

	mutex_unlock(&net->rtnl_mutex);
}
EXPORT_SYMBOL(__rtnl_net_unlock);

/* Take the global RTNL first, then @net's rtnl_mutex. */
void rtnl_net_lock(struct net *net)
{
	rtnl_lock();
	__rtnl_net_lock(net);
}
EXPORT_SYMBOL(rtnl_net_lock);

/* Release in the reverse order of rtnl_net_lock(): @net's rtnl_mutex,
 * then the global RTNL.
 */
void rtnl_net_unlock(struct net *net)
{
	__rtnl_net_unlock(net);
	rtnl_unlock();
}
EXPORT_SYMBOL(rtnl_net_unlock);

/* Try the global RTNL; only when that succeeds (non-zero) is @net's
 * rtnl_mutex taken, and that second step uses a plain mutex_lock().
 * Returns the result of rtnl_trylock().
 */
int rtnl_net_trylock(struct net *net)
{
	int ret = rtnl_trylock();

	if (ret)
		__rtnl_net_lock(net);

	return ret;
}
EXPORT_SYMBOL(rtnl_net_trylock);

/* Returns the result of rtnl_lock_killable(); @net's rtnl_mutex is taken
 * only when that result is 0.
 */
int rtnl_net_lock_killable(struct net *net)
{
	int ret = rtnl_lock_killable();

	if (!ret)
		__rtnl_net_lock(net);

	return ret;
}

/* Three-way comparison defining the order in which per-netns mutexes are
 * sorted: 0 when both arguments are the same netns, init_net before
 * everything else, otherwise ascending pointer value (-1 or 1).
 */
static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
{
	if (net_eq(net_a, net_b))
		return 0;

	/* always init_net first */
	if (net_eq(net_a, &init_net))
		return -1;

	if (net_eq(net_b, &init_net))
		return 1;

	/* otherwise lock in ascending order */
	return net_a < net_b ?
-1 : 1;
}

/* Comparison on lockdep maps: recover the owning struct net from each
 * rtnl_mutex.dep_map and compare the two with rtnl_net_cmp_locks().
 *
 * NOTE(review): presumably installed as a lockdep compare callback for
 * the per-netns mutex - confirm at the registration site.
 */
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
{
	const struct net *net_a, *net_b;

	net_a = container_of(a, struct net, rtnl_mutex.dep_map);
	net_b = container_of(b, struct net, rtnl_mutex.dep_map);

	return rtnl_net_cmp_locks(net_a, net_b);
}

/* True only when both the global RTNL and @net's rtnl_mutex report
 * locked.
 */
bool rtnl_net_is_locked(struct net *net)
{
	return rtnl_is_locked() && mutex_is_locked(&net->rtnl_mutex);
}
EXPORT_SYMBOL(rtnl_net_is_locked);

/* True only when lockdep reports both the global RTNL and @net's
 * rtnl_mutex as held.
 */
bool lockdep_rtnl_net_is_held(struct net *net)
{
	return lockdep_rtnl_is_held() && lockdep_is_held(&net->rtnl_mutex);
}
EXPORT_SYMBOL(lockdep_rtnl_net_is_held);
#else
/* Stub used when CONFIG_DEBUG_NET_SMALL_RTNL is off: always -1, so
 * rtnl_nets_add() never takes its "equal" (0) or "swap" (1) branches and
 * simply appends.
 */
static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
{
	/* No need to swap */
	return -1;
}
#endif

/* Small fixed-capacity set of netns pointers; @len is the number of
 * valid entries at the front of @net.
 */
struct rtnl_nets {
	/* ->newlink() needs to freeze 3 netns at most;
	 * 2 for the new device, 1 for its peer.
	 */
	struct net *net[3];
	unsigned char len;
};

/* Zero the whole set: no entries, len == 0. */
static void rtnl_nets_init(struct rtnl_nets *rtnl_nets)
{
	memset(rtnl_nets, 0, sizeof(*rtnl_nets));
}

/* put_net() every stored netns, clear each used slot and reset len to 0. */
static void rtnl_nets_destroy(struct rtnl_nets *rtnl_nets)
{
	int i;

	for (i = 0; i < rtnl_nets->len; i++) {
		put_net(rtnl_nets->net[i]);
		rtnl_nets->net[i] = NULL;
	}

	rtnl_nets->len = 0;
}

/**
 * rtnl_nets_add - Add netns to be locked before ->newlink().
 *
 * @rtnl_nets: rtnl_nets pointer passed to ->get_peer_net().
 * @net: netns pointer with an extra refcnt held.
 *
 * The extra refcnt is released in rtnl_nets_destroy().
*/
static void rtnl_nets_add(struct rtnl_nets *rtnl_nets, struct net *net)
{
	int i;

	/* Warn when every slot is already in use before inserting. */
	DEBUG_NET_WARN_ON_ONCE(rtnl_nets->len == ARRAY_SIZE(rtnl_nets->net));

	/*
	 * Insertion pass in rtnl_net_cmp_locks() order: whenever the stored
	 * entry should come after @net they are swapped, and the displaced
	 * entry is carried forward in @net for the remaining comparisons.
	 */
	for (i = 0; i < rtnl_nets->len; i++) {
		switch (rtnl_net_cmp_locks(rtnl_nets->net[i], net)) {
		case 0:
			/* Already present: drop the extra reference and stop. */
			put_net(net);
			return;
		case 1:
			swap(rtnl_nets->net[i], net);
		}
	}

	/* Whatever is left in @net goes into the first free slot. */
	rtnl_nets->net[i] = net;
	rtnl_nets->len++;
}

/* Take the global RTNL, then every stored per-netns lock in array order. */
static void rtnl_nets_lock(struct rtnl_nets *rtnl_nets)
{
	int i;

	rtnl_lock();

	for (i = 0; i < rtnl_nets->len; i++)
		__rtnl_net_lock(rtnl_nets->net[i]);
}

/* Release every stored per-netns lock (array order), then the global RTNL. */
static void rtnl_nets_unlock(struct rtnl_nets *rtnl_nets)
{
	int i;

	for (i = 0; i < rtnl_nets->len; i++)
		__rtnl_net_unlock(rtnl_nets->net[i]);

	rtnl_unlock();
}

/*
 * Handler tables: one RCU-managed slot per protocol family, each pointing to
 * an RCU-managed array of RTM_NR_MSGTYPES struct rtnl_link pointers (see the
 * allocation in rtnl_register_internal()).
 */
static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];

/* Convert an rtnetlink message type into an index relative to RTM_BASE. */
static inline int rtm_msgindex(int msgtype)
{
	int msgindex = msgtype - RTM_BASE;

	/*
	 * msgindex < 0 implies someone tried to register a netlink
	 * control code. msgindex >= RTM_NR_MSGTYPES may indicate that
	 * the message type has not been added to linux/rtnetlink.h
	 */
	BUG_ON(msgindex < 0 || msgindex >= RTM_NR_MSGTYPES);

	return msgindex;
}

/*
 * Look up the handler entry for @protocol.  A protocol beyond the table, or
 * one without a table, falls back to the PF_UNSPEC table.
 *
 * NOTE(review): @msgtype is used directly as the array index, so callers
 * presumably pass an rtm_msgindex() value - confirm against the call sites.
 */
static struct rtnl_link *rtnl_get_link(int protocol, int msgtype)
{
	struct rtnl_link __rcu **tab;

	if (protocol >= ARRAY_SIZE(rtnl_msg_handlers))
		protocol = PF_UNSPEC;

	tab = rcu_dereference_rtnl(rtnl_msg_handlers[protocol]);
	if (!tab)
		tab = rcu_dereference_rtnl(rtnl_msg_handlers[PF_UNSPEC]);

	return rcu_dereference_rtnl(tab[msgtype]);
}

/*
 * Install or update the handler entry for (@protocol, @msgtype) under RTNL.
 *
 * The per-protocol table is allocated on first use.  An existing entry is
 * never modified in place: it is duplicated, the copy is updated and
 * published, and the old entry is freed with kfree_rcu().
 *
 * Returns 0 on success, -ENOBUFS when an allocation fails.
 */
static int rtnl_register_internal(struct module *owner,
				  int protocol, int msgtype,
				  rtnl_doit_func doit, rtnl_dumpit_func dumpit,
				  unsigned int flags)
{
	struct rtnl_link *link, *old;
	struct rtnl_link __rcu **tab;
	int msgindex;
	int ret = -ENOBUFS;

	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
	msgindex = rtm_msgindex(msgtype);

	rtnl_lock();
	tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
	if (tab == NULL) {
		tab = kcalloc(RTM_NR_MSGTYPES, sizeof(void *), GFP_KERNEL);
		if (!tab)
			goto unlock;

		/* ensures we see the 0 stores */
		rcu_assign_pointer(rtnl_msg_handlers[protocol], tab);
	}

	old = rtnl_dereference(tab[msgindex]);
	if (old) {
		/* Start from a copy so fields not passed in this call survive. */
		link = kmemdup(old, sizeof(*old), GFP_KERNEL);
		if (!link)
			goto unlock;
	} else {
		link = kzalloc(sizeof(*link), GFP_KERNEL);
		if (!link)
			goto unlock;
	}

	/* Warn when a previously registered owner/callback would be replaced. */
	WARN_ON(link->owner && link->owner != owner);
	link->owner = owner;

	WARN_ON(doit && link->doit && link->doit != doit);
	if (doit)
		link->doit = doit;
	WARN_ON(dumpit && link->dumpit && link->dumpit != dumpit);
	if (dumpit)
		link->dumpit = dumpit;

	/* The bulk-delete flag is only expected on RTNL_KIND_DEL messages. */
	WARN_ON(rtnl_msgtype_kind(msgtype) != RTNL_KIND_DEL &&
		(flags & RTNL_FLAG_BULK_DEL_SUPPORTED));
	link->flags |= flags;

	/* publish protocol:msgtype */
	rcu_assign_pointer(tab[msgindex], link);
	ret = 0;
	if (old)
		kfree_rcu(old, rcu);
unlock:
	rtnl_unlock();
	return ret;
}

/**
 * rtnl_unregister - Unregister a rtnetlink message type
 * @protocol: Protocol family or PF_UNSPEC
 * @msgtype: rtnetlink message type
 *
 * Returns 0 on success or a negative error code.
 */
static int rtnl_unregister(int protocol, int msgtype)
{
	struct rtnl_link __rcu **tab;
	struct rtnl_link *link;
	int msgindex;

	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
	msgindex = rtm_msgindex(msgtype);

	rtnl_lock();
	tab = rtnl_dereference(rtnl_msg_handlers[protocol]);
	if (!tab) {
		/* Nothing was ever registered for this protocol. */
		rtnl_unlock();
		return -ENOENT;
	}

	/* Unpublish the entry under RTNL; free it after RTNL is dropped. */
	link = rcu_replace_pointer_rtnl(tab[msgindex], NULL);
	rtnl_unlock();

	kfree_rcu(link, rcu);

	return 0;
}

/**
 * rtnl_unregister_all - Unregister all rtnetlink message type of a protocol
 * @protocol : Protocol family or PF_UNSPEC
 *
 * Identical to calling rtnl_unregster() for all registered message types
 * of a certain protocol family.
*/ void rtnl_unregister_all(int protocol) { struct rtnl_link __rcu **tab; struct rtnl_link *link; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); rtnl_lock(); tab = rcu_replace_pointer_rtnl(rtnl_msg_handlers[protocol], NULL); if (!tab) { rtnl_unlock(); return; } for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) { link = rcu_replace_pointer_rtnl(tab[msgindex], NULL); kfree_rcu(link, rcu); } rtnl_unlock(); synchronize_net(); kfree(tab); } EXPORT_SYMBOL_GPL(rtnl_unregister_all); /** * __rtnl_register_many - Register rtnetlink message types * @handlers: Array of struct rtnl_msg_handlers * @n: The length of @handlers * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the * specified protocol family and message type is received. * * The special protocol family PF_UNSPEC may be used to define fallback * function pointers for the case when no entry for the specific protocol * family exists. * * When one element of @handlers fails to register, * 1) built-in: panics. * 2) modules : the previous successful registrations are unwinded * and an error is returned. * * Use rtnl_register_many(). 
*/ int __rtnl_register_many(const struct rtnl_msg_handler *handlers, int n) { const struct rtnl_msg_handler *handler; int i, err; for (i = 0, handler = handlers; i < n; i++, handler++) { err = rtnl_register_internal(handler->owner, handler->protocol, handler->msgtype, handler->doit, handler->dumpit, handler->flags); if (err) { if (!handler->owner) panic("Unable to register rtnetlink message " "handlers, %pS\n", handlers); __rtnl_unregister_many(handlers, i); break; } } return err; } EXPORT_SYMBOL_GPL(__rtnl_register_many); void __rtnl_unregister_many(const struct rtnl_msg_handler *handlers, int n) { const struct rtnl_msg_handler *handler; int i; for (i = n - 1, handler = handlers + n - 1; i >= 0; i--, handler--) rtnl_unregister(handler->protocol, handler->msgtype); } EXPORT_SYMBOL_GPL(__rtnl_unregister_many); static DEFINE_MUTEX(link_ops_mutex); static LIST_HEAD(link_ops); static struct rtnl_link_ops *rtnl_link_ops_get(const char *kind, int *srcu_index) { struct rtnl_link_ops *ops; rcu_read_lock(); list_for_each_entry_rcu(ops, &link_ops, list) { if (!strcmp(ops->kind, kind)) { *srcu_index = srcu_read_lock(&ops->srcu); goto unlock; } } ops = NULL; unlock: rcu_read_unlock(); return ops; } static void rtnl_link_ops_put(struct rtnl_link_ops *ops, int srcu_index) { srcu_read_unlock(&ops->srcu, srcu_index); } /** * rtnl_link_register - Register rtnl_link_ops with rtnetlink. * @ops: struct rtnl_link_ops * to register * * Returns 0 on success or a negative error code. */ int rtnl_link_register(struct rtnl_link_ops *ops) { struct rtnl_link_ops *tmp; int err; /* Sanity-check max sizes to avoid stack buffer overflow. */ if (WARN_ON(ops->maxtype > RTNL_MAX_TYPE || ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)) return -EINVAL; /* The check for alloc/setup is here because if ops * does not have that filled up, it is not possible * to use the ops for creating device. So do not * fill up dellink as well. That disables rtnl_dellink. 
*/ if ((ops->alloc || ops->setup) && !ops->dellink) ops->dellink = unregister_netdevice_queue; err = init_srcu_struct(&ops->srcu); if (err) return err; mutex_lock(&link_ops_mutex); list_for_each_entry(tmp, &link_ops, list) { if (!strcmp(ops->kind, tmp->kind)) { err = -EEXIST; goto unlock; } } list_add_tail_rcu(&ops->list, &link_ops); unlock: mutex_unlock(&link_ops_mutex); return err; } EXPORT_SYMBOL_GPL(rtnl_link_register); static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) { struct net_device *dev; LIST_HEAD(list_kill); for_each_netdev(net, dev) { if (dev->rtnl_link_ops == ops) ops->dellink(dev, &list_kill); } unregister_netdevice_many(&list_kill); } /* Return with the rtnl_lock held when there are no network * devices unregistering in any network namespace. */ static void rtnl_lock_unregistering_all(void) { DEFINE_WAIT_FUNC(wait, woken_wake_function); add_wait_queue(&netdev_unregistering_wq, &wait); for (;;) { rtnl_lock(); /* We held write locked pernet_ops_rwsem, and parallel * setup_net() and cleanup_net() are not possible. */ if (!atomic_read(&dev_unreg_count)) break; __rtnl_unlock(); wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); } remove_wait_queue(&netdev_unregistering_wq, &wait); } /** * rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 
* @ops: struct rtnl_link_ops * to unregister
 */
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
	struct net *net;

	/* Unpublish @ops so rtnl_link_ops_get() can no longer find it. */
	mutex_lock(&link_ops_mutex);
	list_del_rcu(&ops->list);
	mutex_unlock(&link_ops_mutex);

	/* Wait for SRCU readers of ops->srcu, then tear the SRCU state down. */
	synchronize_srcu(&ops->srcu);
	cleanup_srcu_struct(&ops->srcu);

	/* Close the race with setup_net() and cleanup_net() */
	down_write(&pernet_ops_rwsem);
	rtnl_lock_unregistering_all();

	/* RTNL is held here: remove this kind's devices from every netns. */
	for_each_net(net)
		__rtnl_kill_links(net, ops);

	rtnl_unlock();
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);

/*
 * Space needed for IFLA_INFO_SLAVE_DATA of @dev: 0 unless @dev has a master
 * whose rtnl_link_ops provides ->get_slave_size().
 */
static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
{
	struct net_device *master_dev;
	const struct rtnl_link_ops *ops;
	size_t size = 0;

	rcu_read_lock();

	master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
	if (!master_dev)
		goto out;

	ops = master_dev->rtnl_link_ops;
	if (!ops || !ops->get_slave_size)
		goto out;
	/* IFLA_INFO_SLAVE_DATA + nested data */
	size = nla_total_size(sizeof(struct nlattr)) +
	       ops->get_slave_size(master_dev, dev);

out:
	rcu_read_unlock();

	return size;
}

/*
 * Space needed for the IFLA_LINKINFO nest of @dev; 0 when the device has no
 * rtnl_link_ops.  Optional parts are counted only when the matching ops
 * callback exists.
 */
static size_t rtnl_link_get_size(const struct net_device *dev)
{
	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
	size_t size;

	if (!ops)
		return 0;

	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */

	if (ops->get_size)
		/* IFLA_INFO_DATA + nested data */
		size += nla_total_size(sizeof(struct nlattr)) +
			ops->get_size(dev);

	if (ops->get_xstats_size)
		/* IFLA_INFO_XSTATS */
		size += nla_total_size(ops->get_xstats_size(dev));

	size += rtnl_link_get_slave_info_data_size(dev);

	return size;
}

/* Registered address-family ops; modified under RTNL, read under RCU. */
static LIST_HEAD(rtnl_af_ops);

/*
 * Find the rtnl_af_ops registered for @family.  On a match an SRCU read-side
 * section on ops->srcu is entered and its index stored in @srcu_index; the
 * caller ends it with rtnl_af_put().  Returns NULL when nothing matches.
 */
static struct rtnl_af_ops *rtnl_af_lookup(const int family, int *srcu_index)
{
	struct rtnl_af_ops *ops;

	ASSERT_RTNL();

	rcu_read_lock();

	list_for_each_entry_rcu(ops, &rtnl_af_ops, list) {
		if (ops->family == family) {
			*srcu_index = srcu_read_lock(&ops->srcu);
			goto unlock;
		}
	}

	ops = NULL;
unlock:
	rcu_read_unlock();

	return ops;
}

/* End the SRCU read-side section started by rtnl_af_lookup(). */
static void rtnl_af_put(struct rtnl_af_ops *ops, int srcu_index)
{
	srcu_read_unlock(&ops->srcu, srcu_index);
}

/**
 * rtnl_af_register - Register rtnl_af_ops with rtnetlink.
 * @ops: struct rtnl_af_ops * to register
 *
 * Return: 0 on success or a negative error code.
 */
int rtnl_af_register(struct rtnl_af_ops *ops)
{
	int err = init_srcu_struct(&ops->srcu);

	if (err)
		return err;

	rtnl_lock();
	list_add_tail_rcu(&ops->list, &rtnl_af_ops);
	rtnl_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(rtnl_af_register);

/**
 * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink.
 * @ops: struct rtnl_af_ops * to unregister
 */
void rtnl_af_unregister(struct rtnl_af_ops *ops)
{
	rtnl_lock();
	list_del_rcu(&ops->list);
	rtnl_unlock();

	/* Wait for both RCU list walkers and SRCU users before cleanup. */
	synchronize_rcu();
	synchronize_srcu(&ops->srcu);
	cleanup_srcu_struct(&ops->srcu);
}
EXPORT_SYMBOL_GPL(rtnl_af_unregister);

/*
 * Space needed for IFLA_AF_SPEC: the outer nest plus, for every registered
 * af_ops with ->get_link_af_size(), one inner nest and its reported payload.
 */
static size_t rtnl_link_get_af_size(const struct net_device *dev,
				    u32 ext_filter_mask)
{
	struct rtnl_af_ops *af_ops;
	size_t size;

	/* IFLA_AF_SPEC */
	size = nla_total_size(sizeof(struct nlattr));

	rcu_read_lock();
	list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
		if (af_ops->get_link_af_size) {
			/* AF_* + nested data */
			size += nla_total_size(sizeof(struct nlattr)) +
				af_ops->get_link_af_size(dev, ext_filter_mask);
		}
	}
	rcu_read_unlock();

	return size;
}

/* True when @dev has a master device that carries rtnl_link_ops. */
static bool rtnl_have_link_slave_info(const struct net_device *dev)
{
	struct net_device *master_dev;
	bool ret = false;

	rcu_read_lock();

	master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev);
	if (master_dev && master_dev->rtnl_link_ops)
		ret = true;
	rcu_read_unlock();
	return ret;
}

/*
 * Emit IFLA_INFO_SLAVE_KIND and, when the master's ops provide
 * ->fill_slave_info(), a IFLA_INFO_SLAVE_DATA nest.  Returns 0 when there is
 * no master or the master has no ops, -EMSGSIZE when an attribute does not
 * fit, or the negative value returned by ->fill_slave_info().
 */
static int rtnl_link_slave_info_fill(struct sk_buff *skb,
				     const struct net_device *dev)
{
	struct net_device *master_dev;
	const struct rtnl_link_ops *ops;
	struct nlattr *slave_data;
	int err;

	master_dev = netdev_master_upper_dev_get((struct net_device *) dev);
	if (!master_dev)
		return 0;
	ops = master_dev->rtnl_link_ops;
	if (!ops)
		return 0;
	if (nla_put_string(skb, IFLA_INFO_SLAVE_KIND, ops->kind) < 0)
		return -EMSGSIZE;
	if (ops->fill_slave_info) {
		slave_data = nla_nest_start_noflag(skb, IFLA_INFO_SLAVE_DATA);
		if (!slave_data)
			return -EMSGSIZE;
		err = ops->fill_slave_info(skb, master_dev, dev);
		if (err < 0)
			goto err_cancel_slave_data;
		nla_nest_end(skb, slave_data);
	}
	return 0;

err_cancel_slave_data:
	/* Drop the partially written nest before reporting the error. */
	nla_nest_cancel(skb, slave_data);
	return err;
}

/*
 * Emit IFLA_INFO_KIND, the optional xstats (->fill_xstats()) and the optional
 * IFLA_INFO_DATA nest (->fill_info()) for @dev.  Returns 0 when the device
 * has no ops, -EMSGSIZE when an attribute does not fit, or the negative value
 * returned by a callback.
 */
static int rtnl_link_info_fill(struct sk_buff *skb,
			       const struct net_device *dev)
{
	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
	struct nlattr *data;
	int err;

	if (!ops)
		return 0;
	if (nla_put_string(skb, IFLA_INFO_KIND, ops->kind) < 0)
		return -EMSGSIZE;
	if (ops->fill_xstats) {
		err = ops->fill_xstats(skb, dev);
		if (err < 0)
			return err;
	}
	if (ops->fill_info) {
		data = nla_nest_start_noflag(skb, IFLA_INFO_DATA);
		if (data == NULL)
			return -EMSGSIZE;
		err = ops->fill_info(skb, dev);
		if (err < 0)
			goto err_cancel_data;
		nla_nest_end(skb, data);
	}
	return 0;

err_cancel_data:
	nla_nest_cancel(skb, data);
	return err;
}

/*
 * Emit the IFLA_LINKINFO nest: the device's own link info followed by its
 * slave info.  Any failure cancels the whole nest and returns the error
 * (-EMSGSIZE when the nest itself cannot be started).
 */
static int rtnl_link_fill(struct sk_buff *skb, const struct net_device *dev)
{
	struct nlattr *linkinfo;
	int err = -EMSGSIZE;

	linkinfo = nla_nest_start_noflag(skb, IFLA_LINKINFO);
	if (linkinfo == NULL)
		goto out;

	err = rtnl_link_info_fill(skb, dev);
	if (err < 0)
		goto err_cancel_link;

	err = rtnl_link_slave_info_fill(skb, dev);
	if (err < 0)
		goto err_cancel_link;

	nla_nest_end(skb, linkinfo);
	return 0;

err_cancel_link:
	nla_nest_cancel(skb, linkinfo);
out:
	return err;
}

/* Pass @skb to nlmsg_notify() on @net's rtnl socket, allocating with GFP_KERNEL. */
int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, unsigned int group, int echo)
{
	struct sock *rtnl = net->rtnl;

	return nlmsg_notify(rtnl, skb, pid, group, echo, GFP_KERNEL);
}

/* Pass @skb to nlmsg_unicast() on @net's rtnl socket, addressed to @pid. */
int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
{
	struct sock *rtnl = net->rtnl;

	return nlmsg_unicast(rtnl, skb, pid);
}
EXPORT_SYMBOL(rtnl_unicast);

/*
 * Pass @skb to nlmsg_notify() on @net's rtnl socket; the report argument is
 * derived from @nlh with nlmsg_report().  The nlmsg_notify() result is not
 * propagated.
 */
void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
		 const struct nlmsghdr *nlh, gfp_t flags)
{
	struct sock *rtnl = net->rtnl;

	nlmsg_notify(rtnl, skb, pid, group, nlmsg_report(nlh), flags);
}
EXPORT_SYMBOL(rtnl_notify); void rtnl_set_sk_err(struct net *net, u32 group, int error) { struct sock *rtnl = net->rtnl; netlink_set_err(rtnl, 0, group, error); } EXPORT_SYMBOL(rtnl_set_sk_err); int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct nlattr *mx; int i, valid = 0; /* nothing is dumped for dst_default_metrics, so just skip the loop */ if (metrics == dst_default_metrics.metrics) return 0; mx = nla_nest_start_noflag(skb, RTA_METRICS); if (mx == NULL) return -ENOBUFS; for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { if (i == RTAX_CC_ALGO - 1) { char tmp[TCP_CA_NAME_MAX], *name; name = tcp_ca_get_name_by_key(metrics[i], tmp); if (!name) continue; if (nla_put_string(skb, i + 1, name)) goto nla_put_failure; } else if (i == RTAX_FEATURES - 1) { u32 user_features = metrics[i] & RTAX_FEATURE_MASK; if (!user_features) continue; BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); if (nla_put_u32(skb, i + 1, user_features)) goto nla_put_failure; } else { if (nla_put_u32(skb, i + 1, metrics[i])) goto nla_put_failure; } valid++; } } if (!valid) { nla_nest_cancel(skb, mx); return 0; } return nla_nest_end(skb, mx); nla_put_failure: nla_nest_cancel(skb, mx); return -EMSGSIZE; } EXPORT_SYMBOL(rtnetlink_put_metrics); int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { .rta_error = error, .rta_id = id, }; if (dst) { ci.rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse); ci.rta_used = dst->__use; ci.rta_clntref = rcuref_read(&dst->__rcuref); } if (expires) { unsigned long clock; clock = jiffies_to_clock_t(abs(expires)); clock = min_t(unsigned long, clock, INT_MAX); ci.rta_expires = (expires > 0) ? 
clock : -clock; } return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); void netdev_set_operstate(struct net_device *dev, int newstate) { unsigned int old = READ_ONCE(dev->operstate); do { if (old == newstate) return; } while (!try_cmpxchg(&dev->operstate, &old, newstate)); netdev_state_change(dev); } EXPORT_SYMBOL(netdev_set_operstate); static void set_operstate(struct net_device *dev, unsigned char transition) { unsigned char operstate = READ_ONCE(dev->operstate); switch (transition) { case IF_OPER_UP: if ((operstate == IF_OPER_DORMANT || operstate == IF_OPER_TESTING || operstate == IF_OPER_UNKNOWN) && !netif_dormant(dev) && !netif_testing(dev)) operstate = IF_OPER_UP; break; case IF_OPER_TESTING: if (netif_oper_up(dev)) operstate = IF_OPER_TESTING; break; case IF_OPER_DORMANT: if (netif_oper_up(dev)) operstate = IF_OPER_DORMANT; break; } netdev_set_operstate(dev, operstate); } static unsigned int rtnl_dev_get_flags(const struct net_device *dev) { return (dev->flags & ~(IFF_PROMISC | IFF_ALLMULTI)) | (dev->gflags & (IFF_PROMISC | IFF_ALLMULTI)); } static unsigned int rtnl_dev_combine_flags(const struct net_device *dev, const struct ifinfomsg *ifm) { unsigned int flags = ifm->ifi_flags; /* bugwards compatibility: ifi_change == 0 is treated as ~0 */ if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | (rtnl_dev_get_flags(dev) & ~ifm->ifi_change); return flags; } static void copy_rtnl_link_stats(struct rtnl_link_stats *a, const struct rtnl_link_stats64 *b) { a->rx_packets = b->rx_packets; a->tx_packets = b->tx_packets; a->rx_bytes = b->rx_bytes; a->tx_bytes = b->tx_bytes; a->rx_errors = b->rx_errors; a->tx_errors = b->tx_errors; a->rx_dropped = b->rx_dropped; a->tx_dropped = b->tx_dropped; a->multicast = b->multicast; a->collisions = b->collisions; a->rx_length_errors = b->rx_length_errors; a->rx_over_errors = b->rx_over_errors; a->rx_crc_errors = b->rx_crc_errors; a->rx_frame_errors = b->rx_frame_errors; a->rx_fifo_errors 
= b->rx_fifo_errors; a->rx_missed_errors = b->rx_missed_errors; a->tx_aborted_errors = b->tx_aborted_errors; a->tx_carrier_errors = b->tx_carrier_errors; a->tx_fifo_errors = b->tx_fifo_errors; a->tx_heartbeat_errors = b->tx_heartbeat_errors; a->tx_window_errors = b->tx_window_errors; a->rx_compressed = b->rx_compressed; a->tx_compressed = b->tx_compressed; a->rx_nohandler = b->rx_nohandler; } /* All VF info */ static inline int rtnl_vfinfo_size(const struct net_device *dev, u32 ext_filter_mask) { if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF)) { int num_vfs = dev_num_vf(dev->dev.parent); size_t size = nla_total_size(0); size += num_vfs * (nla_total_size(0) + nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_broadcast)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(0) + /* nest IFLA_VF_VLAN_LIST */ nla_total_size(MAX_VLAN_LIST_LEN * sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + nla_total_size(sizeof(struct ifla_vf_rss_query_en)) + nla_total_size(sizeof(struct ifla_vf_trust))); if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { size += num_vfs * (nla_total_size(0) + /* nest IFLA_VF_STATS */ /* IFLA_VF_STATS_RX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_PACKETS */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_RX_BYTES */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_BYTES */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_BROADCAST */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_MULTICAST */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_RX_DROPPED */ nla_total_size_64bit(sizeof(__u64)) + /* IFLA_VF_STATS_TX_DROPPED */ nla_total_size_64bit(sizeof(__u64))); } return size; } else return 0; } static size_t rtnl_port_size(const struct net_device *dev, u32 
ext_filter_mask)
{
	/* Worst-case payload of a single port description. */
	size_t port_size = nla_total_size(4)		/* PORT_VF */
		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
		+ nla_total_size(1)			/* PORT_VDP_REQUEST */
		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */
	size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
	size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
		+ port_size;
	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
		+ port_size;

	/* Same preconditions as rtnl_port_fill(): otherwise nothing is emitted. */
	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
	    !(ext_filter_mask & RTEXT_FILTER_VF))
		return 0;
	/* With VFs: the self port, the list nest and one port entry per VF. */
	if (dev_num_vf(dev->dev.parent))
		return port_self_size + vf_ports_size +
			vf_port_size * dev_num_vf(dev->dev.parent);
	else
		return port_self_size;
}

/* Fixed space reserved for the IFLA_XDP nest. */
static size_t rtnl_xdp_size(void)
{
	size_t xdp_size = nla_total_size(0) +	/* nest IFLA_XDP */
			  nla_total_size(1) +	/* XDP_ATTACHED */
			  nla_total_size(4) +	/* XDP_PROG_ID (or 1st mode) */
			  nla_total_size(4);	/* XDP_<mode>_PROG_ID */

	return xdp_size;
}

/*
 * Space for the property list: one nest plus ALTIFNAMSIZ per entry found on
 * dev->name_node->list; 0 when that list is empty.
 */
static size_t rtnl_prop_list_size(const struct net_device *dev)
{
	struct netdev_name_node *name_node;
	unsigned int cnt = 0;

	rcu_read_lock();
	list_for_each_entry_rcu(name_node, &dev->name_node->list, list)
		cnt++;
	rcu_read_unlock();

	if (!cnt)
		return 0;

	return nla_total_size(0) + cnt * nla_total_size(ALTIFNAMSIZ);
}

/* Space for the proto-down attributes: a 1-byte flag plus a nest holding a u32. */
static size_t rtnl_proto_down_size(const struct net_device *dev)
{
	size_t size = nla_total_size(1);

	/* Assume dev->proto_down_reason is not zero. */
	size += nla_total_size(0) + nla_total_size(4);

	return size;
}

/* Space for the devlink port nest; the handle is counted only when a port is set. */
static size_t rtnl_devlink_port_size(const struct net_device *dev)
{
	size_t size = nla_total_size(0); /* nest IFLA_DEVLINK_PORT */

	if (dev->devlink_port)
		size += devlink_nl_port_handle_size(dev->devlink_port);

	return size;
}

/* Space for the DPLL pin nest plus the size reported by dpll_netdev_pin_handle_size(). */
static size_t rtnl_dpll_pin_size(const struct net_device *dev)
{
	size_t size = nla_total_size(0); /* nest IFLA_DPLL_PIN */

	size += dpll_netdev_pin_handle_size(dev);

	return size;
}

/*
 * Upper bound on the size of a link message for @dev: the ifinfomsg header
 * plus one term per attribute listed below.  VF-related terms depend on
 * @ext_filter_mask.
 */
static noinline size_t if_nlmsg_size(const struct net_device *dev,
				     u32 ext_filter_mask)
{
	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
	       + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */
	       + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */
	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
	       + nla_total_size_64bit(sizeof(struct rtnl_link_ifmap))
	       + nla_total_size(sizeof(struct rtnl_link_stats))
	       + nla_total_size_64bit(sizeof(struct rtnl_link_stats64))
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
	       + nla_total_size(4) /* IFLA_TXQLEN */
	       + nla_total_size(4) /* IFLA_WEIGHT */
	       + nla_total_size(4) /* IFLA_MTU */
	       + nla_total_size(4) /* IFLA_LINK */
	       + nla_total_size(4) /* IFLA_MASTER */
	       + nla_total_size(1) /* IFLA_CARRIER */
	       + nla_total_size(4) /* IFLA_PROMISCUITY */
	       + nla_total_size(4) /* IFLA_ALLMULTI */
	       + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */
	       + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */
	       + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
	       + nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
	       + nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
	       + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */
	       + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */
	       + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
	       + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
	       + nla_total_size(1) /* IFLA_OPERSTATE */
	       + nla_total_size(1) /* IFLA_LINKMODE */
	       + nla_total_size(4) /* IFLA_CARRIER_CHANGES */
	       + nla_total_size(4) /* IFLA_LINK_NETNSID */
	       + nla_total_size(4) /* IFLA_GROUP */
	       + nla_total_size(ext_filter_mask
			        & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
	       + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
	       + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
	       + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
	       + rtnl_link_get_af_size(dev, ext_filter_mask) /* IFLA_AF_SPEC */
	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
	       + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
	       + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */
	       + rtnl_xdp_size() /* IFLA_XDP */
	       + nla_total_size(4)  /* IFLA_EVENT */
	       + nla_total_size(4)  /* IFLA_NEW_NETNSID */
	       + nla_total_size(4)  /* IFLA_NEW_IFINDEX */
	       + rtnl_proto_down_size(dev)  /* proto down */
	       + nla_total_size(4)  /* IFLA_TARGET_NETNSID */
	       + nla_total_size(4)  /* IFLA_CARRIER_UP_COUNT */
	       + nla_total_size(4)  /* IFLA_CARRIER_DOWN_COUNT */
	       + nla_total_size(4)  /* IFLA_MIN_MTU */
	       + nla_total_size(4)  /* IFLA_MAX_MTU */
	       + rtnl_prop_list_size(dev)
	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
	       + rtnl_devlink_port_size(dev)
	       + rtnl_dpll_pin_size(dev)
	       + nla_total_size(8)  /* IFLA_MAX_PACING_OFFLOAD_HORIZON */
	       + 0;
}

/*
 * Emit the IFLA_VF_PORTS nest with one IFLA_VF_PORT entry per VF.  A VF whose
 * ndo_get_vf_port() fails with anything other than -EMSGSIZE is skipped;
 * -EMSGSIZE (or a failed put) cancels the whole nest and returns -EMSGSIZE.
 */
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
{
	struct nlattr *vf_ports;
	struct nlattr *vf_port;
	int vf;
	int err;

	vf_ports = nla_nest_start_noflag(skb, IFLA_VF_PORTS);
	if (!vf_ports)
		return -EMSGSIZE;

	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
		vf_port = nla_nest_start_noflag(skb, IFLA_VF_PORT);
		if (!vf_port)
			goto nla_put_failure;
		if (nla_put_u32(skb, IFLA_PORT_VF, vf))
			goto nla_put_failure;
		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
		if (err == -EMSGSIZE)
			goto nla_put_failure;
		if (err) {
			/* Driver had nothing usable for this VF: drop its entry only. */
			nla_nest_cancel(skb, vf_port);
			continue;
		}
		nla_nest_end(skb, vf_port);
	}

	nla_nest_end(skb, vf_ports);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, vf_ports);
	return -EMSGSIZE;
}

/*
 * Emit the IFLA_PORT_SELF nest via ndo_get_vf_port(PORT_SELF_VF).  A driver
 * error cancels the nest; only -EMSGSIZE is reported to the caller, every
 * other driver error is turned into 0.
 */
static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
{
	struct nlattr *port_self;
	int err;

	port_self = nla_nest_start_noflag(skb, IFLA_PORT_SELF);
	if (!port_self)
		return -EMSGSIZE;

	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
	if (err) {
		nla_nest_cancel(skb, port_self);
		return (err == -EMSGSIZE) ? err : 0;
	}

	nla_nest_end(skb, port_self);

	return 0;
}

/*
 * Emit port information: the self port and, when the parent reports VFs, the
 * per-VF list.  Does nothing (returns 0) unless the driver implements
 * ndo_get_vf_port, the device has a parent and RTEXT_FILTER_VF was requested.
 */
static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev,
			  u32 ext_filter_mask)
{
	int err;

	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent ||
	    !(ext_filter_mask & RTEXT_FILTER_VF))
		return 0;

	err = rtnl_port_self_fill(skb, dev);
	if (err)
		return err;

	if (dev_num_vf(dev->dev.parent)) {
		err = rtnl_vf_ports_fill(skb, dev);
		if (err)
			return err;
	}

	return 0;
}

/*
 * Emit IFLA_PHYS_PORT_ID.  -EOPNOTSUPP from dev_get_phys_port_id() means
 * "nothing to report" and yields 0; other errors are returned as is.
 */
static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
{
	int err;
	struct netdev_phys_item_id ppid;

	err = dev_get_phys_port_id(dev, &ppid);
	if (err) {
		if (err == -EOPNOTSUPP)
			return 0;
		return err;
	}

	if (nla_put(skb, IFLA_PHYS_PORT_ID, ppid.id_len, ppid.id))
		return -EMSGSIZE;

	return 0;
}

/*
 * Emit IFLA_PHYS_PORT_NAME.  -EOPNOTSUPP from dev_get_phys_port_name() yields
 * 0; other errors are returned as is.
 */
static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
{
	char name[IFNAMSIZ];
	int err;

	err = dev_get_phys_port_name(dev, name, sizeof(name));
	if (err) {
		if (err == -EOPNOTSUPP)
			return 0;
		return err;
	}

	if (nla_put_string(skb, IFLA_PHYS_PORT_NAME, name))
		return -EMSGSIZE;

	return 0;
}

/*
 * Emit IFLA_PHYS_SWITCH_ID from dev_get_port_parent_id().  -EOPNOTSUPP yields
 * 0; other errors are returned as is.
 */
static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
	struct netdev_phys_item_id ppid = { };
	int err;

	err = dev_get_port_parent_id(dev, &ppid, false);
	if (err) {
		if (err == -EOPNOTSUPP)
			return 0;
		return err;
	}

	if (nla_put(skb, IFLA_PHYS_SWITCH_ID, ppid.id_len, ppid.id))
		return -EMSGSIZE;

	return 0;
}

/*
 * Emit IFLA_STATS64 and IFLA_STATS.  The 64-bit stats are written by
 * dev_get_stats() straight into the reserved attribute, and the legacy
 * attribute is then filled by copying from it.
 */
static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
					      struct net_device *dev)
{
	struct rtnl_link_stats64 *sp;
	struct nlattr *attr;

	attr = nla_reserve_64bit(skb, IFLA_STATS64,
				 sizeof(struct rtnl_link_stats64), IFLA_PAD);
	if (!attr)
		return -EMSGSIZE;

	sp = nla_data(attr);
	dev_get_stats(dev, sp);

	attr = nla_reserve(skb, IFLA_STATS,
			   sizeof(struct rtnl_link_stats));
	if (!attr)
		return -EMSGSIZE;

	copy_rtnl_link_stats(nla_data(attr), sp);

	return 0;
}

static
noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct net_device *dev, int vfs_num, u32 ext_filter_mask) { struct ifla_vf_rss_query_en vf_rss_query_en; struct nlattr *vf, *vfstats, *vfvlanlist; struct ifla_vf_link_state vf_linkstate; struct ifla_vf_vlan_info vf_vlan_info; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_stats vf_stats; struct ifla_vf_trust vf_trust; struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; struct ifla_vf_mac vf_mac; struct ifla_vf_broadcast vf_broadcast; struct ifla_vf_info ivi; struct ifla_vf_guid node_guid; struct ifla_vf_guid port_guid; memset(&ivi, 0, sizeof(ivi)); /* Not all SR-IOV capable drivers support the * spoofcheck and "RSS query enable" query. Preset to * -1 so the user space tool can detect that the driver * didn't report anything. */ ivi.spoofchk = -1; ivi.rss_query_en = -1; ivi.trusted = -1; /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero */ ivi.linkstate = 0; /* VLAN Protocol by default is 802.1Q */ ivi.vlan_proto = htons(ETH_P_8021Q); if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; memset(&vf_vlan_info, 0, sizeof(vf_vlan_info)); memset(&node_guid, 0, sizeof(node_guid)); memset(&port_guid, 0, sizeof(port_guid)); vf_mac.vf = vf_vlan.vf = vf_vlan_info.vf = vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = vf_linkstate.vf = vf_rss_query_en.vf = vf_trust.vf = node_guid.vf = port_guid.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); memcpy(vf_broadcast.broadcast, dev->broadcast, dev->addr_len); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_vlan_info.vlan = ivi.vlan; vf_vlan_info.qos = ivi.qos; vf_vlan_info.vlan_proto = ivi.vlan_proto; vf_tx_rate.rate = ivi.max_tx_rate; vf_rate.min_tx_rate = ivi.min_tx_rate; vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = 
ivi.trusted; vf = nla_nest_start_noflag(skb, IFLA_VF_INFO); if (!vf) return -EMSGSIZE; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_BROADCAST, sizeof(vf_broadcast), &vf_broadcast) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), &vf_rate) || nla_put(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate) || nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), &vf_spoofchk) || nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), &vf_linkstate) || nla_put(skb, IFLA_VF_RSS_QUERY_EN, sizeof(vf_rss_query_en), &vf_rss_query_en) || nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) goto nla_put_vf_failure; if (dev->netdev_ops->ndo_get_vf_guid && !dev->netdev_ops->ndo_get_vf_guid(dev, vfs_num, &node_guid, &port_guid)) { if (nla_put(skb, IFLA_VF_IB_NODE_GUID, sizeof(node_guid), &node_guid) || nla_put(skb, IFLA_VF_IB_PORT_GUID, sizeof(port_guid), &port_guid)) goto nla_put_vf_failure; } vfvlanlist = nla_nest_start_noflag(skb, IFLA_VF_VLAN_LIST); if (!vfvlanlist) goto nla_put_vf_failure; if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), &vf_vlan_info)) { nla_nest_cancel(skb, vfvlanlist); goto nla_put_vf_failure; } nla_nest_end(skb, vfvlanlist); if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) { memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS); if (!vfstats) goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, vf_stats.rx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, vf_stats.tx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES, vf_stats.rx_bytes, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES, vf_stats.tx_bytes, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || 
nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, vf_stats.multicast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED, vf_stats.rx_dropped, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED, vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) { nla_nest_cancel(skb, vfstats); goto nla_put_vf_failure; } nla_nest_end(skb, vfstats); } nla_nest_end(skb, vf); return 0; nla_put_vf_failure: nla_nest_cancel(skb, vf); return -EMSGSIZE; } static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, struct net_device *dev, u32 ext_filter_mask) { struct nlattr *vfinfo; int i, num_vfs; if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0)) return 0; num_vfs = dev_num_vf(dev->dev.parent); if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs)) return -EMSGSIZE; if (!dev->netdev_ops->ndo_get_vf_config) return 0; vfinfo = nla_nest_start_noflag(skb, IFLA_VFINFO_LIST); if (!vfinfo) return -EMSGSIZE; for (i = 0; i < num_vfs; i++) { if (rtnl_fill_vfinfo(skb, dev, i, ext_filter_mask)) { nla_nest_cancel(skb, vfinfo); return -EMSGSIZE; } } nla_nest_end(skb, vfinfo); return 0; } static int rtnl_fill_link_ifmap(struct sk_buff *skb, const struct net_device *dev) { struct rtnl_link_ifmap map; memset(&map, 0, sizeof(map)); map.mem_start = READ_ONCE(dev->mem_start); map.mem_end = READ_ONCE(dev->mem_end); map.base_addr = READ_ONCE(dev->base_addr); map.irq = READ_ONCE(dev->irq); map.dma = READ_ONCE(dev->dma); map.port = READ_ONCE(dev->if_port); if (nla_put_64bit(skb, IFLA_MAP, sizeof(map), &map, IFLA_PAD)) return -EMSGSIZE; return 0; } static u32 rtnl_xdp_prog_skb(struct net_device *dev) { const struct bpf_prog *generic_xdp_prog; u32 res = 0; rcu_read_lock(); generic_xdp_prog = rcu_dereference(dev->xdp_prog); if (generic_xdp_prog) res = generic_xdp_prog->aux->id; rcu_read_unlock(); return res; } static u32 rtnl_xdp_prog_drv(struct net_device *dev) { return dev_xdp_prog_id(dev, XDP_MODE_DRV); } static u32 rtnl_xdp_prog_hw(struct net_device *dev) { return 
dev_xdp_prog_id(dev, XDP_MODE_HW); } static int rtnl_xdp_report_one(struct sk_buff *skb, struct net_device *dev, u32 *prog_id, u8 *mode, u8 tgt_mode, u32 attr, u32 (*get_prog_id)(struct net_device *dev)) { u32 curr_id; int err; curr_id = get_prog_id(dev); if (!curr_id) return 0; *prog_id = curr_id; err = nla_put_u32(skb, attr, curr_id); if (err) return err; if (*mode != XDP_ATTACHED_NONE) *mode = XDP_ATTACHED_MULTI; else *mode = tgt_mode; return 0; } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *xdp; u32 prog_id; int err; u8 mode; xdp = nla_nest_start_noflag(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; prog_id = 0; mode = XDP_ATTACHED_NONE; err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_SKB, IFLA_XDP_SKB_PROG_ID, rtnl_xdp_prog_skb); if (err) goto err_cancel; err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_DRV, IFLA_XDP_DRV_PROG_ID, rtnl_xdp_prog_drv); if (err) goto err_cancel; err = rtnl_xdp_report_one(skb, dev, &prog_id, &mode, XDP_ATTACHED_HW, IFLA_XDP_HW_PROG_ID, rtnl_xdp_prog_hw); if (err) goto err_cancel; err = nla_put_u8(skb, IFLA_XDP_ATTACHED, mode); if (err) goto err_cancel; if (prog_id && mode != XDP_ATTACHED_MULTI) { err = nla_put_u32(skb, IFLA_XDP_PROG_ID, prog_id); if (err) goto err_cancel; } nla_nest_end(skb, xdp); return 0; err_cancel: nla_nest_cancel(skb, xdp); return err; } static u32 rtnl_get_event(unsigned long event) { u32 rtnl_event_type = IFLA_EVENT_NONE; switch (event) { case NETDEV_REBOOT: rtnl_event_type = IFLA_EVENT_REBOOT; break; case NETDEV_FEAT_CHANGE: rtnl_event_type = IFLA_EVENT_FEATURES; break; case NETDEV_BONDING_FAILOVER: rtnl_event_type = IFLA_EVENT_BONDING_FAILOVER; break; case NETDEV_NOTIFY_PEERS: rtnl_event_type = IFLA_EVENT_NOTIFY_PEERS; break; case NETDEV_RESEND_IGMP: rtnl_event_type = IFLA_EVENT_IGMP_RESEND; break; case NETDEV_CHANGEINFODATA: rtnl_event_type = IFLA_EVENT_BONDING_OPTIONS; break; default: break; } return rtnl_event_type; } static int 
put_master_ifindex(struct sk_buff *skb, struct net_device *dev) { const struct net_device *upper_dev; int ret = 0; rcu_read_lock(); upper_dev = netdev_master_upper_dev_get_rcu(dev); if (upper_dev) ret = nla_put_u32(skb, IFLA_MASTER, READ_ONCE(upper_dev->ifindex)); rcu_read_unlock(); return ret; } static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev, bool force) { int iflink = dev_get_iflink(dev); if (force || READ_ONCE(dev->ifindex) != iflink) return nla_put_u32(skb, IFLA_LINK, iflink); return 0; } static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, struct net_device *dev) { char buf[IFALIASZ]; int ret; ret = dev_get_alias(dev, buf, sizeof(buf)); return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0; } static int rtnl_fill_link_netnsid(struct sk_buff *skb, const struct net_device *dev, struct net *src_net, gfp_t gfp) { bool put_iflink = false; if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); if (!net_eq(dev_net(dev), link_net)) { int id = peernet2id_alloc(src_net, link_net, gfp); if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) return -EMSGSIZE; put_iflink = true; } } return nla_put_iflink(skb, dev, put_iflink); } static int rtnl_fill_link_af(struct sk_buff *skb, const struct net_device *dev, u32 ext_filter_mask) { const struct rtnl_af_ops *af_ops; struct nlattr *af_spec; af_spec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!af_spec) return -EMSGSIZE; list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { struct nlattr *af; int err; if (!af_ops->fill_link_af) continue; af = nla_nest_start_noflag(skb, af_ops->family); if (!af) return -EMSGSIZE; err = af_ops->fill_link_af(skb, dev, ext_filter_mask); /* * Caller may return ENODATA to indicate that there * was no data to be dumped. This is not an error, it * means we should trim the attribute header and * continue. 
*/ if (err == -ENODATA) nla_nest_cancel(skb, af); else if (err < 0) return -EMSGSIZE; nla_nest_end(skb, af); } nla_nest_end(skb, af_spec); return 0; } static int rtnl_fill_alt_ifnames(struct sk_buff *skb, const struct net_device *dev) { struct netdev_name_node *name_node; int count = 0; list_for_each_entry_rcu(name_node, &dev->name_node->list, list) { if (nla_put_string(skb, IFLA_ALT_IFNAME, name_node->name)) return -EMSGSIZE; count++; } return count; } /* RCU protected. */ static int rtnl_fill_prop_list(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *prop_list; int ret; prop_list = nla_nest_start(skb, IFLA_PROP_LIST); if (!prop_list) return -EMSGSIZE; ret = rtnl_fill_alt_ifnames(skb, dev); if (ret <= 0) goto nest_cancel; nla_nest_end(skb, prop_list); return 0; nest_cancel: nla_nest_cancel(skb, prop_list); return ret; } static int rtnl_fill_proto_down(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *pr; u32 preason; if (nla_put_u8(skb, IFLA_PROTO_DOWN, READ_ONCE(dev->proto_down))) goto nla_put_failure; preason = READ_ONCE(dev->proto_down_reason); if (!preason) return 0; pr = nla_nest_start(skb, IFLA_PROTO_DOWN_REASON); if (!pr) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_PROTO_DOWN_REASON_VALUE, preason)) { nla_nest_cancel(skb, pr); goto nla_put_failure; } nla_nest_end(skb, pr); return 0; nla_put_failure: return -EMSGSIZE; } static int rtnl_fill_devlink_port(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *devlink_port_nest; int ret; devlink_port_nest = nla_nest_start(skb, IFLA_DEVLINK_PORT); if (!devlink_port_nest) return -EMSGSIZE; if (dev->devlink_port) { ret = devlink_nl_port_handle_fill(skb, dev->devlink_port); if (ret < 0) goto nest_cancel; } nla_nest_end(skb, devlink_port_nest); return 0; nest_cancel: nla_nest_cancel(skb, devlink_port_nest); return ret; } static int rtnl_fill_dpll_pin(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *dpll_pin_nest; int ret; dpll_pin_nest = 
nla_nest_start(skb, IFLA_DPLL_PIN);
	if (!dpll_pin_nest)
		return -EMSGSIZE;

	ret = dpll_netdev_add_pin_handle(skb, dev);
	if (ret < 0)
		goto nest_cancel;

	nla_nest_end(skb, dpll_pin_nest);
	return 0;

nest_cancel:
	nla_nest_cancel(skb, dpll_pin_nest);
	return ret;
}

/*
 * Build one link message describing @dev in @skb.
 *
 * @type, @pid, @seq and @flags go into the netlink header via
 * nlmsg_put(); @change is stored in ifi_change.  Optional attributes:
 * IFLA_TARGET_NETNSID when @tgt_netnsid >= 0, IFLA_EVENT when @event is
 * not IFLA_EVENT_NONE, IFLA_NEW_NETNSID when @new_nsid is non-NULL and
 * IFLA_NEW_IFINDEX when @new_ifindex is non-zero.  @ext_filter_mask is
 * forwarded to the VF, port and address-family fillers.
 *
 * The caller must hold RTNL (ASSERT_RTNL()).  Note that @gfp is not
 * referenced in this body: rtnl_fill_link_netnsid() is called with
 * GFP_ATOMIC because it runs inside the RCU read-side section below.
 *
 * Returns 0 on success.  On any failure the partially built message is
 * removed with nlmsg_cancel() and -EMSGSIZE is returned.
 */
static int rtnl_fill_ifinfo(struct sk_buff *skb,
			    struct net_device *dev, struct net *src_net,
			    int type, u32 pid, u32 seq, u32 change,
			    unsigned int flags, u32 ext_filter_mask,
			    u32 event, int *new_nsid, int new_ifindex,
			    int tgt_netnsid, gfp_t gfp)
{
	char devname[IFNAMSIZ];
	struct ifinfomsg *ifm;
	struct nlmsghdr *nlh;
	struct Qdisc *qdisc;

	ASSERT_RTNL();
	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	/* Fixed ifinfomsg header. */
	ifm = nlmsg_data(nlh);
	ifm->ifi_family = AF_UNSPEC;
	ifm->__ifi_pad = 0;
	ifm->ifi_type = READ_ONCE(dev->type);
	ifm->ifi_index = READ_ONCE(dev->ifindex);
	ifm->ifi_flags = dev_get_flags(dev);
	ifm->ifi_change = change;

	if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid))
		goto nla_put_failure;

	/* The name is copied to a local buffer before being emitted. */
	netdev_copy_name(dev, devname);
	if (nla_put_string(skb, IFLA_IFNAME, devname))
		goto nla_put_failure;

	/* Scalar device properties; the first failing put aborts the chain. */
	if (nla_put_u32(skb, IFLA_TXQLEN, READ_ONCE(dev->tx_queue_len)) ||
	    nla_put_u8(skb, IFLA_OPERSTATE,
		       netif_running(dev) ? READ_ONCE(dev->operstate) :
					    IF_OPER_DOWN) ||
	    nla_put_u8(skb, IFLA_LINKMODE, READ_ONCE(dev->link_mode)) ||
	    nla_put_u32(skb, IFLA_MTU, READ_ONCE(dev->mtu)) ||
	    nla_put_u32(skb, IFLA_MIN_MTU, READ_ONCE(dev->min_mtu)) ||
	    nla_put_u32(skb, IFLA_MAX_MTU, READ_ONCE(dev->max_mtu)) ||
	    nla_put_u32(skb, IFLA_GROUP, READ_ONCE(dev->group)) ||
	    nla_put_u32(skb, IFLA_PROMISCUITY, READ_ONCE(dev->promiscuity)) ||
	    nla_put_u32(skb, IFLA_ALLMULTI, READ_ONCE(dev->allmulti)) ||
	    nla_put_u32(skb, IFLA_NUM_TX_QUEUES,
			READ_ONCE(dev->num_tx_queues)) ||
	    nla_put_u32(skb, IFLA_GSO_MAX_SEGS,
			READ_ONCE(dev->gso_max_segs)) ||
	    nla_put_u32(skb, IFLA_GSO_MAX_SIZE,
			READ_ONCE(dev->gso_max_size)) ||
	    nla_put_u32(skb, IFLA_GRO_MAX_SIZE,
			READ_ONCE(dev->gro_max_size)) ||
	    nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE,
			READ_ONCE(dev->gso_ipv4_max_size)) ||
	    nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE,
			READ_ONCE(dev->gro_ipv4_max_size)) ||
	    nla_put_u32(skb, IFLA_TSO_MAX_SIZE,
			READ_ONCE(dev->tso_max_size)) ||
	    nla_put_u32(skb, IFLA_TSO_MAX_SEGS,
			READ_ONCE(dev->tso_max_segs)) ||
	    nla_put_uint(skb, IFLA_MAX_PACING_OFFLOAD_HORIZON,
			 READ_ONCE(dev->max_pacing_offload_horizon)) ||
#ifdef CONFIG_RPS
	    nla_put_u32(skb, IFLA_NUM_RX_QUEUES,
			READ_ONCE(dev->num_rx_queues)) ||
#endif
	    put_master_ifindex(skb, dev) ||
	    nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) ||
	    nla_put_ifalias(skb, dev) ||
	    nla_put_u32(skb, IFLA_CARRIER_CHANGES,
			atomic_read(&dev->carrier_up_count) +
			atomic_read(&dev->carrier_down_count)) ||
	    nla_put_u32(skb, IFLA_CARRIER_UP_COUNT,
			atomic_read(&dev->carrier_up_count)) ||
	    nla_put_u32(skb, IFLA_CARRIER_DOWN_COUNT,
			atomic_read(&dev->carrier_down_count)))
		goto nla_put_failure;

	if (rtnl_fill_proto_down(skb, dev))
		goto nla_put_failure;

	if (event != IFLA_EVENT_NONE) {
		if (nla_put_u32(skb, IFLA_EVENT, event))
			goto nla_put_failure;
	}

	/* Hardware and broadcast addresses only for devices that have one. */
	if (dev->addr_len) {
		if (nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr) ||
		    nla_put(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast))
			goto nla_put_failure;
	}

	if (rtnl_phys_port_id_fill(skb, dev))
		goto nla_put_failure;

	if (rtnl_phys_port_name_fill(skb, dev))
		goto nla_put_failure;

	if (rtnl_phys_switch_id_fill(skb, dev))
		goto nla_put_failure;

	if (rtnl_fill_stats(skb, dev))
		goto nla_put_failure;

	if (rtnl_fill_vf(skb, dev, ext_filter_mask))
		goto nla_put_failure;

	if (rtnl_port_fill(skb, dev, ext_filter_mask))
		goto nla_put_failure;

	if (rtnl_xdp_fill(skb, dev))
		goto nla_put_failure;

	if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) {
		if (rtnl_link_fill(skb, dev) < 0)
			goto nla_put_failure;
	}

	if (new_nsid &&
	    nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0)
		goto nla_put_failure;
	if (new_ifindex &&
	    nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
		goto nla_put_failure;

	/* The permanent address is reported only if it is not all zeroes. */
	if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) &&
	    nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr))
		goto nla_put_failure;

	/*
	 * The fillers below run inside an RCU read-side section; failures
	 * in here must leave through nla_put_failure_rcu so the section is
	 * closed before the message is cancelled.
	 */
	rcu_read_lock();
	if (rtnl_fill_link_netnsid(skb, dev, src_net, GFP_ATOMIC))
		goto nla_put_failure_rcu;
	qdisc = rcu_dereference(dev->qdisc);
	if (qdisc && nla_put_string(skb, IFLA_QDISC, qdisc->ops->id))
		goto nla_put_failure_rcu;
	if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
		goto nla_put_failure_rcu;
	if (rtnl_fill_link_ifmap(skb, dev))
		goto nla_put_failure_rcu;
	if (rtnl_fill_prop_list(skb, dev))
		goto nla_put_failure_rcu;
	rcu_read_unlock();

	if (dev->dev.parent &&
	    nla_put_string(skb, IFLA_PARENT_DEV_NAME,
			   dev_name(dev->dev.parent)))
		goto nla_put_failure;

	if (dev->dev.parent && dev->dev.parent->bus &&
	    nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME,
			   dev->dev.parent->bus->name))
		goto nla_put_failure;

	if (rtnl_fill_devlink_port(skb, dev))
		goto nla_put_failure;

	if (rtnl_fill_dpll_pin(skb, dev))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure_rcu:
	rcu_read_unlock();
nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/* Validation policy for top-level IFLA_* link attributes. */
static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
	[IFLA_UNSPEC]		= { .strict_start_type = IFLA_DPLL_PIN },
	[IFLA_IFNAME]		= { .type = NLA_STRING, .len = IFNAMSIZ-1 },
[IFLA_ADDRESS]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[IFLA_BROADCAST]	= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[IFLA_MAP]		= { .len = sizeof(struct rtnl_link_ifmap) },
	[IFLA_MTU]		= { .type = NLA_U32 },
	[IFLA_LINK]		= { .type = NLA_U32 },
	[IFLA_MASTER]		= { .type = NLA_U32 },
	[IFLA_CARRIER]		= { .type = NLA_U8 },
	[IFLA_TXQLEN]		= { .type = NLA_U32 },
	[IFLA_WEIGHT]		= { .type = NLA_U32 },
	[IFLA_OPERSTATE]	= { .type = NLA_U8 },
	[IFLA_LINKMODE]		= { .type = NLA_U8 },
	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
	[IFLA_NET_NS_FD]	= { .type = NLA_U32 },
	/* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to
	 * allow 0-length string (needed to remove an alias).
	 */
	[IFLA_IFALIAS]	        = { .type = NLA_BINARY, .len = IFALIASZ - 1 },
	[IFLA_VFINFO_LIST]	= { .type = NLA_NESTED },
	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
	[IFLA_AF_SPEC]		= { .type = NLA_NESTED },
	[IFLA_EXT_MASK]		= { .type = NLA_U32 },
	[IFLA_PROMISCUITY]	= { .type = NLA_U32 },
	[IFLA_NUM_TX_QUEUES]	= { .type = NLA_U32 },
	[IFLA_NUM_RX_QUEUES]	= { .type = NLA_U32 },
	[IFLA_GSO_MAX_SEGS]	= { .type = NLA_U32 },
	[IFLA_GSO_MAX_SIZE]	= NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1),
	[IFLA_PHYS_PORT_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
	[IFLA_CARRIER_CHANGES]	= { .type = NLA_U32 },  /* ignored */
	[IFLA_PHYS_SWITCH_ID]	= { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
	[IFLA_LINK_NETNSID]	= { .type = NLA_S32 },
	[IFLA_PROTO_DOWN]	= { .type = NLA_U8 },
	[IFLA_XDP]		= { .type = NLA_NESTED },
	[IFLA_EVENT]		= { .type = NLA_U32 },
	[IFLA_GROUP]		= { .type = NLA_U32 },
	[IFLA_TARGET_NETNSID]	= { .type = NLA_S32 },
	[IFLA_CARRIER_UP_COUNT]	= { .type = NLA_U32 },
	[IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 },
	[IFLA_MIN_MTU]		= { .type = NLA_U32 },
	[IFLA_MAX_MTU]		= { .type = NLA_U32 },
	[IFLA_PROP_LIST]	= { .type = NLA_NESTED },
	[IFLA_ALT_IFNAME]	= { .type = NLA_STRING,
				    .len = ALTIFNAMSIZ - 1 },
	[IFLA_PERM_ADDRESS]	= { .type = NLA_REJECT },
	[IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
	[IFLA_NEW_IFINDEX]	= NLA_POLICY_MIN(NLA_S32, 1),
	[IFLA_PARENT_DEV_NAME]	= { .type = NLA_NUL_STRING },
	[IFLA_GRO_MAX_SIZE]	= { .type = NLA_U32 },
	[IFLA_TSO_MAX_SIZE]	= { .type = NLA_REJECT },
	[IFLA_TSO_MAX_SEGS]	= { .type = NLA_REJECT },
	[IFLA_ALLMULTI]		= { .type = NLA_REJECT },
	[IFLA_GSO_IPV4_MAX_SIZE]	= NLA_POLICY_MIN(NLA_U32, MAX_TCP_HEADER + 1),
	[IFLA_GRO_IPV4_MAX_SIZE]	= { .type = NLA_U32 },
};

/* Policy for the attributes nested inside IFLA_LINKINFO. */
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
	[IFLA_INFO_KIND]	= { .type = NLA_STRING },
	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
	[IFLA_INFO_SLAVE_KIND]	= { .type = NLA_STRING },
	[IFLA_INFO_SLAVE_DATA]	= { .type = NLA_NESTED },
};

/* Policy for the per-VF attributes nested inside IFLA_VF_INFO. */
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
	[IFLA_VF_MAC]		= { .len = sizeof(struct ifla_vf_mac) },
	[IFLA_VF_BROADCAST]	= { .type = NLA_REJECT },
	[IFLA_VF_VLAN]		= { .len = sizeof(struct ifla_vf_vlan) },
	[IFLA_VF_VLAN_LIST]     = { .type = NLA_NESTED },
	[IFLA_VF_TX_RATE]	= { .len = sizeof(struct ifla_vf_tx_rate) },
	[IFLA_VF_SPOOFCHK]	= { .len = sizeof(struct ifla_vf_spoofchk) },
	[IFLA_VF_RATE]		= { .len = sizeof(struct ifla_vf_rate) },
	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
	[IFLA_VF_TRUST]		= { .len = sizeof(struct ifla_vf_trust) },
	[IFLA_VF_IB_NODE_GUID]	= { .len = sizeof(struct ifla_vf_guid) },
	[IFLA_VF_IB_PORT_GUID]	= { .len = sizeof(struct ifla_vf_guid) },
};

/* Policy for the IFLA_PORT_* attributes of IFLA_VF_PORT / IFLA_PORT_SELF. */
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
	[IFLA_PORT_VF]		= { .type = NLA_U32 },
	[IFLA_PORT_PROFILE]	= { .type = NLA_STRING,
				    .len = PORT_PROFILE_MAX },
	[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
				      .len = PORT_UUID_MAX },
	[IFLA_PORT_HOST_UUID]	= { .type = NLA_STRING,
				    .len = PORT_UUID_MAX },
	[IFLA_PORT_REQUEST]	= { .type = NLA_U8, },
	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },

	/* Unused, but we need to keep it here since user space could
	 * fill it. It's also broken with regard to NLA_BINARY use in
	 * combination with structs.
	 */
	[IFLA_PORT_VSI_TYPE]	= { .type = NLA_BINARY,
				    .len = sizeof(struct ifla_port_vsi) },
};

/* Policy for the attributes nested inside IFLA_XDP. */
static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = {
	[IFLA_XDP_UNSPEC]	= { .strict_start_type = IFLA_XDP_EXPECTED_FD },
	[IFLA_XDP_FD]		= { .type = NLA_S32 },
	[IFLA_XDP_EXPECTED_FD]	= { .type = NLA_S32 },
	[IFLA_XDP_ATTACHED]	= { .type = NLA_U8 },
	[IFLA_XDP_FLAGS]	= { .type = NLA_U32 },
	[IFLA_XDP_PROG_ID]	= { .type = NLA_U32 },
};

/*
 * Resolve the link ops named by IFLA_INFO_KIND inside an IFLA_LINKINFO
 * attribute.
 *
 * Returns the result of rtnl_link_ops_get() (which also receives
 * @ops_srcu_index), or NULL when the nest fails to parse or carries no
 * IFLA_INFO_KIND.
 */
static struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla,
						  int *ops_srcu_index)
{
	struct nlattr *linfo[IFLA_INFO_MAX + 1];
	char kind[MODULE_NAME_LEN];

	if (nla_parse_nested_deprecated(linfo, IFLA_INFO_MAX, nla,
					ifla_info_policy, NULL) < 0)
		return NULL;

	if (!linfo[IFLA_INFO_KIND])
		return NULL;

	nla_strscpy(kind, linfo[IFLA_INFO_KIND], sizeof(kind));
	return rtnl_link_ops_get(kind, ops_srcu_index);
}

/*
 * Decide whether @dev is excluded by a master filter.
 *
 * @master_idx == 0 means no filter was requested; -1 selects devices
 * without a master; any other value selects devices whose master has
 * that ifindex.  Returns true when @dev must be skipped.
 */
static bool link_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = netdev_master_upper_dev_get(dev);

	/* 0 is already used to denote IFLA_MASTER wasn't passed, therefore need
	 * another invalid value for ifindex to denote "no master".
	 */
	if (master_idx == -1)
		return master != NULL;

	return !master || master->ifindex != master_idx;
}

/* True when a kind filter is set and @dev is not of that link kind. */
static bool link_kind_filtered(const struct net_device *dev,
			       const struct rtnl_link_ops *kind_ops)
{
	return kind_ops && dev->rtnl_link_ops != kind_ops;
}

/* True when @dev is rejected by the master filter or the kind filter. */
static bool link_dump_filtered(struct net_device *dev,
			       int master_idx,
			       const struct rtnl_link_ops *kind_ops)
{
	return link_master_filtered(dev, master_idx) ||
	       link_kind_filtered(dev, kind_ops);
}

/**
 * rtnl_get_net_ns_capable - Get netns if sufficiently privileged.
 * @sk: netlink socket
 * @netnsid: network namespace identifier
 *
 * Returns the network namespace identified by netnsid on success or an error
 * pointer on failure.
 */
struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid)
{
	struct net *net = get_net_ns_by_id(sock_net(sk), netnsid);

	if (!net)
		return ERR_PTR(-EINVAL);

	/* For now, the caller is required to have CAP_NET_ADMIN in
	 * the user namespace owning the target net ns.
	 */
	if (sk_ns_capable(sk, net->user_ns, CAP_NET_ADMIN))
		return net;

	/* Not permitted: drop the reference taken by the lookup. */
	put_net(net);
	return ERR_PTR(-EACCES);
}
EXPORT_SYMBOL_GPL(rtnl_get_net_ns_capable);

static int rtnl_valid_dump_ifinfo_req(const struct nlmsghdr *nlh,
				      bool strict_check, struct nlattr **tb,
				      struct netlink_ext_ack *extack)
{
	int hdrlen;

	if (strict_check) {
		struct ifinfomsg *ifm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for link dump");
			return -EINVAL;
		}

		ifm = nlmsg_data(nlh);
		if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
		    ifm->ifi_change) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for link dump request");
			return -EINVAL;
		}
		if (ifm->ifi_index) {
			NL_SET_ERR_MSG(extack, "Filter by device index not supported for link dumps");
			return -EINVAL;
		}

		return nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb,
						     IFLA_MAX, ifla_policy,
						     extack);
	}

	/* A hack to preserve kernel<->userspace interface.
	 * The correct header is ifinfomsg. It is consistent with rtnl_getlink.
	 * However, before Linux v3.9 the code here assumed rtgenmsg and that's
	 * what iproute2 < v3.9.0 used.
	 * We can detect the old iproute2. Even including the IFLA_EXT_MASK
	 * attribute, its netlink message is shorter than struct ifinfomsg.
	 */
	hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg); return nlmsg_parse_deprecated(nlh, hdrlen, tb, IFLA_MAX, ifla_policy, extack); } static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct rtnl_link_ops *kind_ops = NULL; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct nlattr *tb[IFLA_MAX+1]; struct { unsigned long ifindex; } *ctx = (void *)cb->ctx; struct net *tgt_net = net; u32 ext_filter_mask = 0; struct net_device *dev; int ops_srcu_index; int master_idx = 0; int netnsid = -1; int err, i; err = rtnl_valid_dump_ifinfo_req(nlh, cb->strict_check, tb, extack); if (err < 0) { if (cb->strict_check) return err; goto walk_entries; } for (i = 0; i <= IFLA_MAX; ++i) { if (!tb[i]) continue; /* new attributes should only be added with strict checking */ switch (i) { case IFLA_TARGET_NETNSID: netnsid = nla_get_s32(tb[i]); tgt_net = rtnl_get_net_ns_capable(skb->sk, netnsid); if (IS_ERR(tgt_net)) { NL_SET_ERR_MSG(extack, "Invalid target network namespace id"); err = PTR_ERR(tgt_net); netnsid = -1; goto out; } break; case IFLA_EXT_MASK: ext_filter_mask = nla_get_u32(tb[i]); break; case IFLA_MASTER: master_idx = nla_get_u32(tb[i]); break; case IFLA_LINKINFO: kind_ops = linkinfo_to_kind_ops(tb[i], &ops_srcu_index); break; default: if (cb->strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in link dump request"); err = -EINVAL; goto out; } } } if (master_idx || kind_ops) flags |= NLM_F_DUMP_FILTERED; walk_entries: err = 0; for_each_netdev_dump(tgt_net, dev, ctx->ifindex) { if (link_dump_filtered(dev, master_idx, kind_ops)) continue; err = rtnl_fill_ifinfo(skb, dev, net, RTM_NEWLINK, NETLINK_CB(cb->skb).portid, nlh->nlmsg_seq, 0, flags, ext_filter_mask, 0, NULL, 0, netnsid, GFP_KERNEL); if (err < 0) break; } cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); out: if (kind_ops) 
rtnl_link_ops_put(kind_ops, ops_srcu_index); if (netnsid >= 0) put_net(tgt_net); return err; } int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer, struct netlink_ext_ack *exterr) { const struct ifinfomsg *ifmp; const struct nlattr *attrs; size_t len; ifmp = nla_data(nla_peer); attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg); len = nla_len(nla_peer) - sizeof(struct ifinfomsg); if (ifmp->ifi_index < 0) { NL_SET_ERR_MSG_ATTR(exterr, nla_peer, "ifindex can't be negative"); return -EINVAL; } return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy, exterr); } EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg); static struct net *rtnl_link_get_net_ifla(struct nlattr *tb[]) { struct net *net = NULL; /* Examine the link attributes and figure out which * network namespace we are talking about. */ if (tb[IFLA_NET_NS_PID]) net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID])); else if (tb[IFLA_NET_NS_FD]) net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD])); return net; } struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net = rtnl_link_get_net_ifla(tb); if (!net) net = get_net(src_net); return net; } EXPORT_SYMBOL(rtnl_link_get_net); /* Figure out which network namespace we are talking about by * examining the link attributes in the following order: * * 1. IFLA_NET_NS_PID * 2. IFLA_NET_NS_FD * 3. 
IFLA_TARGET_NETNSID */ static struct net *rtnl_link_get_net_by_nlattr(struct net *src_net, struct nlattr *tb[]) { struct net *net; if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) return rtnl_link_get_net(src_net, tb); if (!tb[IFLA_TARGET_NETNSID]) return get_net(src_net); net = get_net_ns_by_id(src_net, nla_get_u32(tb[IFLA_TARGET_NETNSID])); if (!net) return ERR_PTR(-EINVAL); return net; } static struct net *rtnl_link_get_net_capable(const struct sk_buff *skb, struct net *src_net, struct nlattr *tb[], int cap) { struct net *net; net = rtnl_link_get_net_by_nlattr(src_net, tb); if (IS_ERR(net)) return net; if (!netlink_ns_capable(skb, net->user_ns, cap)) { put_net(net); return ERR_PTR(-EPERM); } return net; } /* Verify that rtnetlink requests do not pass additional properties * potentially referring to different network namespaces. */ static int rtnl_ensure_unique_netns(struct nlattr *tb[], struct netlink_ext_ack *extack, bool netns_id_only) { if (netns_id_only) { if (!tb[IFLA_NET_NS_PID] && !tb[IFLA_NET_NS_FD]) return 0; NL_SET_ERR_MSG(extack, "specified netns attribute not supported"); return -EOPNOTSUPP; } if (tb[IFLA_TARGET_NETNSID] && (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD])) goto invalid_attr; if (tb[IFLA_NET_NS_PID] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_FD])) goto invalid_attr; if (tb[IFLA_NET_NS_FD] && (tb[IFLA_TARGET_NETNSID] || tb[IFLA_NET_NS_PID])) goto invalid_attr; return 0; invalid_attr: NL_SET_ERR_MSG(extack, "multiple netns identifying attributes specified"); return -EINVAL; } static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate) { const struct net_device_ops *ops = dev->netdev_ops; if (!ops->ndo_set_vf_rate) return -EOPNOTSUPP; if (max_tx_rate && max_tx_rate < min_tx_rate) return -EINVAL; return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate); } static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS] && 
nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
		return -EINVAL;

	if (tb[IFLA_BROADCAST] &&
	    nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
		return -EINVAL;

	/* GSO/GRO limits are checked against the device and global maxima. */
	if (tb[IFLA_GSO_MAX_SIZE] &&
	    nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) {
		NL_SET_ERR_MSG(extack, "too big gso_max_size");
		return -EINVAL;
	}

	if (tb[IFLA_GSO_MAX_SEGS] &&
	    (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS ||
	     nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) {
		NL_SET_ERR_MSG(extack, "too big gso_max_segs");
		return -EINVAL;
	}

	if (tb[IFLA_GRO_MAX_SIZE] &&
	    nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) {
		NL_SET_ERR_MSG(extack, "too big gro_max_size");
		return -EINVAL;
	}

	if (tb[IFLA_GSO_IPV4_MAX_SIZE] &&
	    nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) {
		NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size");
		return -EINVAL;
	}

	if (tb[IFLA_GRO_IPV4_MAX_SIZE] &&
	    nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) {
		NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size");
		return -EINVAL;
	}

	/*
	 * Every family nested in IFLA_AF_SPEC must be registered and able
	 * to apply settings (set_link_af); its optional validate_link_af
	 * hook is run.  The ops reference is dropped before the result is
	 * examined.
	 */
	if (tb[IFLA_AF_SPEC]) {
		struct nlattr *af;
		int rem, err;

		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
			struct rtnl_af_ops *af_ops;
			int af_ops_srcu_index;

			af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index);
			if (!af_ops)
				return -EAFNOSUPPORT;

			if (!af_ops->set_link_af)
				err = -EOPNOTSUPP;
			else if (af_ops->validate_link_af)
				err = af_ops->validate_link_af(dev, af, extack);
			else
				err = 0;

			rtnl_af_put(af_ops, af_ops_srcu_index);

			if (err < 0)
				return err;
		}
	}

	return 0;
}

/*
 * Pass a VF GUID to the driver's ndo_set_vf_guid.  The callback is
 * invoked unconditionally here; do_setvfinfo() checks that it exists
 * before reaching this helper.
 */
static int handle_infiniband_guid(struct net_device *dev, struct ifla_vf_guid *ivt,
				  int guid_type)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	return ops->ndo_set_vf_guid(dev, ivt->vf, ivt->guid, guid_type);
}

/* Set a VF GUID; only ARPHRD_INFINIBAND devices are supported. */
static int handle_vf_guid(struct net_device *dev, struct ifla_vf_guid *ivt, int guid_type)
{
	if (dev->type != ARPHRD_INFINIBAND)
		return -EOPNOTSUPP;

	return handle_infiniband_guid(dev, ivt, guid_type);
}

/*
 * Apply the per-VF settings found in @tb (parsed IFLA_VF_* attributes).
 *
 * Each attribute that is present is handed to the matching driver
 * callback, in the order coded below; processing stops at the first
 * failure.  A missing callback yields -EOPNOTSUPP and a VF index
 * >= INT_MAX yields -EINVAL.  When no handled attribute is present the
 * initial -EINVAL is returned.  The two GUID attributes return directly,
 * so a node GUID prevents a port GUID in the same request from being
 * processed.
 */
static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int err = -EINVAL;

	if (tb[IFLA_VF_MAC]) {
		struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);

		if (ivm->vf >= INT_MAX)
			return -EINVAL;
		err = -EOPNOTSUPP;
		if (ops->ndo_set_vf_mac)
			err = ops->ndo_set_vf_mac(dev, ivm->vf,
						  ivm->mac);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_VLAN]) {
		struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);

		if (ivv->vf >= INT_MAX)
			return -EINVAL;
		err = -EOPNOTSUPP;
		/* This legacy attribute always uses the 802.1Q protocol. */
		if (ops->ndo_set_vf_vlan)
			err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
						   ivv->qos,
						   htons(ETH_P_8021Q));
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_VLAN_LIST]) {
		struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN];
		struct nlattr *attr;
		int rem, len = 0;

		err = -EOPNOTSUPP;
		if (!ops->ndo_set_vf_vlan)
			return err;

		/* Collect the entries, rejecting malformed or excess ones. */
		nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) {
			if (nla_type(attr) != IFLA_VF_VLAN_INFO ||
			    nla_len(attr) < sizeof(struct ifla_vf_vlan_info)) {
				return -EINVAL;
			}
			if (len >= MAX_VLAN_LIST_LEN)
				return -EOPNOTSUPP;
			ivvl[len] = nla_data(attr);

			len++;
		}
		if (len == 0)
			return -EINVAL;

		if (ivvl[0]->vf >= INT_MAX)
			return -EINVAL;
		/* Only the first list entry is passed to the driver. */
		err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
					   ivvl[0]->qos, ivvl[0]->vlan_proto);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_TX_RATE]) {
		struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
		struct ifla_vf_info ivf;

		if (ivt->vf >= INT_MAX)
			return -EINVAL;
		err = -EOPNOTSUPP;
		/*
		 * The current config is fetched so that its min_tx_rate is
		 * kept while the requested rate is applied as the maximum.
		 */
		if (ops->ndo_get_vf_config)
			err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
		if (err < 0)
			return err;

		err = rtnl_set_vf_rate(dev, ivt->vf,
				       ivf.min_tx_rate, ivt->rate);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_RATE]) {
		struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);

		if (ivt->vf >= INT_MAX)
			return -EINVAL;

		err = rtnl_set_vf_rate(dev, ivt->vf,
				       ivt->min_tx_rate, ivt->max_tx_rate);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_SPOOFCHK]) {
		struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);

		if (ivs->vf >= INT_MAX)
			return -EINVAL;
		err = -EOPNOTSUPP;
		if (ops->ndo_set_vf_spoofchk)
			err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
						       ivs->setting);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_LINK_STATE]) {
		struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);

		if (ivl->vf >= INT_MAX)
			return -EINVAL;
		err = -EOPNOTSUPP;
		if (ops->ndo_set_vf_link_state)
			err = ops->ndo_set_vf_link_state(dev, ivl->vf,
							 ivl->link_state);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_RSS_QUERY_EN]) {
		struct ifla_vf_rss_query_en *ivrssq_en;

		err = -EOPNOTSUPP;
		ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
		if (ivrssq_en->vf >= INT_MAX)
			return -EINVAL;
		if (ops->ndo_set_vf_rss_query_en)
			err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
							   ivrssq_en->setting);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_TRUST]) {
		struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);

		if (ivt->vf >= INT_MAX)
			return -EINVAL;
		err = -EOPNOTSUPP;
		if (ops->ndo_set_vf_trust)
			err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
		if (err < 0)
			return err;
	}

	if (tb[IFLA_VF_IB_NODE_GUID]) {
		struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);

		if (ivt->vf >= INT_MAX)
			return -EINVAL;
		if (!ops->ndo_set_vf_guid)
			return -EOPNOTSUPP;
		return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
	}

	if (tb[IFLA_VF_IB_PORT_GUID]) {
		struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);

		if (ivt->vf >= INT_MAX)
			return -EINVAL;
		if (!ops->ndo_set_vf_guid)
			return -EOPNOTSUPP;

		return handle_vf_guid(dev, ivt, IFLA_VF_IB_PORT_GUID);
	}

	return err;
}

/*
 * Change the master of @dev to the device with index @ifindex.
 *
 * An existing master is detached first through its ndo_del_slave
 * (nothing is done if it already is the requested one); @ifindex == 0
 * only detaches.  Returns -EOPNOTSUPP when the needed callback is
 * missing and -EINVAL when @ifindex does not name a device in the
 * namespace of @dev.
 */
static int do_set_master(struct net_device *dev, int ifindex,
			 struct netlink_ext_ack *extack)
{
	struct net_device *upper_dev = netdev_master_upper_dev_get(dev);
	const struct net_device_ops *ops;
	int err;

	if (upper_dev) {
		if (upper_dev->ifindex == ifindex)
			return 0;
		ops = upper_dev->netdev_ops;
		if (ops->ndo_del_slave) {
			err = ops->ndo_del_slave(upper_dev, dev);
			if (err)
				return err;
		} else {
			return -EOPNOTSUPP;
		}
	}

	if (ifindex) {
		upper_dev = __dev_get_by_index(dev_net(dev), ifindex);
		if (!upper_dev)
			return -EINVAL;
		ops = upper_dev->netdev_ops;
		if (ops->ndo_add_slave) {
			err = ops->ndo_add_slave(upper_dev,
dev, extack); if (err) return err; } else { return -EOPNOTSUPP; } } return 0; } static const struct nla_policy ifla_proto_down_reason_policy[IFLA_PROTO_DOWN_REASON_VALUE + 1] = { [IFLA_PROTO_DOWN_REASON_MASK] = { .type = NLA_U32 }, [IFLA_PROTO_DOWN_REASON_VALUE] = { .type = NLA_U32 }, }; static int do_set_proto_down(struct net_device *dev, struct nlattr *nl_proto_down, struct nlattr *nl_proto_down_reason, struct netlink_ext_ack *extack) { struct nlattr *pdreason[IFLA_PROTO_DOWN_REASON_MAX + 1]; unsigned long mask = 0; u32 value; bool proto_down; int err; if (!dev->change_proto_down) { NL_SET_ERR_MSG(extack, "Protodown not supported by device"); return -EOPNOTSUPP; } if (nl_proto_down_reason) { err = nla_parse_nested_deprecated(pdreason, IFLA_PROTO_DOWN_REASON_MAX, nl_proto_down_reason, ifla_proto_down_reason_policy, NULL); if (err < 0) return err; if (!pdreason[IFLA_PROTO_DOWN_REASON_VALUE]) { NL_SET_ERR_MSG(extack, "Invalid protodown reason value"); return -EINVAL; } value = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_VALUE]); if (pdreason[IFLA_PROTO_DOWN_REASON_MASK]) mask = nla_get_u32(pdreason[IFLA_PROTO_DOWN_REASON_MASK]); dev_change_proto_down_reason(dev, mask, value); } if (nl_proto_down) { proto_down = nla_get_u8(nl_proto_down); /* Don't turn off protodown if there are active reasons */ if (!proto_down && dev->proto_down_reason) { NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; } err = dev_change_proto_down(dev, proto_down); if (err) return err; } return 0; } #define DO_SETLINK_MODIFIED 0x01 /* notify flag means notify + modified. 
*/
#define DO_SETLINK_NOTIFY 0x03

/*
 * do_setlink - apply the attributes of a link change request to @dev
 * @skb: request skb (not referenced in this function body)
 * @dev: device to modify
 * @tgt_net: target namespace; if it is not dev_net(dev) the device is
 *	moved there before anything else is changed
 * @ifm: ifinfomsg header of the request (ifi_index, ifi_flags, ifi_change)
 * @extack: extended ack for error reporting
 * @tb: parsed IFLA_* attribute table
 * @status: DO_SETLINK_* bits already accumulated by the caller
 *
 * Each present attribute is applied in the fixed order below.  The first
 * failing step jumps to errout; earlier steps are not rolled back.  At
 * errout, if DO_SETLINK_MODIFIED is set in @status: netdev_state_change()
 * is called when the full DO_SETLINK_NOTIFY value (0x03) is set, and a
 * rate-limited warning is logged when err is negative.
 *
 * Return: 0 on success, otherwise the error of the step that failed.
 */
static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
		      struct net *tgt_net, struct ifinfomsg *ifm,
		      struct netlink_ext_ack *extack,
		      struct nlattr **tb, int status)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	char ifname[IFNAMSIZ];
	int err;

	err = validate_linkmsg(dev, tb, extack);
	if (err < 0)
		goto errout;

	/* An empty ifname means "no name given" in the checks below. */
	if (tb[IFLA_IFNAME])
		nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
	else
		ifname[0] = '\0';

	if (!net_eq(tgt_net, dev_net(dev))) {
		const char *pat = ifname[0] ? ifname : NULL;
		int new_ifindex;

		/* Defaults to 0 when IFLA_NEW_IFINDEX is absent. */
		new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);

		err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex);
		if (err)
			goto errout;

		status |= DO_SETLINK_MODIFIED;
	}

	if (tb[IFLA_MAP]) {
		struct rtnl_link_ifmap *u_map;
		struct ifmap k_map;

		if (!ops->ndo_set_config) {
			err = -EOPNOTSUPP;
			goto errout;
		}

		if (!netif_device_present(dev)) {
			err = -ENODEV;
			goto errout;
		}

		/* Convert the netlink map into a struct ifmap, field by field. */
		u_map = nla_data(tb[IFLA_MAP]);
		k_map.mem_start = (unsigned long) u_map->mem_start;
		k_map.mem_end = (unsigned long) u_map->mem_end;
		k_map.base_addr = (unsigned short) u_map->base_addr;
		k_map.irq = (unsigned char) u_map->irq;
		k_map.dma = (unsigned char) u_map->dma;
		k_map.port = (unsigned char) u_map->port;

		err = ops->ndo_set_config(dev, &k_map);
		if (err < 0)
			goto errout;

		status |= DO_SETLINK_NOTIFY;
	}

	if (tb[IFLA_ADDRESS]) {
		struct sockaddr *sa;
		int len;

		/*
		 * Buffer holds the family plus the larger of dev->addr_len
		 * and sizeof(struct sockaddr); only dev->addr_len bytes of
		 * the attribute are copied in.
		 */
		len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
						  sizeof(*sa));
		sa = kmalloc(len, GFP_KERNEL);
		if (!sa) {
			err = -ENOMEM;
			goto errout;
		}
		sa->sa_family = dev->type;
		memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]),
		       dev->addr_len);
		err = dev_set_mac_address_user(dev, sa, extack);
		kfree(sa);
		if (err)
			goto errout;
		status |= DO_SETLINK_MODIFIED;
	}

	if (tb[IFLA_MTU]) {
		err = dev_set_mtu_ext(dev, nla_get_u32(tb[IFLA_MTU]), extack);
		if (err < 0)
			goto errout;
		status |= DO_SETLINK_MODIFIED;
	}

	if (tb[IFLA_GROUP]) {
		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
		status |= DO_SETLINK_NOTIFY;
	}

	/*
	 * Interface selected by interface index but interface
	 * name provided implies that a name change has been
	 * requested.
	 */
	if (ifm->ifi_index > 0 && ifname[0]) {
		err = dev_change_name(dev, ifname);
		if (err < 0)
			goto errout;
		status |= DO_SETLINK_MODIFIED;
	}

	if (tb[IFLA_IFALIAS]) {
		err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]),
				    nla_len(tb[IFLA_IFALIAS]));
		if (err < 0)
			goto errout;
		status |= DO_SETLINK_NOTIFY;
	}

	/* Note: the broadcast and flags steps below leave @status untouched. */
	if (tb[IFLA_BROADCAST]) {
		nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len);
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	}

	if (ifm->ifi_flags || ifm->ifi_change) {
		err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
				       extack);
		if (err < 0)
			goto errout;
	}

	if (tb[IFLA_MASTER]) {
		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
		if (err)
			goto errout;
		status |= DO_SETLINK_MODIFIED;
	}

	if (tb[IFLA_CARRIER]) {
		err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER]));
		if (err)
			goto errout;
		status |= DO_SETLINK_MODIFIED;
	}

	if (tb[IFLA_TXQLEN]) {
		unsigned int value = nla_get_u32(tb[IFLA_TXQLEN]);

		err = dev_change_tx_queue_len(dev, value);
		if (err)
			goto errout;
		status |= DO_SETLINK_MODIFIED;
	}

	/*
	 * GSO/GRO limits: the setter is called, and the request counted as
	 * a modification, only when the new value differs from the current
	 * one (the XOR is non-zero).
	 */
	if (tb[IFLA_GSO_MAX_SIZE]) {
		u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);

		if (dev->gso_max_size ^ max_size) {
			netif_set_gso_max_size(dev, max_size);
			status |= DO_SETLINK_MODIFIED;
		}
	}

	if (tb[IFLA_GSO_MAX_SEGS]) {
		u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);

		if (dev->gso_max_segs ^ max_segs) {
			netif_set_gso_max_segs(dev, max_segs);
			status |= DO_SETLINK_MODIFIED;
		}
	}

	if (tb[IFLA_GRO_MAX_SIZE]) {
		u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]);

		if (dev->gro_max_size ^ gro_max_size) {
			netif_set_gro_max_size(dev, gro_max_size);
			status |= DO_SETLINK_MODIFIED;
		}
	}

	if (tb[IFLA_GSO_IPV4_MAX_SIZE]) {
		u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]);

		if (dev->gso_ipv4_max_size ^ max_size) {
			netif_set_gso_ipv4_max_size(dev, max_size);
			status |= DO_SETLINK_MODIFIED;
		}
	}

	if (tb[IFLA_GRO_IPV4_MAX_SIZE]) {
		u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]);

		if (dev->gro_ipv4_max_size ^ gro_max_size) {
			netif_set_gro_ipv4_max_size(dev, gro_max_size);
			status |= DO_SETLINK_MODIFIED;
		}
	}

	if (tb[IFLA_OPERSTATE])
		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));

	if (tb[IFLA_LINKMODE]) {
		unsigned char value = nla_get_u8(tb[IFLA_LINKMODE]);

		/* Notify only on change; the store itself is unconditional. */
		if (dev->link_mode ^ value)
			status |= DO_SETLINK_NOTIFY;
		WRITE_ONCE(dev->link_mode, value);
	}

	if (tb[IFLA_VFINFO_LIST]) {
		struct nlattr *vfinfo[IFLA_VF_MAX + 1];
		struct nlattr *attr;
		int rem;

		/* Every nested entry must be an IFLA_VF_INFO container. */
		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
			if (nla_type(attr) != IFLA_VF_INFO ||
			    nla_len(attr) < NLA_HDRLEN) {
				err = -EINVAL;
				goto errout;
			}
			err = nla_parse_nested_deprecated(vfinfo, IFLA_VF_MAX,
							  attr,
							  ifla_vf_policy,
							  NULL);
			if (err < 0)
				goto errout;
			err = do_setvfinfo(dev, vfinfo);
			if (err < 0)
				goto errout;
			status |= DO_SETLINK_NOTIFY;
		}
	}
	/* Discard any non-negative value left in err by the steps above. */
	err = 0;

	if (tb[IFLA_VF_PORTS]) {
		struct nlattr *port[IFLA_PORT_MAX+1];
		struct nlattr *attr;
		int vf;
		int rem;

		err = -EOPNOTSUPP;
		if (!ops->ndo_set_vf_port)
			goto errout;

		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
			if (nla_type(attr) != IFLA_VF_PORT ||
			    nla_len(attr) < NLA_HDRLEN) {
				err = -EINVAL;
				goto errout;
			}
			err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX,
							  attr,
							  ifla_port_policy,
							  NULL);
			if (err < 0)
				goto errout;
			/* The VF number is mandatory for each port entry. */
			if (!port[IFLA_PORT_VF]) {
				err = -EOPNOTSUPP;
				goto errout;
			}
			vf = nla_get_u32(port[IFLA_PORT_VF]);
			err = ops->ndo_set_vf_port(dev, vf, port);
			if (err < 0)
				goto errout;
			status |= DO_SETLINK_NOTIFY;
		}
	}
	err = 0;

	if (tb[IFLA_PORT_SELF]) {
		struct nlattr *port[IFLA_PORT_MAX+1];

		err = nla_parse_nested_deprecated(port, IFLA_PORT_MAX,
						  tb[IFLA_PORT_SELF],
						  ifla_port_policy, NULL);
		if (err < 0)
			goto errout;

		/* Same driver hook as VF ports, addressed with PORT_SELF_VF. */
		err = -EOPNOTSUPP;
		if (ops->ndo_set_vf_port)
			err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
		if (err < 0)
			goto errout;
		status |= DO_SETLINK_NOTIFY;
	}

	if (tb[IFLA_AF_SPEC]) {
		struct nlattr *af;
		int rem;

		nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) {
			struct rtnl_af_ops *af_ops;
			int af_ops_srcu_index;

			/* The nested attribute type selects the af_ops. */
			af_ops = rtnl_af_lookup(nla_type(af), &af_ops_srcu_index);
			if (!af_ops) {
				err = -EAFNOSUPPORT;
				goto errout;
			}

			err = af_ops->set_link_af(dev, af, extack);
			/* Drop the lookup reference before checking err. */
			rtnl_af_put(af_ops, af_ops_srcu_index);

			if (err < 0)
				goto errout;

			status |= DO_SETLINK_NOTIFY;
		}
	}
	err = 0;

	if (tb[IFLA_PROTO_DOWN] || tb[IFLA_PROTO_DOWN_REASON]) {
		err = do_set_proto_down(dev, tb[IFLA_PROTO_DOWN],
					tb[IFLA_PROTO_DOWN_REASON], extack);
		if (err)
			goto errout;
		status |= DO_SETLINK_NOTIFY;
	}

	if (tb[IFLA_XDP]) {
		struct nlattr *xdp[IFLA_XDP_MAX + 1];
		u32 xdp_flags = 0;

		err = nla_parse_nested_deprecated(xdp, IFLA_XDP_MAX,
						  tb[IFLA_XDP],
						  ifla_xdp_policy, NULL);
		if (err < 0)
			goto errout;

		/* These two attributes are rejected in a set request. */
		if (xdp[IFLA_XDP_ATTACHED] || xdp[IFLA_XDP_PROG_ID]) {
			err = -EINVAL;
			goto errout;
		}

		if (xdp[IFLA_XDP_FLAGS]) {
			xdp_flags = nla_get_u32(xdp[IFLA_XDP_FLAGS]);
			/* No bits outside XDP_FLAGS_MASK ... */
			if (xdp_flags & ~XDP_FLAGS_MASK) {
				err = -EINVAL;
				goto errout;
			}
			/* ... and at most one of the XDP_FLAGS_MODES bits. */
			if (hweight32(xdp_flags & XDP_FLAGS_MODES) > 1) {
				err = -EINVAL;
				goto errout;
			}
		}

		if (xdp[IFLA_XDP_FD]) {
			int expected_fd = -1;

			/* XDP_FLAGS_REPLACE requires IFLA_XDP_EXPECTED_FD. */
			if (xdp_flags & XDP_FLAGS_REPLACE) {
				if (!xdp[IFLA_XDP_EXPECTED_FD]) {
					err = -EINVAL;
					goto errout;
				}
				expected_fd =
					nla_get_s32(xdp[IFLA_XDP_EXPECTED_FD]);
			}

			err = dev_change_xdp_fd(dev, extack,
						nla_get_s32(xdp[IFLA_XDP_FD]),
						expected_fd,
						xdp_flags);
			if (err)
				goto errout;
			status |= DO_SETLINK_NOTIFY;
		}
	}

errout:
	if (status & DO_SETLINK_MODIFIED) {
		/* Both bits of DO_SETLINK_NOTIFY must be set to notify. */
		if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
			netdev_state_change(dev);

		if (err < 0)
			net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
					     dev->name);
	}

	return err;
}

/*
 * rtnl_dev_get - look a device up by name attribute
 * @net: namespace to search
 * @tb: parsed IFLA_* attribute table
 *
 * IFLA_IFNAME takes precedence over IFLA_ALT_IFNAME.  The local buffer is
 * sized for the longer alternative name; an IFLA_IFNAME copy is limited
 * to IFNAMSIZ.
 *
 * Return: the result of __dev_get_by_name(), or NULL if neither name
 * attribute is present.
 */
static struct net_device *rtnl_dev_get(struct net *net,
				       struct nlattr *tb[])
{
	char ifname[ALTIFNAMSIZ];

	if (tb[IFLA_IFNAME])
		nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
	else if (tb[IFLA_ALT_IFNAME])
		nla_strscpy(ifname, tb[IFLA_ALT_IFNAME], ALTIFNAMSIZ);
	else
		return NULL;

	return __dev_get_by_name(net, ifname);
}

/*
 * rtnl_setlink - netlink handler that modifies an existing link
 * @skb: request skb; its socket selects the source namespace
 * @nlh: netlink header followed by a struct ifinfomsg
 * @extack: extended ack for error reporting
 *
 * Parses the attributes, resolves the target namespace (CAP_NET_ADMIN is
 * passed to rtnl_link_get_net_capable()), locks the source and target
 * namespaces through a struct rtnl_nets, finds the device by index or by
 * name in the source namespace, and hands over to do_setlink().
 *
 * Return: 0 on success; -EINVAL if neither an index nor a name was given;
 * -ENODEV if no device matched; otherwise the error of the failing step.
 */
static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm = nlmsg_data(nlh);
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFLA_MAX+1];
	struct net_device *dev = NULL;
	struct rtnl_nets rtnl_nets;
	struct net *tgt_net;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
				     ifla_policy, extack);
	if (err < 0)
		goto errout;

	err = rtnl_ensure_unique_netns(tb, extack, false);
	if (err < 0)
		goto errout;

	tgt_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
	if (IS_ERR(tgt_net)) {
		err = PTR_ERR(tgt_net);
		goto errout;
	}

	/*
	 * A reference on the source namespace is taken with get_net() before
	 * it is added; tgt_net is added as returned by the lookup above.
	 * NOTE(review): presumably rtnl_nets_destroy() drops both - confirm.
	 */
	rtnl_nets_init(&rtnl_nets);
	rtnl_nets_add(&rtnl_nets, get_net(net));
	rtnl_nets_add(&rtnl_nets, tgt_net);
	rtnl_nets_lock(&rtnl_nets);

	if (ifm->ifi_index > 0)
		dev = __dev_get_by_index(net, ifm->ifi_index);
	else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
		dev = rtnl_dev_get(net, tb);
	else
		err = -EINVAL;

	/* err is 0 here unless no selector was supplied at all. */
	if (dev)
		err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0);
	else if (!err)
		err = -ENODEV;

	rtnl_nets_unlock(&rtnl_nets);
	rtnl_nets_destroy(&rtnl_nets);
errout:
	return err;
}

/*
 * rtnl_group_dellink - delete every device of @net that belongs to @group
 * @net: namespace whose device list is walked
 * @group: device group; 0 is refused with -EPERM
 *
 * Works in two passes: the first only verifies that every matching device
 * has rtnl_link_ops with a dellink callback, so that nothing is queued if
 * any member cannot be deleted; the second queues the matching devices on
 * a local kill list.
 *
 * Return: 0 on success, -EPERM for group 0, -EOPNOTSUPP if a member has
 * no dellink callback, -ENODEV if no device is in the group.
 */
static int rtnl_group_dellink(const struct net *net, int group)
{
	struct net_device *dev, *aux;
	LIST_HEAD(list_kill);
	bool found = false;

	if (!group)
		return -EPERM;

	for_each_netdev(net, dev) {
		if (dev->group == group) {
			const struct rtnl_link_ops *ops;

			found = true;
			ops = dev->rtnl_link_ops;
			if (!ops || !ops->dellink)
				return -EOPNOTSUPP;
		}
	}

	if (!found)
		return -ENODEV;

	for_each_netdev_safe(net, dev, aux) {
		if (dev->group == group) {
			const struct rtnl_link_ops *ops;
ops = dev->rtnl_link_ops;
			/* Queue the device on the local kill list. */
			ops->dellink(dev, &list_kill);
		}
	}
	/* Unregister everything that was queued in one batch. */
	unregister_netdevice_many(&list_kill);

	return 0;
}

/*
 * rtnl_delete_link - delete @dev through its rtnl_link_ops
 * @dev: device to delete
 * @portid: netlink port id forwarded to the unregister notification
 * @nlh: request header forwarded to the unregister notification
 *
 * Return: 0 on success, -EOPNOTSUPP if the device has no rtnl_link_ops
 * or the ops provide no dellink callback.
 */
int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh)
{
	const struct rtnl_link_ops *ops;
	LIST_HEAD(list_kill);

	ops = dev->rtnl_link_ops;
	if (!ops || !ops->dellink)
		return -EOPNOTSUPP;

	ops->dellink(dev, &list_kill);
	unregister_netdevice_many_notify(&list_kill, portid, nlh);

	return 0;
}
EXPORT_SYMBOL_GPL(rtnl_delete_link);

/*
 * rtnl_dellink - netlink handler that deletes one link or a whole group
 * @skb: request skb
 * @nlh: netlink header followed by a struct ifinfomsg
 * @extack: extended ack for error reporting
 *
 * The device is searched in the namespace named by IFLA_TARGET_NETNSID if
 * that attribute is present, otherwise in the socket's namespace.
 * Selection order: ifi_index, then IFLA_IFNAME / IFLA_ALT_IFNAME; if none
 * of those was supplied, IFLA_GROUP deletes every device of the group.
 *
 * Return: 0 on success; -ENODEV if an index or name was given but matched
 * nothing; -EINVAL if no selector was given; otherwise the error of the
 * failing step.
 */
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm = nlmsg_data(nlh);
	struct net *net = sock_net(skb->sk);
	u32 portid = NETLINK_CB(skb).portid;
	struct nlattr *tb[IFLA_MAX+1];
	struct net_device *dev = NULL;
	struct net *tgt_net = net;
	int netnsid = -1;
	int err;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
				     ifla_policy, extack);
	if (err < 0)
		return err;

	err = rtnl_ensure_unique_netns(tb, extack, true);
	if (err < 0)
		return err;

	if (tb[IFLA_TARGET_NETNSID]) {
		netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
		tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
		if (IS_ERR(tgt_net))
			return PTR_ERR(tgt_net);
	}

	rtnl_net_lock(tgt_net);

	if (ifm->ifi_index > 0)
		dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
	else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
		dev = rtnl_dev_get(tgt_net, tb);

	if (dev)
		err = rtnl_delete_link(dev, portid, nlh);
	else if (ifm->ifi_index > 0 || tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
		err = -ENODEV;
	else if (tb[IFLA_GROUP])
		err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP]));
	else
		err = -EINVAL;

	rtnl_net_unlock(tgt_net);

	/*
	 * netnsid stays -1 unless tgt_net came from the netnsid lookup
	 * above; only then is put_net() called on it.
	 */
	if (netnsid >= 0)
		put_net(tgt_net);

	return err;
}

/*
 * rtnl_configure_link - apply requested flags to @dev and send the
 * flag-change notification
 * @dev: device to configure
 * @ifm: request header carrying ifi_flags / ifi_change; may be NULL
 * @portid: netlink port id forwarded to the notification
 * @nlh: request header forwarded to the notification
 *
 * Return: 0 on success or the negative error from __dev_change_flags().
 */
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
			u32 portid, const struct nlmsghdr *nlh)
{
	unsigned int old_flags;
	int err;

	/* Snapshot taken before any change, used for the notification. */
	old_flags = dev->flags;
	if (ifm && (ifm->ifi_flags || ifm->ifi_change)) {
		err = __dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm),
					 NULL);
		if (err < 0)
			return err;
	}

if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
		/* Already initialized: report only the flags that changed. */
		__dev_notify_flags(dev, old_flags, (old_flags ^ dev->flags), portid, nlh);
	} else {
		/* First configuration: mark initialized, report all bits. */
		dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
		__dev_notify_flags(dev, old_flags, ~0U, portid, nlh);
	}

	return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);

/*
 * rtnl_create_link - allocate a net_device and preset it from attributes
 * @net: namespace assigned to the new device
 * @ifname: device name, passed to the allocator
 * @name_assign_type: passed through to the allocator
 * @ops: link ops describing the device kind
 * @tb: parsed IFLA_* attribute table
 * @extack: extended ack for error reporting
 *
 * Queue counts come from IFLA_NUM_TX_QUEUES / IFLA_NUM_RX_QUEUES if
 * present, else from the ops' get_num_*_queues() callbacks, else 1; each
 * must lie in 1..4096.  The device is allocated with ops->alloc() when
 * provided, otherwise with alloc_netdev_mqs().  No registration call is
 * made in this function.
 *
 * Return: the new device, or an ERR_PTR() (-EINVAL, -ENOMEM, or the error
 * from ops->alloc(), validate_linkmsg() or dev_validate_mtu()).  On the
 * validation failures the device is freed before returning.
 */
struct net_device *rtnl_create_link(struct net *net, const char *ifname,
				    unsigned char name_assign_type,
				    const struct rtnl_link_ops *ops,
				    struct nlattr *tb[],
				    struct netlink_ext_ack *extack)
{
	struct net_device *dev;
	unsigned int num_tx_queues = 1;
	unsigned int num_rx_queues = 1;
	int err;

	if (tb[IFLA_NUM_TX_QUEUES])
		num_tx_queues = nla_get_u32(tb[IFLA_NUM_TX_QUEUES]);
	else if (ops->get_num_tx_queues)
		num_tx_queues = ops->get_num_tx_queues();

	if (tb[IFLA_NUM_RX_QUEUES])
		num_rx_queues = nla_get_u32(tb[IFLA_NUM_RX_QUEUES]);
	else if (ops->get_num_rx_queues)
		num_rx_queues = ops->get_num_rx_queues();

	if (num_tx_queues < 1 || num_tx_queues > 4096) {
		NL_SET_ERR_MSG(extack, "Invalid number of transmit queues");
		return ERR_PTR(-EINVAL);
	}

	if (num_rx_queues < 1 || num_rx_queues > 4096) {
		NL_SET_ERR_MSG(extack, "Invalid number of receive queues");
		return ERR_PTR(-EINVAL);
	}

	if (ops->alloc) {
		dev = ops->alloc(tb, ifname, name_assign_type,
				 num_tx_queues, num_rx_queues);
		/* An ERR_PTR from ops->alloc() is returned unchanged. */
		if (IS_ERR(dev))
			return dev;
	} else {
		dev = alloc_netdev_mqs(ops->priv_size, ifname,
				       name_assign_type, ops->setup,
				       num_tx_queues, num_rx_queues);
	}

	/* Covers a NULL result from either allocation path. */
	if (!dev)
		return ERR_PTR(-ENOMEM);

	err = validate_linkmsg(dev, tb, extack);
	if (err < 0) {
		free_netdev(dev);
		return ERR_PTR(err);
	}

	dev_net_set(dev, net);
	dev->rtnl_link_ops = ops;
	dev->rtnl_link_state = RTNL_LINK_INITIALIZING;

	if (tb[IFLA_MTU]) {
		u32 mtu = nla_get_u32(tb[IFLA_MTU]);

		err = dev_validate_mtu(dev, mtu, extack);
		if (err) {
			free_netdev(dev);
			return ERR_PTR(err);
		}
		dev->mtu = mtu;
	}
	if (tb[IFLA_ADDRESS]) {
		__dev_addr_set(dev, nla_data(tb[IFLA_ADDRESS]),
			       nla_len(tb[IFLA_ADDRESS]));
		dev->addr_assign_type = NET_ADDR_SET;
	}
	if (tb[IFLA_BROADCAST])
		memcpy(dev->broadcast,
nla_data(tb[IFLA_BROADCAST]),
		       nla_len(tb[IFLA_BROADCAST]));
	/* The remaining optional attributes are applied directly. */
	if (tb[IFLA_TXQLEN])
		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
	if (tb[IFLA_OPERSTATE])
		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
	if (tb[IFLA_LINKMODE])
		dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
	if (tb[IFLA_GROUP])
		dev_set_group(dev, nla_get_u32(tb[IFLA_GROUP]));
	if (tb[IFLA_GSO_MAX_SIZE])
		netif_set_gso_max_size(dev, nla_get_u32(tb[IFLA_GSO_MAX_SIZE]));
	if (tb[IFLA_GSO_MAX_SEGS])
		netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
	if (tb[IFLA_GRO_MAX_SIZE])
		netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
	if (tb[IFLA_GSO_IPV4_MAX_SIZE])
		netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]));
	if (tb[IFLA_GRO_IPV4_MAX_SIZE])
		netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]));

	return dev;
}
EXPORT_SYMBOL(rtnl_create_link);

/*
 * Attribute tables used while handling a newlink request, grouped in one
 * structure (rtnl_newlink() allocates it with kmalloc()):
 *   tb         - top-level IFLA_* attributes
 *   linkinfo   - nested IFLA_LINKINFO attributes
 *   attr       - kind-specific IFLA_INFO_DATA attributes
 *   slave_attr - master-specific IFLA_INFO_SLAVE_DATA attributes
 */
struct rtnl_newlink_tbs {
	struct nlattr *tb[IFLA_MAX + 1];
	struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
	struct nlattr *attr[RTNL_MAX_TYPE + 1];
	struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
};

/*
 * rtnl_changelink - change an existing device found by a newlink request
 * @skb: request skb, forwarded to do_setlink()
 * @nlh: request header; NLM_F_EXCL and NLM_F_REPLACE are rejected
 * @ops: link ops looked up from IFLA_INFO_KIND, may be NULL
 * @dev: the existing device
 * @tgt_net: target namespace, forwarded to do_setlink()
 * @tbs: attribute tables of the request
 * @data: parsed IFLA_INFO_DATA attributes, or NULL
 * @extack: extended ack for error reporting
 *
 * Kind-specific data goes to ops->changelink(), slave data to the
 * master's slave_changelink(), and the generic attributes to
 * do_setlink().
 *
 * Return: 0 on success; -EEXIST for NLM_F_EXCL; -EOPNOTSUPP for
 * NLM_F_REPLACE or a missing/mismatching callback; -EINVAL if the
 * master's slave_maxtype exceeds RTNL_SLAVE_MAX_TYPE; otherwise the error
 * of the failing step.
 */
static int rtnl_changelink(const struct sk_buff *skb, struct nlmsghdr *nlh,
			   const struct rtnl_link_ops *ops,
			   struct net_device *dev, struct net *tgt_net,
			   struct rtnl_newlink_tbs *tbs,
			   struct nlattr **data,
			   struct netlink_ext_ack *extack)
{
	struct nlattr ** const linkinfo = tbs->linkinfo;
	struct nlattr ** const tb = tbs->tb;
	int status = 0;
	int err;

	if (nlh->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;

	if (nlh->nlmsg_flags & NLM_F_REPLACE)
		return -EOPNOTSUPP;

	if (linkinfo[IFLA_INFO_DATA]) {
		/* The requested kind must be the device's own kind. */
		if (!ops || ops != dev->rtnl_link_ops || !ops->changelink)
			return -EOPNOTSUPP;

		err = ops->changelink(dev, tb, data, extack);
		if (err < 0)
			return err;

		status |= DO_SETLINK_NOTIFY;
	}

	if (linkinfo[IFLA_INFO_SLAVE_DATA]) {
		const struct rtnl_link_ops *m_ops = NULL;
		struct nlattr **slave_data = NULL;
		struct net_device *master_dev;

		/* Slave data is interpreted by the master device's link ops. */
		master_dev = netdev_master_upper_dev_get(dev);
		if (master_dev)
			m_ops = master_dev->rtnl_link_ops;
if (!m_ops || !m_ops->slave_changelink)
			return -EOPNOTSUPP;

		/* tbs->slave_attr has room for RTNL_SLAVE_MAX_TYPE + 1 entries. */
		if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
			return -EINVAL;

		if (m_ops->slave_maxtype) {
			err = nla_parse_nested_deprecated(tbs->slave_attr,
							  m_ops->slave_maxtype,
							  linkinfo[IFLA_INFO_SLAVE_DATA],
							  m_ops->slave_policy, extack);
			if (err < 0)
				return err;

			slave_data = tbs->slave_attr;
		}

		/* slave_data stays NULL when slave_maxtype is 0. */
		err = m_ops->slave_changelink(master_dev, dev, tb, slave_data, extack);
		if (err < 0)
			return err;

		status |= DO_SETLINK_NOTIFY;
	}

	/* Generic attributes last; status carries the NOTIFY bits set above. */
	return do_setlink(skb, dev, tgt_net, nlmsg_data(nlh), extack, tb, status);
}

/*
 * rtnl_group_changelink - run do_setlink() on every device of a group
 * @skb: request skb, forwarded to do_setlink()
 * @net: namespace whose device list is walked
 * @tgt_net: target namespace, forwarded to do_setlink()
 * @group: device group to match against dev->group
 * @ifm: request header, forwarded to do_setlink()
 * @extack: extended ack for error reporting
 * @tb: parsed IFLA_* attribute table
 *
 * Stops at the first device for which do_setlink() returns a negative
 * error; devices handled before that keep their changes.
 *
 * Return: 0 (also when no device matched) or the first negative error.
 */
static int rtnl_group_changelink(const struct sk_buff *skb,
				 struct net *net, struct net *tgt_net,
				 int group, struct ifinfomsg *ifm,
				 struct netlink_ext_ack *extack,
				 struct nlattr **tb)
{
	struct net_device *dev, *aux;
	int err;

	for_each_netdev_safe(net, dev, aux) {
		if (dev->group == group) {
			err = do_setlink(skb, dev, tgt_net, ifm, extack, tb, 0);
			if (err < 0)
				return err;
		}
	}

	return 0;
}

/*
 * rtnl_newlink_create - create, register and configure a new device
 * @skb: request skb
 * @ifm: request header; ifi_index becomes the device's ifindex
 * @ops: link ops of the requested kind (must provide alloc or setup)
 * @tgt_net: namespace the device should end up in
 * @link_net: namespace from IFLA_LINK_NETNSID, or NULL
 * @peer_net: namespace resolved for the peer device, or NULL
 * @nlh: request header, forwarded to rtnl_configure_link()
 * @tb: parsed IFLA_* attribute table
 * @data: parsed IFLA_INFO_DATA attributes, or NULL
 * @extack: extended ack for error reporting
 *
 * Without IFLA_IFNAME the name is the template "<kind>%d" and the name
 * assign type becomes NET_NAME_ENUM.
 *
 * Return: 0 on success, -EOPNOTSUPP if @ops has neither alloc nor setup,
 * otherwise the error of the failing step.
 */
static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
			       const struct rtnl_link_ops *ops,
			       struct net *tgt_net, struct net *link_net,
			       struct net *peer_net,
			       const struct nlmsghdr *nlh,
			       struct nlattr **tb, struct nlattr **data,
			       struct netlink_ext_ack *extack)
{
	unsigned char name_assign_type = NET_NAME_USER;
	struct net *net = sock_net(skb->sk);
	u32 portid = NETLINK_CB(skb).portid;
	struct net_device *dev;
	char ifname[IFNAMSIZ];
	int err;

	if (!ops->alloc && !ops->setup)
		return -EOPNOTSUPP;

	if (tb[IFLA_IFNAME]) {
		nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
	} else {
		/* "%%d" leaves a literal "%d" in the resulting name template. */
		snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
		name_assign_type = NET_NAME_ENUM;
	}

	/* link_net is used when non-NULL, tgt_net otherwise ("?:"). */
	dev = rtnl_create_link(link_net ?
: tgt_net, ifname,
			       name_assign_type, ops, tb, extack);
	if (IS_ERR(dev)) {
		err = PTR_ERR(dev);
		goto out;
	}

	dev->ifindex = ifm->ifi_index;

	/*
	 * Namespace handed to ops->newlink(): the socket's namespace unless
	 * link_net is set; peer_net, when set, overrides both.
	 */
	if (link_net)
		net = link_net;
	if (peer_net)
		net = peer_net;

	if (ops->newlink)
		err = ops->newlink(net, dev, tb, data, extack);
	else
		err = register_netdevice(dev);
	if (err < 0) {
		/* Creation failed: the device is freed here, not unregistered. */
		free_netdev(dev);
		goto out;
	}

	err = rtnl_configure_link(dev, ifm, portid, nlh);
	if (err < 0)
		goto out_unregister;
	if (link_net) {
		/* The device was created in link_net; move it to tgt_net now. */
		err = dev_change_net_namespace(dev, tgt_net, ifname);
		if (err < 0)
			goto out_unregister;
	}
	if (tb[IFLA_MASTER]) {
		err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
		if (err)
			goto out_unregister;
	}
out:
	return err;
out_unregister:
	/* Undo through the same path (ops or plain) that created the device. */
	if (ops->newlink) {
		LIST_HEAD(list_kill);

		ops->dellink(dev, &list_kill);
		unregister_netdevice_many(&list_kill);
	} else {
		unregister_netdevice(dev);
	}
	goto out;
}

/*
 * rtnl_get_peer_net - resolve the namespace for the peer of a new link
 * @ops: link ops; ops->peer_type indexes the peer attribute inside @data
 * @tbp: top-level IFLA_* attributes of the request
 * @data: parsed kind-specific attributes, or NULL
 * @extack: extended ack for error reporting
 *
 * If @data carries no peer attribute the namespace is derived from @tbp.
 * Otherwise the peer attribute is parsed as an ifinfomsg plus attributes,
 * optionally validated with ops->validate(), and the namespace is derived
 * from the peer's own attributes.
 *
 * Return: the result of rtnl_link_get_net_ifla(), or an ERR_PTR() from
 * parsing or validation.
 */
static struct net *rtnl_get_peer_net(const struct rtnl_link_ops *ops,
				     struct nlattr *tbp[],
				     struct nlattr *data[],
				     struct netlink_ext_ack *extack)
{
	struct nlattr *tb[IFLA_MAX + 1];
	int err;

	if (!data || !data[ops->peer_type])
		return rtnl_link_get_net_ifla(tbp);

	err = rtnl_nla_parse_ifinfomsg(tb, data[ops->peer_type], extack);
	if (err < 0)
		return ERR_PTR(err);

	if (ops->validate) {
		/* Peer attributes are validated without kind-specific data. */
		err = ops->validate(tb, NULL, extack);
		if (err < 0)
			return ERR_PTR(err);
	}

	return rtnl_link_get_net_ifla(tb);
}

/*
 * __rtnl_newlink - dispatch a newlink request to change or create
 * @skb: request skb
 * @nlh: request header followed by a struct ifinfomsg
 * @ops: link ops looked up from IFLA_INFO_KIND, may be NULL
 * @tgt_net: target namespace
 * @link_net: namespace from IFLA_LINK_NETNSID, or NULL
 * @peer_net: peer namespace, or NULL
 * @tbs: attribute tables of the request
 * @data: parsed IFLA_INFO_DATA attributes, or NULL
 * @extack: extended ack for error reporting
 *
 * An existing device (found by index or name in the socket's namespace)
 * is changed; otherwise a group change or a creation is attempted
 * depending on NLM_F_CREATE.
 *
 * Return: 0 on success or a negative errno.
 */
static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			  const struct rtnl_link_ops *ops,
			  struct net *tgt_net, struct net *link_net,
			  struct net *peer_net,
			  struct rtnl_newlink_tbs *tbs,
			  struct nlattr **data,
			  struct netlink_ext_ack *extack)
{
	struct nlattr ** const tb = tbs->tb;
	struct net *net = sock_net(skb->sk);
	struct net_device *dev;
	struct ifinfomsg *ifm;
	bool link_specified;

	ifm = nlmsg_data(nlh);
	if (ifm->ifi_index > 0) {
		link_specified = true;
		dev = __dev_get_by_index(net, ifm->ifi_index);
	} else if (ifm->ifi_index < 0) {
		NL_SET_ERR_MSG(extack, "ifindex can't be negative");
		return -EINVAL;
	} else if (tb[IFLA_IFNAME] ||
tb[IFLA_ALT_IFNAME]) {
		link_specified = true;
		dev = rtnl_dev_get(net, tb);
	} else {
		link_specified = false;
		dev = NULL;
	}

	/* An existing device turns the request into a change. */
	if (dev)
		return rtnl_changelink(skb, nlh, ops, dev, tgt_net, tbs, data, extack);

	if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
		/* No dev found and NLM_F_CREATE not set. Requested dev does not exist,
		 * or it's for a group
		 */
		if (link_specified || !tb[IFLA_GROUP])
			return -ENODEV;

		return rtnl_group_changelink(skb, net, tgt_net,
					     nla_get_u32(tb[IFLA_GROUP]),
					     ifm, extack, tb);
	}

	/* From here on the request creates a new device. */
	if (tb[IFLA_MAP] || tb[IFLA_PROTINFO])
		return -EOPNOTSUPP;

	if (!ops) {
		NL_SET_ERR_MSG(extack, "Unknown device type");
		return -EOPNOTSUPP;
	}

	return rtnl_newlink_create(skb, ifm, ops, tgt_net, link_net, peer_net, nlh,
				   tb, data, extack);
}

/*
 * rtnl_newlink - netlink handler for creating or changing a link
 * @skb: request skb
 * @nlh: netlink header followed by a struct ifinfomsg
 * @extack: extended ack for error reporting
 *
 * Parses the top-level and IFLA_LINKINFO attributes into a heap-allocated
 * struct rtnl_newlink_tbs, looks up the link ops for IFLA_INFO_KIND (with
 * CONFIG_MODULES, after a request_module("rtnl-link-<kind>") retry),
 * collects the namespaces involved in a struct rtnl_nets, locks them and
 * calls __rtnl_newlink().
 *
 * Return: 0 on success or a negative errno.
 */
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *tgt_net, *link_net = NULL, *peer_net = NULL;
	struct nlattr **tb, **linkinfo, **data = NULL;
	struct rtnl_link_ops *ops = NULL;
	struct rtnl_newlink_tbs *tbs;
	struct rtnl_nets rtnl_nets;
	int ops_srcu_index;
	int ret;

	/* The attribute tables are heap-allocated rather than kept on the stack. */
	tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
	if (!tbs)
		return -ENOMEM;

	tb = tbs->tb;
	ret = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb,
				     IFLA_MAX, ifla_policy, extack);
	if (ret < 0)
		goto free;

	ret = rtnl_ensure_unique_netns(tb, extack, false);
	if (ret < 0)
		goto free;

	linkinfo = tbs->linkinfo;
	if (tb[IFLA_LINKINFO]) {
		ret = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
						  tb[IFLA_LINKINFO],
						  ifla_info_policy, NULL);
		if (ret < 0)
			goto free;
	} else {
		/* kmalloc() does not zero: clear the table that was not parsed. */
		memset(linkinfo, 0, sizeof(tbs->linkinfo));
	}

	if (linkinfo[IFLA_INFO_KIND]) {
		char kind[MODULE_NAME_LEN];

		nla_strscpy(kind, linkinfo[IFLA_INFO_KIND], sizeof(kind));
		ops = rtnl_link_ops_get(kind, &ops_srcu_index);
#ifdef CONFIG_MODULES
		if (!ops) {
			/* Try to load a module for this kind, then look again. */
			request_module("rtnl-link-%s", kind);
			ops = rtnl_link_ops_get(kind, &ops_srcu_index);
		}
#endif
	}

	rtnl_nets_init(&rtnl_nets);

	if (ops) {
		/* tbs->attr has room for RTNL_MAX_TYPE + 1 entries. */
		if (ops->maxtype > RTNL_MAX_TYPE) {
			ret = -EINVAL;
			goto put_ops;
		}

		if (ops->maxtype &&
linkinfo[IFLA_INFO_DATA]) {
			ret = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
							  linkinfo[IFLA_INFO_DATA],
							  ops->policy, extack);
			if (ret < 0)
				goto put_ops;

			data = tbs->attr;
		}

		if (ops->validate) {
			ret = ops->validate(tb, data, extack);
			if (ret < 0)
				goto put_ops;
		}

		if (ops->peer_type) {
			peer_net = rtnl_get_peer_net(ops, tb, data, extack);
			if (IS_ERR(peer_net)) {
				ret = PTR_ERR(peer_net);
				goto put_ops;
			}
			if (peer_net)
				rtnl_nets_add(&rtnl_nets, peer_net);
		}
	}

	tgt_net = rtnl_link_get_net_capable(skb, sock_net(skb->sk), tb, CAP_NET_ADMIN);
	if (IS_ERR(tgt_net)) {
		ret = PTR_ERR(tgt_net);
		goto put_net;
	}

	rtnl_nets_add(&rtnl_nets, tgt_net);

	if (tb[IFLA_LINK_NETNSID]) {
		int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);

		link_net = get_net_ns_by_id(tgt_net, id);
		if (!link_net) {
			NL_SET_ERR_MSG(extack, "Unknown network namespace id");
			ret = -EINVAL;
			goto put_net;
		}

		/*
		 * link_net is added before the capability check, so the
		 * put_net path below also covers it on -EPERM.
		 */
		rtnl_nets_add(&rtnl_nets, link_net);

		if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN)) {
			ret = -EPERM;
			goto put_net;
		}
	}

	rtnl_nets_lock(&rtnl_nets);
	ret = __rtnl_newlink(skb, nlh, ops, tgt_net, link_net, peer_net, tbs, data, extack);
	rtnl_nets_unlock(&rtnl_nets);

put_net:
	rtnl_nets_destroy(&rtnl_nets);
put_ops:
	if (ops)
		rtnl_link_ops_put(ops, ops_srcu_index);
free:
	kfree(tbs);
	return ret;
}

/*
 * rtnl_valid_getlink_req - validate and parse a get-link request
 * @skb: request skb, used for the strict-check test
 * @nlh: request header
 * @tb: output attribute table (IFLA_MAX + 1 entries)
 * @extack: extended ack for error reporting
 *
 * Without strict checking the message is parsed as is.  With strict
 * checking the ifinfomsg fields __ifi_pad, ifi_type, ifi_flags and
 * ifi_change must be zero, parsing is strict, and only a small set of
 * attributes is accepted.
 *
 * Return: 0 on success, -EINVAL for a short header, non-zero header
 * fields or an unsupported attribute, otherwise the parser's error.
 */
static int rtnl_valid_getlink_req(struct sk_buff *skb,
				  const struct nlmsghdr *nlh,
				  struct nlattr **tb,
				  struct netlink_ext_ack *extack)
{
	struct ifinfomsg *ifm;
	int i, err;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for get link");
		return -EINVAL;
	}

	if (!netlink_strict_get_check(skb))
		return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFLA_MAX,
					      ifla_policy, extack);

	ifm = nlmsg_data(nlh);
	if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags ||
	    ifm->ifi_change) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for get link request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFLA_MAX,
					    ifla_policy, extack);
	if (err)
		return err;

	/* Strict mode: reject every attribute outside the allowed set. */
	for
(i = 0; i <= IFLA_MAX; i++) {
		if (!tb[i])
			continue;

		switch (i) {
		/* The only attributes a strict get-link request may carry. */
		case IFLA_IFNAME:
		case IFLA_ALT_IFNAME:
		case IFLA_EXT_MASK:
		case IFLA_TARGET_NETNSID:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in get link request");
			return -EINVAL;
		}
	}

	return 0;
}

/*
 * rtnl_getlink - netlink handler that returns information on one link
 * @skb: request skb
 * @nlh: netlink header followed by a struct ifinfomsg
 * @extack: extended ack for error reporting
 *
 * The device is looked up by ifi_index or by name, in the namespace named
 * by IFLA_TARGET_NETNSID if given, otherwise in the socket's namespace.
 * The answer is an RTM_NEWLINK message unicast to the requester.
 *
 * Return: the result of rtnl_unicast() on success; -EINVAL if no selector
 * was given; -ENODEV if no device matched; -ENOBUFS if the reply skb
 * could not be allocated; otherwise the error of the failing step.
 */
static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct net *tgt_net = net;
	struct ifinfomsg *ifm;
	struct nlattr *tb[IFLA_MAX+1];
	struct net_device *dev = NULL;
	struct sk_buff *nskb;
	int netnsid = -1;
	int err;
	u32 ext_filter_mask = 0;

	err = rtnl_valid_getlink_req(skb, nlh, tb, extack);
	if (err < 0)
		return err;

	err = rtnl_ensure_unique_netns(tb, extack, true);
	if (err < 0)
		return err;

	if (tb[IFLA_TARGET_NETNSID]) {
		netnsid = nla_get_s32(tb[IFLA_TARGET_NETNSID]);
		tgt_net = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, netnsid);
		if (IS_ERR(tgt_net))
			return PTR_ERR(tgt_net);
	}

	if (tb[IFLA_EXT_MASK])
		ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]);

	/* err is preset before each step so a bare "goto out" reports it. */
	err = -EINVAL;
	ifm = nlmsg_data(nlh);
	if (ifm->ifi_index > 0)
		dev = __dev_get_by_index(tgt_net, ifm->ifi_index);
	else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
		dev = rtnl_dev_get(tgt_net, tb);
	else
		goto out;

	err = -ENODEV;
	if (dev == NULL)
		goto out;

	err = -ENOBUFS;
	nskb = nlmsg_new_large(if_nlmsg_size(dev, ext_filter_mask));
	if (nskb == NULL)
		goto out;

	/* Synchronize the carrier state so we don't report a state
	 * that we're not actually going to honour immediately; if
	 * the driver just did a carrier off->on transition, we can
	 * only TX if link watch work has run, but without this we'd
	 * already report carrier on, even if it doesn't work yet.
*/
	linkwatch_sync_dev(dev);

	err = rtnl_fill_ifinfo(nskb, dev, net,
			       RTM_NEWLINK, NETLINK_CB(skb).portid,
			       nlh->nlmsg_seq, 0, 0, ext_filter_mask,
			       0, NULL, 0, netnsid, GFP_KERNEL);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in if_nlmsg_size */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(nskb);
	} else
		err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
out:
	/* netnsid is >= 0 only when tgt_net came from the netnsid lookup. */
	if (netnsid >= 0)
		put_net(tgt_net);

	return err;
}

/*
 * rtnl_alt_ifname - add or remove one alternative name of @dev
 * @cmd: RTM_NEWLINKPROP to add, RTM_DELLINKPROP to remove
 * @dev: device whose alternative names are edited
 * @attr: the IFLA_ALT_IFNAME attribute carrying the name
 * @changed: set to true on success, left untouched on failure
 * @extack: extended ack for error reporting
 *
 * For additions, the current property list size plus room for one more
 * ALTIFNAMSIZ attribute must stay below U16_MAX.
 *
 * Return: 0 on success; -EINVAL if the list would grow too long or @cmd
 * is unknown; -ENOMEM if the name cannot be duplicated; otherwise the
 * error from validation or from the name node helpers.
 */
static int rtnl_alt_ifname(int cmd, struct net_device *dev, struct nlattr *attr,
			   bool *changed, struct netlink_ext_ack *extack)
{
	char *alt_ifname;
	size_t size;
	int err;

	err = nla_validate(attr, attr->nla_len, IFLA_MAX, ifla_policy, extack);
	if (err)
		return err;

	if (cmd == RTM_NEWLINKPROP) {
		size = rtnl_prop_list_size(dev);
		size += nla_total_size(ALTIFNAMSIZ);
		if (size >= U16_MAX) {
			NL_SET_ERR_MSG(extack,
				       "effective property list too long");
			return -EINVAL;
		}
	}

	alt_ifname = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
	if (!alt_ifname)
		return -ENOMEM;

	if (cmd == RTM_NEWLINKPROP) {
		err = netdev_name_node_alt_create(dev, alt_ifname);
		/*
		 * On success the pointer is cleared so the kfree() below
		 * does nothing.
		 * NOTE(review): presumably the name node now owns the
		 * string - confirm in netdev_name_node_alt_create().
		 */
		if (!err)
			alt_ifname = NULL;
	} else if (cmd == RTM_DELLINKPROP) {
		err = netdev_name_node_alt_destroy(dev, alt_ifname);
	} else {
		WARN_ON_ONCE(1);
		err = -EINVAL;
	}

	kfree(alt_ifname);
	if (!err)
		*changed = true;
	return err;
}

/*
 * rtnl_linkprop - common body of the link property add/delete handlers
 * @cmd: RTM_NEWLINKPROP or RTM_DELLINKPROP
 * @skb: request skb
 * @nlh: netlink header followed by a struct ifinfomsg
 * @extack: extended ack for error reporting
 *
 * Finds the device by index or name in the socket's namespace and applies
 * @cmd to every IFLA_ALT_IFNAME entry nested in IFLA_PROP_LIST.  A request
 * without IFLA_PROP_LIST succeeds without doing anything.
 *
 * Return: 0 on success, -EINVAL if no selector was given, -ENODEV if no
 * device matched, otherwise the error of the failing step.
 */
static int rtnl_linkprop(int cmd, struct sk_buff *skb, struct nlmsghdr *nlh,
			 struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[IFLA_MAX + 1];
	struct net_device *dev;
	struct ifinfomsg *ifm;
	bool changed = false;
	struct nlattr *attr;
	int err, rem;

	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy, extack);
	if (err)
		return err;

	err = rtnl_ensure_unique_netns(tb, extack, true);
	if (err)
		return err;

	ifm = nlmsg_data(nlh);
	if (ifm->ifi_index > 0)
		dev = __dev_get_by_index(net, ifm->ifi_index);
	else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME])
		dev = rtnl_dev_get(net, tb);
	else
		return -EINVAL;

	if (!dev)
		return -ENODEV;

	if (!tb[IFLA_PROP_LIST])
		return 0;

	nla_for_each_nested(attr,
tb[IFLA_PROP_LIST], rem) {
		switch (nla_type(attr)) {
		/* Other nested attribute types are skipped. */
		case IFLA_ALT_IFNAME:
			/*
			 * A failure returns at once: entries handled before it
			 * stay applied and no state-change notification is sent.
			 */
			err = rtnl_alt_ifname(cmd, dev, attr, &changed, extack);
			if (err)
				return err;
			break;
		}
	}

	if (changed)
		netdev_state_change(dev);
	return 0;
}

/* Handler that adds link properties: rtnl_linkprop() with RTM_NEWLINKPROP. */
static int rtnl_newlinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	return rtnl_linkprop(RTM_NEWLINKPROP, skb, nlh, extack);
}

/* Handler that removes link properties: rtnl_linkprop() with RTM_DELLINKPROP. */
static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack)
{
	return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack);
}

/*
 * rtnl_calcit - compute the buffer size for a link dump
 * @skb: request skb; its socket selects the namespace
 * @nlh: request header
 *
 * Return: NLMSG_GOODSIZE if the message is too short for its header or
 * carries no non-zero IFLA_EXT_MASK; otherwise nlmsg_total_size() of the
 * largest if_nlmsg_size() over all devices of the namespace for that
 * filter mask.
 */
static noinline_for_stack u32 rtnl_calcit(struct sk_buff *skb,
					  struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	size_t min_ifinfo_dump_size = 0;
	u32 ext_filter_mask = 0;
	struct net_device *dev;
	struct nlattr *nla;
	int hdrlen, rem;

	/* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */
	hdrlen = nlmsg_len(nlh) < sizeof(struct ifinfomsg) ?
		 sizeof(struct rtgenmsg) : sizeof(struct ifinfomsg);

	if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
		return NLMSG_GOODSIZE;

	/* Only a correctly sized attribute is read; the last one wins. */
	nla_for_each_attr_type(nla, IFLA_EXT_MASK,
			       nlmsg_attrdata(nlh, hdrlen),
			       nlmsg_attrlen(nlh, hdrlen), rem) {
		if (nla_len(nla) == sizeof(u32))
			ext_filter_mask = nla_get_u32(nla);
	}

	if (!ext_filter_mask)
		return NLMSG_GOODSIZE;
	/*
	 * traverse the list of net devices and compute the minimum
	 * buffer size based upon the filter mask.
*/
	rcu_read_lock();
	/* Despite the variable's name, this keeps the running maximum. */
	for_each_netdev_rcu(net, dev) {
		min_ifinfo_dump_size = max(min_ifinfo_dump_size,
					   if_nlmsg_size(dev, ext_filter_mask));
	}
	rcu_read_unlock();

	return nlmsg_total_size(min_ifinfo_dump_size);
}

/*
 * rtnl_dump_all - run the dump handler of every family for one message type
 * @skb: skb the dump output is written to
 * @cb: dump state; cb->family holds the family index to resume from
 *
 * Walks the families 1..RTNL_FAMILY_MAX (skipping PF_PACKET) and calls
 * each registered dumpit for the message type of the request.  When
 * moving past the resume family, the per-dump state in @cb is reset.  The
 * walk stops at the first dumpit that returns non-zero, and the family
 * index reached is stored back in cb->family.
 *
 * Return: skb->len if non-zero, otherwise the last dumpit return value
 * (0 if none ran).
 */
static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
{
	int idx;
	int s_idx = cb->family;
	int type = cb->nlh->nlmsg_type - RTM_BASE;
	int ret = 0;

	if (s_idx == 0)
		s_idx = 1;

	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
		struct rtnl_link __rcu **tab;
		struct rtnl_link *link;
		rtnl_dumpit_func dumpit;

		if (idx < s_idx || idx == PF_PACKET)
			continue;

		/* Guards the tab[type] index below. */
		if (type < 0 || type >= RTM_NR_MSGTYPES)
			continue;

		tab = rcu_dereference_rtnl(rtnl_msg_handlers[idx]);
		if (!tab)
			continue;

		link = rcu_dereference_rtnl(tab[type]);
		if (!link)
			continue;

		dumpit = link->dumpit;
		if (!dumpit)
			continue;

		/* A new family starts with clean per-dump state. */
		if (idx > s_idx) {
			memset(&cb->args[0], 0, sizeof(cb->args));
			cb->prev_seq = 0;
			cb->seq = 0;
		}
		ret = dumpit(skb, cb);
		if (ret)
			break;
	}
	cb->family = idx;

	return skb->len ? : ret;
}

/*
 * rtmsg_ifinfo_build_skb - build a link notification message for @dev
 * @type: netlink message type of the notification
 * @dev: device described by the message
 * @change: change mask passed to rtnl_fill_ifinfo()
 * @event: event value passed to rtnl_fill_ifinfo()
 * @flags: allocation flags
 * @new_nsid: passed through to rtnl_fill_ifinfo()
 * @new_ifindex: passed through to rtnl_fill_ifinfo()
 * @portid: port id of the requester; replaced by 0 unless
 *	nlmsg_report(@nlh) is true
 * @nlh: request header, used for the sequence number when reporting
 *
 * Return: the filled skb, or NULL on failure, in which case the error is
 * recorded with rtnl_set_sk_err() for RTNLGRP_LINK.
 */
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
				       unsigned int change,
				       u32 event, gfp_t flags, int *new_nsid,
				       int new_ifindex, u32 portid,
				       const struct nlmsghdr *nlh)
{
	struct net *net = dev_net(dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;
	u32 seq = 0;

	skb = nlmsg_new(if_nlmsg_size(dev, 0), flags);
	if (skb == NULL)
		goto errout;

	/* seq and portid are only kept when nlmsg_report() is true. */
	if (nlmsg_report(nlh))
		seq = nlmsg_seq(nlh);
	else
		portid = 0;

	err = rtnl_fill_ifinfo(skb, dev, dev_net(dev),
			       type, portid, seq, change, 0, 0, event,
			       new_nsid, new_ifindex, -1, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in if_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	return skb;
errout:
	rtnl_set_sk_err(net, RTNLGRP_LINK, err);
	return NULL;
}

/*
 * rtmsg_ifinfo_send - send a built link notification to RTNLGRP_LINK
 * @skb: message built by rtmsg_ifinfo_build_skb()
 * @dev: device the message is about; selects the namespace
 * @flags: allocation flags passed to rtnl_notify()
 * @portid: port id passed to rtnl_notify()
 * @nlh: request header passed to rtnl_notify()
 */
void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags,
		       u32 portid, const struct nlmsghdr *nlh)
{
	struct net *net = dev_net(dev);

	rtnl_notify(skb, net, portid, RTNLGRP_LINK, nlh, flags);
}

/*
 * rtmsg_ifinfo_event - build and send a link notification for @dev.
 * Nothing is sent unless dev->reg_state is NETREG_REGISTERED.
 */
static void rtmsg_ifinfo_event(int type, struct net_device *dev,
unsigned int change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid, new_ifindex, portid, nlh); if (skb) rtmsg_ifinfo_send(skb, dev, flags, portid, nlh); } void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, u32 portid, const struct nlmsghdr *nlh) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL, 0, portid, nlh); } void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex) { rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, new_nsid, new_ifindex, 0, NULL); } static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, u8 *addr, u16 vid, u32 pid, u32 seq, int type, unsigned int flags, int nlflags, u16 ndm_state) { struct nlmsghdr *nlh; struct ndmsg *ndm; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags); if (!nlh) return -EMSGSIZE; ndm = nlmsg_data(nlh); ndm->ndm_family = AF_BRIDGE; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; ndm->ndm_flags = flags; ndm->ndm_type = 0; ndm->ndm_ifindex = dev->ifindex; ndm->ndm_state = ndm_state; if (nla_put(skb, NDA_LLADDR, dev->addr_len, addr)) goto nla_put_failure; if (vid) if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static inline size_t rtnl_fdb_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(dev->addr_len) + /* NDA_LLADDR */ nla_total_size(sizeof(u16)) + /* NDA_VLAN */ 0; } static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type, u16 ndm_state) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -ENOBUFS; skb = nlmsg_new(rtnl_fdb_nlmsg_size(dev), GFP_ATOMIC); if (!skb) goto errout; 
err = nlmsg_populate_fdb_fill(skb, dev, addr, vid, 0, 0, type, NTF_SELF, 0, ndm_state); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); return; errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } /* * ndo_dflt_fdb_add - default netdevice operation to add an FDB entry */ int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags) { int err = -EINVAL; /* If aging addresses are supported device will need to * implement its own handler for this. */ if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { netdev_info(dev, "default FDB implementation only supports local addresses\n"); return err; } if (tb[NDA_FLAGS_EXT]) { netdev_info(dev, "invalid flags given to default FDB implementation\n"); return err; } if (vid) { netdev_info(dev, "vlans aren't supported yet for dev_uc|mc_add()\n"); return err; } if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_add_excl(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_add_excl(dev, addr); /* Only return duplicate errors if NLM_F_EXCL is set */ if (err == -EEXIST && !(flags & NLM_F_EXCL)) err = 0; return err; } EXPORT_SYMBOL(ndo_dflt_fdb_add); static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid, struct netlink_ext_ack *extack) { u16 vid = 0; if (vlan_attr) { if (nla_len(vlan_attr) != sizeof(u16)) { NL_SET_ERR_MSG(extack, "invalid vlan attribute size"); return -EINVAL; } vid = nla_get_u16(vlan_attr); if (!vid || vid >= VLAN_VID_MASK) { NL_SET_ERR_MSG(extack, "invalid vlan id"); return -EINVAL; } } *p_vid = vid; return 0; } static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; u8 *addr; u16 vid; int err; err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); if (err < 0) 
return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } if (dev->type != ARPHRD_ETHER) { NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops = br_dev->netdev_ops; bool notified = false; err = ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, &notified, extack); if (err) goto out; else ndm->ndm_flags &= ~NTF_MASTER; } /* Embedded bridge, macvlan, and any other device support */ if ((ndm->ndm_flags & NTF_SELF)) { bool notified = false; if (dev->netdev_ops->ndo_fdb_add) err = dev->netdev_ops->ndo_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags, &notified, extack); else err = ndo_dflt_fdb_add(ndm, tb, dev, addr, vid, nlh->nlmsg_flags); if (!err && !notified) { rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } out: return err; } /* * ndo_dflt_fdb_del - default netdevice operation to delete an FDB entry */ int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid) { int err = -EINVAL; /* If aging addresses are supported device will need to * implement its own handler for this. 
*/ if (!(ndm->ndm_state & NUD_PERMANENT)) { netdev_info(dev, "default FDB implementation only supports local addresses\n"); return err; } if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) err = dev_uc_del(dev, addr); else if (is_multicast_ether_addr(addr)) err = dev_mc_del(dev, addr); return err; } EXPORT_SYMBOL(ndo_dflt_fdb_del); static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct net *net = sock_net(skb->sk); const struct net_device_ops *ops; struct ndmsg *ndm; struct nlattr *tb[NDA_MAX+1]; struct net_device *dev; __u8 *addr = NULL; int err; u16 vid; if (!netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; if (!del_bulk) { err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } else { /* For bulk delete, the drivers will parse the message with * policy. */ err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL, extack); } if (err < 0) return err; ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!del_bulk) { if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; } if (dev->type != ARPHRD_ETHER) { NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices"); return -EINVAL; } err = -EOPNOTSUPP; /* Support fdb on master device the net/bridge default case */ if ((!ndm->ndm_flags || ndm->ndm_flags & NTF_MASTER) && netif_is_bridge_port(dev)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); bool notified = false; ops = br_dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, &notified, 
extack); } else { if (ops->ndo_fdb_del_bulk) err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (err) goto out; else ndm->ndm_flags &= ~NTF_MASTER; } /* Embedded bridge, macvlan, and any other device support */ if (ndm->ndm_flags & NTF_SELF) { bool notified = false; ops = dev->netdev_ops; if (!del_bulk) { if (ops->ndo_fdb_del) err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, &notified, extack); else err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); } else { /* in case err was cleared by NTF_MASTER call */ err = -EOPNOTSUPP; if (ops->ndo_fdb_del_bulk) err = ops->ndo_fdb_del_bulk(nlh, dev, extack); } if (!err) { if (!del_bulk && !notified) rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH, ndm->ndm_state); ndm->ndm_flags &= ~NTF_SELF; } } out: return err; } static int nlmsg_populate_fdb(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, int *idx, struct netdev_hw_addr_list *list) { struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct netdev_hw_addr *ha; u32 portid, seq; int err; portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { if (*idx < ctx->fdb_idx) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, portid, seq, RTM_NEWNEIGH, NTF_SELF, NLM_F_MULTI, NUD_PERMANENT); if (err < 0) return err; skip: *idx += 1; } return 0; } /** * ndo_dflt_fdb_dump - default netdevice operation to dump an FDB table. * @skb: socket buffer to store message in * @cb: netlink callback * @dev: netdevice * @filter_dev: ignored * @idx: the number of FDB table entries dumped is added to *@idx * * Default netdevice operation to dump the existing unicast address list. * Returns number of addresses from list put in skb. 
*/ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx) { int err; if (dev->type != ARPHRD_ETHER) return -EINVAL; netif_addr_lock_bh(dev); err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) goto out; err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); out: netif_addr_unlock_bh(dev); return err; } EXPORT_SYMBOL(ndo_dflt_fdb_dump); static int valid_fdb_dump_strict(const struct nlmsghdr *nlh, int *br_idx, int *brport_idx, struct netlink_ext_ack *extack) { struct nlattr *tb[NDA_MAX + 1]; struct ndmsg *ndm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for fdb dump request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_flags || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb dump request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL, extack); if (err < 0) return err; *brport_idx = ndm->ndm_ifindex; for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case NDA_IFINDEX: if (nla_len(tb[i]) != sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid IFINDEX attribute in fdb dump request"); return -EINVAL; } *brport_idx = nla_get_u32(tb[NDA_IFINDEX]); break; case NDA_MASTER: if (nla_len(tb[i]) != sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid MASTER attribute in fdb dump request"); return -EINVAL; } *br_idx = nla_get_u32(tb[NDA_MASTER]); break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb dump request"); return -EINVAL; } } return 0; } static int valid_fdb_dump_legacy(const struct nlmsghdr *nlh, int *br_idx, int *brport_idx, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_MAX+1]; int err; /* A hack to preserve kernel<->userspace interface. * Before Linux v4.12 this code accepted ndmsg since iproute2 v3.3.0. 
* However, ndmsg is shorter than ifinfomsg thus nlmsg_parse() bails. * So, check for ndmsg with an optional u32 attribute (not used here). * Fortunately these sizes don't conflict with the size of ifinfomsg * with an optional attribute. */ if (nlmsg_len(nlh) != sizeof(struct ndmsg) && (nlmsg_len(nlh) != sizeof(struct ndmsg) + nla_attr_size(sizeof(u32)))) { struct ifinfomsg *ifm; err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy, extack); if (err < 0) { return -EINVAL; } else if (err == 0) { if (tb[IFLA_MASTER]) *br_idx = nla_get_u32(tb[IFLA_MASTER]); } ifm = nlmsg_data(nlh); *brport_idx = ifm->ifi_index; } return 0; } static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { const struct net_device_ops *ops = NULL, *cops = NULL; struct ndo_fdb_dump_context *ctx = (void *)cb->ctx; struct net_device *dev, *br_dev = NULL; struct net *net = sock_net(skb->sk); int brport_idx = 0; int br_idx = 0; int fidx = 0; int err; NL_ASSERT_CTX_FITS(struct ndo_fdb_dump_context); if (cb->strict_check) err = valid_fdb_dump_strict(cb->nlh, &br_idx, &brport_idx, cb->extack); else err = valid_fdb_dump_legacy(cb->nlh, &br_idx, &brport_idx, cb->extack); if (err < 0) return err; if (br_idx) { br_dev = __dev_get_by_index(net, br_idx); if (!br_dev) return -ENODEV; ops = br_dev->netdev_ops; } for_each_netdev_dump(net, dev, ctx->ifindex) { if (brport_idx && (dev->ifindex != brport_idx)) continue; if (!br_idx) { /* user did not specify a specific bridge */ if (netif_is_bridge_port(dev)) { br_dev = netdev_master_upper_dev_get(dev); cops = br_dev->netdev_ops; } } else { if (dev != br_dev && !netif_is_bridge_port(dev)) continue; if (br_dev != netdev_master_upper_dev_get(dev) && !netif_is_bridge_master(dev)) continue; cops = ops; } if (netif_is_bridge_port(dev)) { if (cops && cops->ndo_fdb_dump) { err = cops->ndo_fdb_dump(skb, cb, br_dev, dev, &fidx); if (err == -EMSGSIZE) break; } } if (dev->netdev_ops->ndo_fdb_dump) err = 
dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, &fidx); else err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, &fidx); if (err == -EMSGSIZE) break; cops = NULL; /* reset fdb offset to 0 for rest of the interfaces */ ctx->fdb_idx = 0; fidx = 0; } ctx->fdb_idx = fidx; return skb->len; } static int valid_fdb_get_strict(const struct nlmsghdr *nlh, struct nlattr **tb, u8 *ndm_flags, int *br_idx, int *brport_idx, u8 **addr, u16 *vid, struct netlink_ext_ack *extack) { struct ndmsg *ndm; int err, i; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) { NL_SET_ERR_MSG(extack, "Invalid header for fdb get request"); return -EINVAL; } ndm = nlmsg_data(nlh); if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state || ndm->ndm_type) { NL_SET_ERR_MSG(extack, "Invalid values in header for fdb get request"); return -EINVAL; } if (ndm->ndm_flags & ~(NTF_MASTER | NTF_SELF)) { NL_SET_ERR_MSG(extack, "Invalid flags in header for fdb get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb, NDA_MAX, nda_policy, extack); if (err < 0) return err; *ndm_flags = ndm->ndm_flags; *brport_idx = ndm->ndm_ifindex; for (i = 0; i <= NDA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case NDA_MASTER: *br_idx = nla_get_u32(tb[i]); break; case NDA_LLADDR: if (nla_len(tb[i]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "Invalid address in fdb get request"); return -EINVAL; } *addr = nla_data(tb[i]); break; case NDA_VLAN: err = fdb_vid_parse(tb[i], vid, extack); if (err) return err; break; case NDA_VNI: break; default: NL_SET_ERR_MSG(extack, "Unsupported attribute in fdb get request"); return -EINVAL; } } return 0; } static int rtnl_fdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net_device *dev = NULL, *br_dev = NULL; const struct net_device_ops *ops = NULL; struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NDA_MAX + 1]; struct sk_buff *skb; int brport_idx = 0; u8 ndm_flags = 0; int br_idx = 0; u8 *addr = NULL; u16 vid = 
0; int err; err = valid_fdb_get_strict(nlh, tb, &ndm_flags, &br_idx, &brport_idx, &addr, &vid, extack); if (err < 0) return err; if (!addr) { NL_SET_ERR_MSG(extack, "Missing lookup address for fdb get request"); return -EINVAL; } if (brport_idx) { dev = __dev_get_by_index(net, brport_idx); if (!dev) { NL_SET_ERR_MSG(extack, "Unknown device ifindex"); return -ENODEV; } } if (br_idx) { if (dev) { NL_SET_ERR_MSG(extack, "Master and device are mutually exclusive"); return -EINVAL; } br_dev = __dev_get_by_index(net, br_idx); if (!br_dev) { NL_SET_ERR_MSG(extack, "Invalid master ifindex"); return -EINVAL; } ops = br_dev->netdev_ops; } if (dev) { if (!ndm_flags || (ndm_flags & NTF_MASTER)) { if (!netif_is_bridge_port(dev)) { NL_SET_ERR_MSG(extack, "Device is not a bridge port"); return -EINVAL; } br_dev = netdev_master_upper_dev_get(dev); if (!br_dev) { NL_SET_ERR_MSG(extack, "Master of device not found"); return -EINVAL; } ops = br_dev->netdev_ops; } else { if (!(ndm_flags & NTF_SELF)) { NL_SET_ERR_MSG(extack, "Missing NTF_SELF"); return -EINVAL; } ops = dev->netdev_ops; } } if (!br_dev && !dev) { NL_SET_ERR_MSG(extack, "No device specified"); return -ENODEV; } if (!ops || !ops->ndo_fdb_get) { NL_SET_ERR_MSG(extack, "Fdb get operation not supported by device"); return -EOPNOTSUPP; } skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (br_dev) dev = br_dev; err = ops->ndo_fdb_get(skb, tb, dev, addr, vid, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, extack); if (err) goto out; return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: kfree_skb(skb); return err; } static int brport_nla_put_flag(struct sk_buff *skb, u32 flags, u32 mask, unsigned int attrnum, unsigned int flag) { if (mask & flag) return nla_put_u8(skb, attrnum, !!(flags & flag)); return 0; } int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, 
struct net_device *dev, u32 filter_mask)) { struct nlmsghdr *nlh; struct ifinfomsg *ifm; struct nlattr *br_afspec; struct nlattr *protinfo; u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_device *br_dev = netdev_master_upper_dev_get(dev); int err = 0; nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), nlflags); if (nlh == NULL) return -EMSGSIZE; ifm = nlmsg_data(nlh); ifm->ifi_family = AF_BRIDGE; ifm->__ifi_pad = 0; ifm->ifi_type = dev->type; ifm->ifi_index = dev->ifindex; ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = 0; if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || nla_put_u8(skb, IFLA_OPERSTATE, operstate) || (br_dev && nla_put_u32(skb, IFLA_MASTER, br_dev->ifindex)) || (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev_get_iflink(dev) && nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; br_afspec = nla_nest_start_noflag(skb, IFLA_AF_SPEC); if (!br_afspec) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF)) { nla_nest_cancel(skb, br_afspec); goto nla_put_failure; } if (mode != BRIDGE_MODE_UNDEF) { if (nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { nla_nest_cancel(skb, br_afspec); goto nla_put_failure; } } if (vlan_fill) { err = vlan_fill(skb, dev, filter_mask); if (err) { nla_nest_cancel(skb, br_afspec); goto nla_put_failure; } } nla_nest_end(skb, br_afspec); protinfo = nla_nest_start(skb, IFLA_PROTINFO); if (!protinfo) goto nla_put_failure; if (brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_GUARD, BR_BPDU_GUARD) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_LEARNING, BR_LEARNING) || brport_nla_put_flag(skb, flags, mask, 
IFLA_BRPORT_LEARNING_SYNC, BR_LEARNING_SYNC) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_PROXYARP, BR_PROXYARP) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD) || brport_nla_put_flag(skb, flags, mask, IFLA_BRPORT_BCAST_FLOOD, BR_BCAST_FLOOD)) { nla_nest_cancel(skb, protinfo); goto nla_put_failure; } nla_nest_end(skb, protinfo); nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return err ? err : -EMSGSIZE; } EXPORT_SYMBOL_GPL(ndo_dflt_bridge_getlink); static int valid_bridge_getlink_req(const struct nlmsghdr *nlh, bool strict_check, u32 *filter_mask, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_MAX+1]; int err, i; if (strict_check) { struct ifinfomsg *ifm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) { NL_SET_ERR_MSG(extack, "Invalid header for bridge link dump"); return -EINVAL; } ifm = nlmsg_data(nlh); if (ifm->__ifi_pad || ifm->ifi_type || ifm->ifi_flags || ifm->ifi_change || ifm->ifi_index) { NL_SET_ERR_MSG(extack, "Invalid values in header for bridge link dump request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy, extack); } else { err = nlmsg_parse_deprecated(nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy, extack); } if (err < 0) return err; /* new attributes should only be added with strict checking */ for (i = 0; i <= IFLA_MAX; ++i) { if (!tb[i]) continue; switch (i) { case IFLA_EXT_MASK: *filter_mask = nla_get_u32(tb[i]); break; default: if (strict_check) { NL_SET_ERR_MSG(extack, "Unsupported attribute in bridge link dump request"); return -EINVAL; } } } return 0; } static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) { const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); struct net_device *dev; int idx = 0; u32 portid = NETLINK_CB(cb->skb).portid; u32 seq = 
nlh->nlmsg_seq; u32 filter_mask = 0; int err; err = valid_bridge_getlink_req(nlh, cb->strict_check, &filter_mask, cb->extack); if (err < 0 && cb->strict_check) return err; rcu_read_lock(); for_each_netdev_rcu(net, dev) { const struct net_device_ops *ops = dev->netdev_ops; struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (br_dev && br_dev->netdev_ops->ndo_bridge_getlink) { if (idx >= cb->args[0]) { err = br_dev->netdev_ops->ndo_bridge_getlink( skb, portid, seq, dev, filter_mask, NLM_F_MULTI); if (err < 0 && err != -EOPNOTSUPP) { if (likely(skb->len)) break; goto out_err; } } idx++; } if (ops->ndo_bridge_getlink) { if (idx >= cb->args[0]) { err = ops->ndo_bridge_getlink(skb, portid, seq, dev, filter_mask, NLM_F_MULTI); if (err < 0 && err != -EOPNOTSUPP) { if (likely(skb->len)) break; goto out_err; } } idx++; } } err = skb->len; out_err: rcu_read_unlock(); cb->args[0] = idx; return err; } static inline size_t bridge_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ } static int rtnl_bridge_notify(struct net_device *dev) { struct net *net = dev_net(dev); struct sk_buff *skb; int err = -EOPNOTSUPP; if (!dev->netdev_ops->ndo_bridge_getlink) return 0; skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); if (!skb) { err = -ENOMEM; goto errout; } err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev, 0, 0); if (err < 0) goto errout; /* Notification info is only filled for bridge ports, not the bridge * device itself. 
Therefore, a zero notification length is valid and * should not result in an error. */ if (!skb->len) goto errout; rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; errout: WARN_ON(err == -EMSGSIZE); kfree_skb(skb); if (err) rtnl_set_sk_err(net, RTNLGRP_LINK, err); return err; } static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; struct nlattr *br_spec, *attr, *br_flags_attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_family != AF_BRIDGE) return -EPFNOSUPPORT; dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested(attr, br_spec, rem) { if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !br_flags_attr) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; br_flags_attr = attr; flags = nla_get_u16(attr); } if (nla_type(attr) == IFLA_BRIDGE_MODE) { if (nla_len(attr) < sizeof(u16)) return -EINVAL; } } } if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (!br_dev || !br_dev->netdev_ops->ndo_bridge_setlink) { err = -EOPNOTSUPP; goto out; } err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags, extack); if (err) goto out; flags &= ~BRIDGE_FLAGS_MASTER; } if ((flags & BRIDGE_FLAGS_SELF)) { if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags, extack); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; /* Generate event to notify upper layer of bridge * change */ err = rtnl_bridge_notify(dev); } } if (br_flags_attr) memcpy(nla_data(br_flags_attr), &flags, sizeof(flags)); out: return err; } static int rtnl_bridge_dellink(struct sk_buff *skb, 
struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; struct nlattr *br_spec, *attr = NULL; int rem, err = -EOPNOTSUPP; u16 flags = 0; bool have_flags = false; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_family != AF_BRIDGE) return -EPFNOSUPPORT; dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (br_spec) { nla_for_each_nested_type(attr, IFLA_BRIDGE_FLAGS, br_spec, rem) { if (nla_len(attr) < sizeof(flags)) return -EINVAL; have_flags = true; flags = nla_get_u16(attr); break; } } if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { struct net_device *br_dev = netdev_master_upper_dev_get(dev); if (!br_dev || !br_dev->netdev_ops->ndo_bridge_dellink) { err = -EOPNOTSUPP; goto out; } err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; flags &= ~BRIDGE_FLAGS_MASTER; } if ((flags & BRIDGE_FLAGS_SELF)) { if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; /* Generate event to notify upper layer of bridge * change */ err = rtnl_bridge_notify(dev); } } if (have_flags) memcpy(nla_data(attr), &flags, sizeof(flags)); out: return err; } static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) { return (mask & IFLA_STATS_FILTER_BIT(attrid)) && (!idxattr || idxattr == attrid); } static bool rtnl_offload_xstats_have_ndo(const struct net_device *dev, int attr_id) { return dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && dev->netdev_ops->ndo_get_offload_stats && dev->netdev_ops->ndo_has_offload_stats(dev, attr_id); } static unsigned int rtnl_offload_xstats_get_size_ndo(const struct net_device *dev, int attr_id) { return 
rtnl_offload_xstats_have_ndo(dev, attr_id) ? sizeof(struct rtnl_link_stats64) : 0; } static int rtnl_offload_xstats_fill_ndo(struct net_device *dev, int attr_id, struct sk_buff *skb) { unsigned int size = rtnl_offload_xstats_get_size_ndo(dev, attr_id); struct nlattr *attr = NULL; void *attr_data; int err; if (!size) return -ENODATA; attr = nla_reserve_64bit(skb, attr_id, size, IFLA_OFFLOAD_XSTATS_UNSPEC); if (!attr) return -EMSGSIZE; attr_data = nla_data(attr); memset(attr_data, 0, size); err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, attr_data); if (err) return err; return 0; } static unsigned int rtnl_offload_xstats_get_size_stats(const struct net_device *dev, enum netdev_offload_xstats_type type) { bool enabled = netdev_offload_xstats_enabled(dev, type); return enabled ? sizeof(struct rtnl_hw_stats64) : 0; } struct rtnl_offload_xstats_request_used { bool request; bool used; }; static int rtnl_offload_xstats_get_stats(struct net_device *dev, enum netdev_offload_xstats_type type, struct rtnl_offload_xstats_request_used *ru, struct rtnl_hw_stats64 *stats, struct netlink_ext_ack *extack) { bool request; bool used; int err; request = netdev_offload_xstats_enabled(dev, type); if (!request) { used = false; goto out; } err = netdev_offload_xstats_get(dev, type, stats, &used, extack); if (err) return err; out: if (ru) { ru->request = request; ru->used = used; } return 0; } static int rtnl_offload_xstats_fill_hw_s_info_one(struct sk_buff *skb, int attr_id, struct rtnl_offload_xstats_request_used *ru) { struct nlattr *nest; nest = nla_nest_start(skb, attr_id); if (!nest) return -EMSGSIZE; if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_REQUEST, ru->request)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_OFFLOAD_XSTATS_HW_S_INFO_USED, ru->used)) goto nla_put_failure; nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int rtnl_offload_xstats_fill_hw_s_info(struct sk_buff *skb, struct net_device *dev, 
/* Emit the offload xstats attributes selected by @off_filter_mask into @skb.
 *
 * Up to three attributes are handled, in this order:
 * IFLA_OFFLOAD_XSTATS_CPU_HIT (through the driver ndo helper),
 * IFLA_OFFLOAD_XSTATS_HW_S_INFO and IFLA_OFFLOAD_XSTATS_L3_STATS.
 * An attribute is only attempted when its filter bit is set and *@prividx
 * is not greater than its id.
 *
 * *@prividx is set to the id of the attribute being worked on before an
 * error is returned, and is reset to 0 once everything requested has been
 * processed.
 *
 * Returns 0 if at least one attribute was emitted, -ENODATA if none was,
 * -EMSGSIZE if room for the L3 stats could not be reserved, or the error
 * returned by one of the fill helpers.
 */
static int rtnl_offload_xstats_fill(struct sk_buff *skb, struct net_device *dev,
				    int *prividx, u32 off_filter_mask,
				    struct netlink_ext_ack *extack)
{
	enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3;
	int attr_id_hw_s_info = IFLA_OFFLOAD_XSTATS_HW_S_INFO;
	int attr_id_l3_stats = IFLA_OFFLOAD_XSTATS_L3_STATS;
	int attr_id_cpu_hit = IFLA_OFFLOAD_XSTATS_CPU_HIT;
	bool have_data = false;
	int err;

	if (*prividx <= attr_id_cpu_hit &&
	    (off_filter_mask &
	     IFLA_STATS_FILTER_BIT(attr_id_cpu_hit))) {
		err = rtnl_offload_xstats_fill_ndo(dev, attr_id_cpu_hit, skb);
		if (!err) {
			have_data = true;
		} else if (err != -ENODATA) {
			/* -ENODATA only means there is nothing to report for
			 * this attribute; any other error aborts the fill and
			 * records where it stopped.
			 */
			*prividx = attr_id_cpu_hit;
			return err;
		}
	}

	if (*prividx <= attr_id_hw_s_info &&
	    (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_hw_s_info))) {
		/* Mark the attribute as in progress before filling it, so
		 * the marker is already in place if the fill fails.
		 */
		*prividx = attr_id_hw_s_info;

		err = rtnl_offload_xstats_fill_hw_s_info(skb, dev, extack);
		if (err)
			return err;

		have_data = true;
		*prividx = 0;
	}

	if (*prividx <= attr_id_l3_stats &&
	    (off_filter_mask & IFLA_STATS_FILTER_BIT(attr_id_l3_stats))) {
		unsigned int size_l3;
		struct nlattr *attr;

		*prividx = attr_id_l3_stats;

		/* A zero size means L3 stats are not enabled on the device:
		 * skip the attribute without treating it as an error.
		 */
		size_l3 = rtnl_offload_xstats_get_size_stats(dev, t_l3);
		if (!size_l3)
			goto skip_l3_stats;
		attr = nla_reserve_64bit(skb, attr_id_l3_stats, size_l3,
					 IFLA_OFFLOAD_XSTATS_UNSPEC);
		if (!attr)
			return -EMSGSIZE;

		/* The stats are written directly into the reserved payload. */
		err = rtnl_offload_xstats_get_stats(dev, t_l3, NULL,
						    nla_data(attr), extack);
		if (err)
			return err;

		have_data = true;
skip_l3_stats:
		*prividx = 0;
	}

	if (!have_data)
		return -ENODATA;

	*prividx = 0;
	return 0;
}
/* Build one stats message of netlink type @type for @dev in @skb.
 *
 * The message starts with a struct if_stats_msg header followed by the
 * attributes selected by @filters->mask[0]. When *@idxattr is non-zero only
 * the attribute with that id is considered (see stats_attr_valid()).
 *
 * @pid, @seq, @flags: passed to nlmsg_put() for the netlink header.
 * @change:  unused by this function.
 * @filters: mask[0] selects the top-level attributes;
 *           mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] selects the nested offload
 *           attributes.
 * @idxattr: set to the id of the top-level attribute being filled and reset
 *           to 0 once that attribute is complete, so on error it names the
 *           attribute that was in progress.
 * @prividx: cursor handed to fill_linkxstats() and
 *           rtnl_offload_xstats_fill().
 *
 * Must be called with RTNL held (ASSERT_RTNL()).
 *
 * Returns 0 on success. On failure the error is returned and the message is
 * cancelled, unless @flags has NLM_F_MULTI and *@prividx moved since entry,
 * in which case the partially filled message is ended and kept.
 */
static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
			       int type, u32 pid, u32 seq, u32 change,
			       unsigned int flags,
			       const struct rtnl_stats_dump_filters *filters,
			       int *idxattr, int *prividx,
			       struct netlink_ext_ack *extack)
{
	unsigned int filter_mask = filters->mask[0];
	struct if_stats_msg *ifsm;
	struct nlmsghdr *nlh;
	struct nlattr *attr;
	int s_prividx = *prividx;
	int err;

	ASSERT_RTNL();

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
	if (!nlh)
		return -EMSGSIZE;

	ifsm = nlmsg_data(nlh);
	ifsm->family = PF_UNSPEC;
	ifsm->pad1 = 0;
	ifsm->pad2 = 0;
	ifsm->ifindex = dev->ifindex;
	ifsm->filter_mask = filter_mask;

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, *idxattr)) {
		struct rtnl_link_stats64 *sp;

		attr = nla_reserve_64bit(skb, IFLA_STATS_LINK_64,
					 sizeof(struct rtnl_link_stats64),
					 IFLA_STATS_UNSPEC);
		if (!attr) {
			err = -EMSGSIZE;
			goto nla_put_failure;
		}

		/* Stats are written straight into the reserved payload. */
		sp = nla_data(attr);
		dev_get_stats(dev, sp);
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS, *idxattr)) {
		const struct rtnl_link_ops *ops = dev->rtnl_link_ops;

		if (ops && ops->fill_linkxstats) {
			*idxattr = IFLA_STATS_LINK_XSTATS;
			attr = nla_nest_start_noflag(skb,
						     IFLA_STATS_LINK_XSTATS);
			if (!attr) {
				err = -EMSGSIZE;
				goto nla_put_failure;
			}

			err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
			/* The nest is closed even on error: the failure path
			 * may keep the partially filled message.
			 */
			nla_nest_end(skb, attr);
			if (err)
				goto nla_put_failure;
			*idxattr = 0;
		}
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE,
			     *idxattr)) {
		const struct rtnl_link_ops *ops = NULL;
		const struct net_device *master;

		/* Slave xstats are provided by the master device's link ops. */
		master = netdev_master_upper_dev_get(dev);
		if (master)
			ops = master->rtnl_link_ops;
		if (ops && ops->fill_linkxstats) {
			*idxattr = IFLA_STATS_LINK_XSTATS_SLAVE;
			attr = nla_nest_start_noflag(skb,
						     IFLA_STATS_LINK_XSTATS_SLAVE);
			if (!attr) {
				err = -EMSGSIZE;
				goto nla_put_failure;
			}

			err = ops->fill_linkxstats(skb, dev, prividx, *idxattr);
			nla_nest_end(skb, attr);
			if (err)
				goto nla_put_failure;
			*idxattr = 0;
		}
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS,
			     *idxattr)) {
		u32 off_filter_mask;

		off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS];
		*idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS;
		attr = nla_nest_start_noflag(skb,
					     IFLA_STATS_LINK_OFFLOAD_XSTATS);
		if (!attr) {
			err = -EMSGSIZE;
			goto nla_put_failure;
		}

		err = rtnl_offload_xstats_fill(skb, dev, prividx,
					       off_filter_mask, extack);
		/* -ENODATA means nothing was emitted: drop the empty nest
		 * and carry on; it is not treated as a failure.
		 */
		if (err == -ENODATA)
			nla_nest_cancel(skb, attr);
		else
			nla_nest_end(skb, attr);

		if (err && err != -ENODATA)
			goto nla_put_failure;
		*idxattr = 0;
	}

	if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, *idxattr)) {
		struct rtnl_af_ops *af_ops;

		*idxattr = IFLA_STATS_AF_SPEC;
		attr = nla_nest_start_noflag(skb, IFLA_STATS_AF_SPEC);
		if (!attr) {
			err = -EMSGSIZE;
			goto nla_put_failure;
		}

		rcu_read_lock();
		list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) {
			struct nlattr *af;

			if (!af_ops->fill_stats_af)
				continue;

			af = nla_nest_start_noflag(skb, af_ops->family);
			if (!af) {
				rcu_read_unlock();
				err = -EMSGSIZE;
				goto nla_put_failure;
			}

			err = af_ops->fill_stats_af(skb, dev);
			if (err == -ENODATA) {
				/* This family has nothing to report. The nest
				 * has been trimmed from the skb, so it must
				 * not be closed with nla_nest_end(), which
				 * would write its length past the new tail.
				 */
				nla_nest_cancel(skb, af);
				continue;
			}
			if (err < 0) {
				rcu_read_unlock();
				goto nla_put_failure;
			}

			nla_nest_end(skb, af);
		}
		rcu_read_unlock();

		nla_nest_end(skb, attr);

		*idxattr = 0;
	}

	nlmsg_end(skb, nlh);

	return 0;

nla_put_failure:
	/* not a multi message or no progress mean a real error */
	if (!(flags & NLM_F_MULTI) || s_prividx == *prividx)
		nlmsg_cancel(skb, nlh);
	else
		nlmsg_end(skb, nlh);

	return err;
}
ops->get_linkxstats_size) { size += nla_total_size(ops->get_linkxstats_size(dev, attr)); /* for IFLA_STATS_LINK_XSTATS */ size += nla_total_size(0); } } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_XSTATS_SLAVE, 0)) { struct net_device *_dev = (struct net_device *)dev; const struct rtnl_link_ops *ops = NULL; const struct net_device *master; /* netdev_master_upper_dev_get can't take const */ master = netdev_master_upper_dev_get(_dev); if (master) ops = master->rtnl_link_ops; if (ops && ops->get_linkxstats_size) { int attr = IFLA_STATS_LINK_XSTATS_SLAVE; size += nla_total_size(ops->get_linkxstats_size(dev, attr)); /* for IFLA_STATS_LINK_XSTATS_SLAVE */ size += nla_total_size(0); } } if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) { u32 off_filter_mask; off_filter_mask = filters->mask[IFLA_STATS_LINK_OFFLOAD_XSTATS]; size += rtnl_offload_xstats_get_size(dev, off_filter_mask); } if (stats_attr_valid(filter_mask, IFLA_STATS_AF_SPEC, 0)) { struct rtnl_af_ops *af_ops; /* for IFLA_STATS_AF_SPEC */ size += nla_total_size(0); rcu_read_lock(); list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_stats_af_size) { size += nla_total_size( af_ops->get_stats_af_size(dev)); /* for AF_* */ size += nla_total_size(0); } } rcu_read_unlock(); } return size; } #define RTNL_STATS_OFFLOAD_XSTATS_VALID ((1 << __IFLA_OFFLOAD_XSTATS_MAX) - 1) static const struct nla_policy rtnl_stats_get_policy_filters[IFLA_STATS_MAX + 1] = { [IFLA_STATS_LINK_OFFLOAD_XSTATS] = NLA_POLICY_MASK(NLA_U32, RTNL_STATS_OFFLOAD_XSTATS_VALID), }; static const struct nla_policy rtnl_stats_get_policy[IFLA_STATS_GETSET_MAX + 1] = { [IFLA_STATS_GET_FILTERS] = NLA_POLICY_NESTED(rtnl_stats_get_policy_filters), }; static const struct nla_policy ifla_stats_set_policy[IFLA_STATS_GETSET_MAX + 1] = { [IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS] = NLA_POLICY_MAX(NLA_U8, 1), }; static int rtnl_stats_get_parse_filters(struct nlattr *ifla_filters, struct rtnl_stats_dump_filters *filters, 
struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_STATS_MAX + 1]; int err; int at; err = nla_parse_nested(tb, IFLA_STATS_MAX, ifla_filters, rtnl_stats_get_policy_filters, extack); if (err < 0) return err; for (at = 1; at <= IFLA_STATS_MAX; at++) { if (tb[at]) { if (!(filters->mask[0] & IFLA_STATS_FILTER_BIT(at))) { NL_SET_ERR_MSG(extack, "Filtered attribute not enabled in filter_mask"); return -EINVAL; } filters->mask[at] = nla_get_u32(tb[at]); } } return 0; } static int rtnl_stats_get_parse(const struct nlmsghdr *nlh, u32 filter_mask, struct rtnl_stats_dump_filters *filters, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; int err; int i; filters->mask[0] = filter_mask; for (i = 1; i < ARRAY_SIZE(filters->mask); i++) filters->mask[i] = -1U; err = nlmsg_parse(nlh, sizeof(struct if_stats_msg), tb, IFLA_STATS_GETSET_MAX, rtnl_stats_get_policy, extack); if (err < 0) return err; if (tb[IFLA_STATS_GET_FILTERS]) { err = rtnl_stats_get_parse_filters(tb[IFLA_STATS_GET_FILTERS], filters, extack); if (err) return err; } return 0; } static int rtnl_valid_stats_req(const struct nlmsghdr *nlh, bool strict_check, bool is_dump, struct netlink_ext_ack *extack) { struct if_stats_msg *ifsm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifsm))) { NL_SET_ERR_MSG(extack, "Invalid header for stats dump"); return -EINVAL; } if (!strict_check) return 0; ifsm = nlmsg_data(nlh); /* only requests using strict checks can pass data to influence * the dump. The legacy exception is filter_mask. 
*/ if (ifsm->pad1 || ifsm->pad2 || (is_dump && ifsm->ifindex)) { NL_SET_ERR_MSG(extack, "Invalid values in header for stats dump request"); return -EINVAL; } if (ifsm->filter_mask >= IFLA_STATS_FILTER_BIT(IFLA_STATS_MAX + 1)) { NL_SET_ERR_MSG(extack, "Invalid stats requested through filter mask"); return -EINVAL; } return 0; } static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; int idxattr = 0, prividx = 0; struct if_stats_msg *ifsm; struct sk_buff *nskb; int err; err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), false, extack); if (err) return err; ifsm = nlmsg_data(nlh); if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); else return -EINVAL; if (!dev) return -ENODEV; if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats get"); return -EINVAL; } err = rtnl_stats_get_parse(nlh, ifsm->filter_mask, &filters, extack); if (err) return err; nskb = nlmsg_new(if_nlmsg_stats_size(dev, &filters), GFP_KERNEL); if (!nskb) return -ENOBUFS; err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, &filters, &idxattr, &prividx, extack); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else { err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); } return err; } static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_ext_ack *extack = cb->extack; struct rtnl_stats_dump_filters filters; struct net *net = sock_net(skb->sk); unsigned int flags = NLM_F_MULTI; struct if_stats_msg *ifsm; struct { unsigned long ifindex; int idxattr; int prividx; } *ctx = (void *)cb->ctx; struct net_device *dev; int err; cb->seq = net->dev_base_seq; err = rtnl_valid_stats_req(cb->nlh, cb->strict_check, true, extack); if (err) return err; ifsm = 
nlmsg_data(cb->nlh); if (!ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be set for stats dump"); return -EINVAL; } err = rtnl_stats_get_parse(cb->nlh, ifsm->filter_mask, &filters, extack); if (err) return err; for_each_netdev_dump(net, dev, ctx->ifindex) { err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, &filters, &ctx->idxattr, &ctx->prividx, extack); /* If we ran out of room on the first message, * we're in trouble. */ WARN_ON((err == -EMSGSIZE) && (skb->len == 0)); if (err < 0) break; ctx->prividx = 0; ctx->idxattr = 0; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } return err; } void rtnl_offload_xstats_notify(struct net_device *dev) { struct rtnl_stats_dump_filters response_filters = {}; struct net *net = dev_net(dev); int idxattr = 0, prividx = 0; struct sk_buff *skb; int err = -ENOBUFS; ASSERT_RTNL(); response_filters.mask[0] |= IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); skb = nlmsg_new(if_nlmsg_stats_size(dev, &response_filters), GFP_KERNEL); if (!skb) goto errout; err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS, 0, 0, 0, 0, &response_filters, &idxattr, &prividx, NULL); if (err < 0) { kfree_skb(skb); goto errout; } rtnl_notify(skb, net, 0, RTNLGRP_STATS, NULL, GFP_KERNEL); return; errout: rtnl_set_sk_err(net, RTNLGRP_STATS, err); } EXPORT_SYMBOL(rtnl_offload_xstats_notify); static int rtnl_stats_set(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { enum netdev_offload_xstats_type t_l3 = NETDEV_OFFLOAD_XSTATS_TYPE_L3; struct rtnl_stats_dump_filters response_filters = {}; struct nlattr *tb[IFLA_STATS_GETSET_MAX + 1]; struct net *net = sock_net(skb->sk); struct net_device *dev = NULL; struct if_stats_msg *ifsm; bool notify = false; int err; err = rtnl_valid_stats_req(nlh, netlink_strict_get_check(skb), false, extack); if (err) return 
err; ifsm = nlmsg_data(nlh); if (ifsm->family != AF_UNSPEC) { NL_SET_ERR_MSG(extack, "Address family should be AF_UNSPEC"); return -EINVAL; } if (ifsm->ifindex > 0) dev = __dev_get_by_index(net, ifsm->ifindex); else return -EINVAL; if (!dev) return -ENODEV; if (ifsm->filter_mask) { NL_SET_ERR_MSG(extack, "Filter mask must be 0 for stats set"); return -EINVAL; } err = nlmsg_parse(nlh, sizeof(*ifsm), tb, IFLA_STATS_GETSET_MAX, ifla_stats_set_policy, extack); if (err < 0) return err; if (tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]) { u8 req = nla_get_u8(tb[IFLA_STATS_SET_OFFLOAD_XSTATS_L3_STATS]); if (req) err = netdev_offload_xstats_enable(dev, t_l3, extack); else err = netdev_offload_xstats_disable(dev, t_l3); if (!err) notify = true; else if (err != -EALREADY) return err; response_filters.mask[0] |= IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_OFFLOAD_XSTATS); response_filters.mask[IFLA_STATS_LINK_OFFLOAD_XSTATS] |= IFLA_STATS_FILTER_BIT(IFLA_OFFLOAD_XSTATS_HW_S_INFO); } if (notify) rtnl_offload_xstats_notify(dev); return 0; } static int rtnl_mdb_valid_dump_req(const struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct br_port_msg *bpm; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*bpm))) { NL_SET_ERR_MSG(extack, "Invalid header for mdb dump request"); return -EINVAL; } bpm = nlmsg_data(nlh); if (bpm->ifindex) { NL_SET_ERR_MSG(extack, "Filtering by device index is not supported for mdb dump request"); return -EINVAL; } if (nlmsg_attrlen(nlh, sizeof(*bpm))) { NL_SET_ERR_MSG(extack, "Invalid data after header in mdb dump request"); return -EINVAL; } return 0; } struct rtnl_mdb_dump_ctx { long idx; }; static int rtnl_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct rtnl_mdb_dump_ctx *ctx = (void *)cb->ctx; struct net *net = sock_net(skb->sk); struct net_device *dev; int idx, s_idx; int err; NL_ASSERT_CTX_FITS(struct rtnl_mdb_dump_ctx); if (cb->strict_check) { err = rtnl_mdb_valid_dump_req(cb->nlh, cb->extack); if (err) return err; } s_idx = 
ctx->idx; idx = 0; for_each_netdev(net, dev) { if (idx < s_idx) goto skip; if (!dev->netdev_ops->ndo_mdb_dump) goto skip; err = dev->netdev_ops->ndo_mdb_dump(dev, skb, cb); if (err == -EMSGSIZE) goto out; /* Moving on to next device, reset markers and sequence * counters since they are all maintained per-device. */ memset(cb->ctx, 0, sizeof(cb->ctx)); cb->prev_seq = 0; cb->seq = 0; skip: idx++; } out: ctx->idx = idx; return skb->len; } static int rtnl_validate_mdb_entry_get(const struct nlattr *attr, struct netlink_ext_ack *extack) { struct br_mdb_entry *entry = nla_data(attr); if (nla_len(attr) != sizeof(struct br_mdb_entry)) { NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length"); return -EINVAL; } if (entry->ifindex) { NL_SET_ERR_MSG(extack, "Entry ifindex cannot be specified"); return -EINVAL; } if (entry->state) { NL_SET_ERR_MSG(extack, "Entry state cannot be specified"); return -EINVAL; } if (entry->flags) { NL_SET_ERR_MSG(extack, "Entry flags cannot be specified"); return -EINVAL; } if (entry->vid >= VLAN_VID_MASK) { NL_SET_ERR_MSG(extack, "Invalid entry VLAN id"); return -EINVAL; } if (entry->addr.proto != htons(ETH_P_IP) && entry->addr.proto != htons(ETH_P_IPV6) && entry->addr.proto != 0) { NL_SET_ERR_MSG(extack, "Unknown entry protocol"); return -EINVAL; } return 0; } static const struct nla_policy mdba_get_policy[MDBA_GET_ENTRY_MAX + 1] = { [MDBA_GET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, rtnl_validate_mdb_entry_get, sizeof(struct br_mdb_entry)), [MDBA_GET_ENTRY_ATTRS] = { .type = NLA_NESTED }, }; static int rtnl_mdb_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct nlattr *tb[MDBA_GET_ENTRY_MAX + 1]; struct net *net = sock_net(in_skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; err = nlmsg_parse(nlh, sizeof(struct br_port_msg), tb, MDBA_GET_ENTRY_MAX, mdba_get_policy, extack); if (err) return err; bpm = nlmsg_data(nlh); if (!bpm->ifindex) { NL_SET_ERR_MSG(extack, "Invalid 
/* Policy validation callback for the MDBA_SET_ENTRY attribute.
 *
 * Checks, in order: the payload is exactly one struct br_mdb_entry; the
 * entry ifindex is non-zero; the group address is valid for its protocol;
 * the state is MDB_PERMANENT or MDB_TEMPORARY; the VLAN id is below
 * VLAN_VID_MASK.
 *
 * Returns 0 when the entry is acceptable, -EINVAL (with an extack message)
 * otherwise.
 */
static int rtnl_validate_mdb_entry(const struct nlattr *attr,
				   struct netlink_ext_ack *extack)
{
	struct br_mdb_entry *entry = nla_data(attr);

	if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
		NL_SET_ERR_MSG_ATTR(extack, attr, "Invalid attribute length");
		return -EINVAL;
	}

	if (entry->ifindex == 0) {
		NL_SET_ERR_MSG(extack, "Zero entry ifindex is not allowed");
		return -EINVAL;
	}

	if (entry->addr.proto == htons(ETH_P_IP)) {
		/* IPv4: the group must be multicast or 0.0.0.0 ... */
		if (!ipv4_is_multicast(entry->addr.u.ip4) &&
		    !ipv4_is_zeronet(entry->addr.u.ip4)) {
			NL_SET_ERR_MSG(extack, "IPv4 entry group address is not multicast or 0.0.0.0");
			return -EINVAL;
		}
		/* ... and must not be a local multicast address. */
		if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
			NL_SET_ERR_MSG(extack, "IPv4 entry group address is local multicast");
			return -EINVAL;
		}
	/* Without CONFIG_IPV6 this branch is compiled out, so an ETH_P_IPV6
	 * entry falls through to the "Unknown entry protocol" case below.
	 */
#if IS_ENABLED(CONFIG_IPV6)
	} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
		if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
			NL_SET_ERR_MSG(extack, "IPv6 entry group address is link-local all nodes");
			return -EINVAL;
		}
#endif
	} else if (entry->addr.proto == 0) {
		/* L2 mdb */
		if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) {
			NL_SET_ERR_MSG(extack, "L2 entry group is not multicast");
			return -EINVAL;
		}
	} else {
		NL_SET_ERR_MSG(extack, "Unknown entry protocol");
		return -EINVAL;
	}

	if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
		NL_SET_ERR_MSG(extack, "Unknown entry state");
		return -EINVAL;
	}
	if (entry->vid >= VLAN_VID_MASK) {
		NL_SET_ERR_MSG(extack, "Invalid entry VLAN id");
		return -EINVAL;
	}

	return 0;
}
return -EINVAL; } return 0; } static const struct nla_policy mdba_del_bulk_policy[MDBA_SET_ENTRY_MAX + 1] = { [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, rtnl_validate_mdb_entry_del_bulk, sizeof(struct br_mdb_entry)), [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED }, }; static int rtnl_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { bool del_bulk = !!(nlh->nlmsg_flags & NLM_F_BULK); struct nlattr *tb[MDBA_SET_ENTRY_MAX + 1]; struct net *net = sock_net(skb->sk); struct br_port_msg *bpm; struct net_device *dev; int err; if (!del_bulk) err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, mdba_policy, extack); else err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY_MAX, mdba_del_bulk_policy, extack); if (err) return err; bpm = nlmsg_data(nlh); if (!bpm->ifindex) { NL_SET_ERR_MSG(extack, "Invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, bpm->ifindex); if (!dev) { NL_SET_ERR_MSG(extack, "Device doesn't exist"); return -ENODEV; } if (NL_REQ_ATTR_CHECK(extack, NULL, tb, MDBA_SET_ENTRY)) { NL_SET_ERR_MSG(extack, "Missing MDBA_SET_ENTRY attribute"); return -EINVAL; } if (del_bulk) { if (!dev->netdev_ops->ndo_mdb_del_bulk) { NL_SET_ERR_MSG(extack, "Device does not support MDB bulk deletion"); return -EOPNOTSUPP; } return dev->netdev_ops->ndo_mdb_del_bulk(dev, tb, extack); } if (!dev->netdev_ops->ndo_mdb_del) { NL_SET_ERR_MSG(extack, "Device does not support MDB operations"); return -EOPNOTSUPP; } return dev->netdev_ops->ndo_mdb_del(dev, tb, extack); } /* Process one rtnetlink message. */ static int rtnl_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { const bool needs_lock = !(cb->flags & RTNL_FLAG_DUMP_UNLOCKED); rtnl_dumpit_func dumpit = cb->data; int err; /* Previous iteration have already finished, avoid calling->dumpit() * again, it may not expect to be called after it reached the end. 
/* Dispatch one received rtnetlink message to its registered handler.
 *
 * Message types above RTM_MAX are rejected with -EOPNOTSUPP. A message
 * whose payload is shorter than struct rtgenmsg is accepted and ignored
 * (returns 0). Every kind other than RTNL_KIND_GET requires CAP_NET_ADMIN.
 *
 * Handlers are looked up for the family found in the message and, when
 * that family has no suitable handler, for PF_UNSPEC.
 *
 * GET requests with NLM_F_DUMP are started through rtnetlink_dump_start().
 * Everything else goes to the handler's doit callback, which runs without
 * the rtnl lock when the handler has RTNL_FLAG_DOIT_UNLOCKED and under
 * rtnl_lock() otherwise.
 *
 * Returns the handler's result, -EPERM, -EPROTONOSUPPORT when the owning
 * module cannot be pinned, or -EOPNOTSUPP when no handler applies.
 */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
			     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_link *link;
	enum rtnl_kinds kind;
	struct module *owner;
	int err = -EOPNOTSUPP;
	rtnl_doit_func doit;
	unsigned int flags;
	int family;
	int type;

	type = nlh->nlmsg_type;
	if (type > RTM_MAX)
		return -EOPNOTSUPP;

	/* From here on @type is relative to RTM_BASE. */
	type -= RTM_BASE;

	/* All the messages must have at least 1 byte length */
	if (nlmsg_len(nlh) < sizeof(struct rtgenmsg))
		return 0;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;
	kind = rtnl_msgtype_kind(type);

	if (kind != RTNL_KIND_GET && !netlink_net_capable(skb, CAP_NET_ADMIN))
		return -EPERM;

	/* The handler lookup below is done under RCU. */
	rcu_read_lock();
	if (kind == RTNL_KIND_GET && (nlh->nlmsg_flags & NLM_F_DUMP)) {
		struct sock *rtnl;
		rtnl_dumpit_func dumpit;
		u32 min_dump_alloc = 0;

		link = rtnl_get_link(family, type);
		if (!link || !link->dumpit) {
			/* No dump handler for this family: try PF_UNSPEC. */
			family = PF_UNSPEC;
			link = rtnl_get_link(family, type);
			if (!link || !link->dumpit)
				goto err_unlock;
		}
		/* Copy what is needed out of @link before leaving the RCU
		 * read-side section.
		 */
		owner = link->owner;
		dumpit = link->dumpit;
		flags = link->flags;

		if (type == RTM_GETLINK - RTM_BASE)
			min_dump_alloc = rtnl_calcit(skb, nlh);

		err = 0;
		/* need to do this before rcu_read_unlock() */
		if (!try_module_get(owner))
			err = -EPROTONOSUPPORT;

		rcu_read_unlock();

		rtnl = net->rtnl;
		if (err == 0) {
			struct netlink_dump_control c = {
				.dump		= dumpit,
				.min_dump_alloc	= min_dump_alloc,
				.module		= owner,
				.flags		= flags,
			};
			err = rtnetlink_dump_start(rtnl, skb, nlh, &c);
			/* netlink_dump_start() will keep a reference on
			 * module if dump is still in progress.
			 */
			module_put(owner);
		}
		return err;
	}

	link = rtnl_get_link(family, type);
	if (!link || !link->doit) {
		/* No doit handler for this family: try PF_UNSPEC. */
		family = PF_UNSPEC;
		link = rtnl_get_link(PF_UNSPEC, type);
		if (!link || !link->doit)
			goto out_unlock;
	}

	/* Pin the handler's module for the duration of the call. */
	owner = link->owner;
	if (!try_module_get(owner)) {
		err = -EPROTONOSUPPORT;
		goto out_unlock;
	}

	flags = link->flags;
	if (kind == RTNL_KIND_DEL && (nlh->nlmsg_flags & NLM_F_BULK) &&
	    !(flags & RTNL_FLAG_BULK_DEL_SUPPORTED)) {
		NL_SET_ERR_MSG(extack, "Bulk delete is not supported");
		module_put(owner);
		goto err_unlock;
	}

	if (flags & RTNL_FLAG_DOIT_UNLOCKED) {
		/* Read the callback while still under RCU, then call it
		 * without taking the rtnl lock.
		 */
		doit = link->doit;
		rcu_read_unlock();
		if (doit)
			err = doit(skb, nlh, extack);
		module_put(owner);
		return err;
	}
	rcu_read_unlock();

	/* The handler is looked up again once the rtnl lock is held; if it
	 * is no longer there, @err keeps its initial -EOPNOTSUPP.
	 */
	rtnl_lock();
	link = rtnl_get_link(family, type);
	if (link && link->doit)
		err = link->doit(skb, nlh, extack);
	rtnl_unlock();

	module_put(owner);

	return err;

out_unlock:
	rcu_read_unlock();
	return err;

err_unlock:
	rcu_read_unlock();
	return -EOPNOTSUPP;
}
NETDEV_CHANGEINFODATA: case NETDEV_CHANGELOWERSTATE: case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), GFP_KERNEL, NULL, 0, 0, NULL); break; default: break; } return NOTIFY_DONE; } static struct notifier_block rtnetlink_dev_notifier = { .notifier_call = rtnetlink_event, }; static int __net_init rtnetlink_net_init(struct net *net) { struct sock *sk; struct netlink_kernel_cfg cfg = { .groups = RTNLGRP_MAX, .input = rtnetlink_rcv, .flags = NL_CFG_F_NONROOT_RECV, .bind = rtnetlink_bind, }; sk = netlink_kernel_create(net, NETLINK_ROUTE, &cfg); if (!sk) return -ENOMEM; net->rtnl = sk; return 0; } static void __net_exit rtnetlink_net_exit(struct net *net) { netlink_kernel_release(net->rtnl); net->rtnl = NULL; } static struct pernet_operations rtnetlink_net_ops = { .init = rtnetlink_net_init, .exit = rtnetlink_net_exit, }; static const struct rtnl_msg_handler rtnetlink_rtnl_msg_handlers[] __initconst = { {.msgtype = RTM_NEWLINK, .doit = rtnl_newlink, .flags = RTNL_FLAG_DOIT_PERNET}, {.msgtype = RTM_DELLINK, .doit = rtnl_dellink, .flags = RTNL_FLAG_DOIT_PERNET_WIP}, {.msgtype = RTM_GETLINK, .doit = rtnl_getlink, .dumpit = rtnl_dump_ifinfo, .flags = RTNL_FLAG_DUMP_SPLIT_NLM_DONE}, {.msgtype = RTM_SETLINK, .doit = rtnl_setlink, .flags = RTNL_FLAG_DOIT_PERNET_WIP}, {.msgtype = RTM_GETADDR, .dumpit = rtnl_dump_all}, {.msgtype = RTM_GETROUTE, .dumpit = rtnl_dump_all}, {.msgtype = RTM_GETNETCONF, .dumpit = rtnl_dump_all}, {.msgtype = RTM_GETSTATS, .doit = rtnl_stats_get, .dumpit = rtnl_stats_dump}, {.msgtype = RTM_SETSTATS, .doit = rtnl_stats_set}, {.msgtype = RTM_NEWLINKPROP, .doit = rtnl_newlinkprop}, {.msgtype = RTM_DELLINKPROP, .doit = rtnl_dellinkprop}, {.protocol = PF_BRIDGE, .msgtype = RTM_GETLINK, .dumpit = rtnl_bridge_getlink}, {.protocol = PF_BRIDGE, .msgtype = RTM_DELLINK, .doit = rtnl_bridge_dellink}, {.protocol = PF_BRIDGE, .msgtype = RTM_SETLINK, .doit = rtnl_bridge_setlink}, {.protocol = PF_BRIDGE, .msgtype = 
RTM_NEWNEIGH, .doit = rtnl_fdb_add}, {.protocol = PF_BRIDGE, .msgtype = RTM_DELNEIGH, .doit = rtnl_fdb_del, .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, {.protocol = PF_BRIDGE, .msgtype = RTM_GETNEIGH, .doit = rtnl_fdb_get, .dumpit = rtnl_fdb_dump}, {.protocol = PF_BRIDGE, .msgtype = RTM_NEWMDB, .doit = rtnl_mdb_add}, {.protocol = PF_BRIDGE, .msgtype = RTM_DELMDB, .doit = rtnl_mdb_del, .flags = RTNL_FLAG_BULK_DEL_SUPPORTED}, {.protocol = PF_BRIDGE, .msgtype = RTM_GETMDB, .doit = rtnl_mdb_get, .dumpit = rtnl_mdb_dump}, }; void __init rtnetlink_init(void) { if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register_many(rtnetlink_rtnl_msg_handlers); }
92 112 112 111 90 99 100 100 101 1 100 100 1 99 99 99 99 98 99 1 1 1 1 96 2 2 13 11 7 7 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 // SPDX-License-Identifier: GPL-2.0-or-later /* * drivers/net/bond/bond_netlink.c - Netlink interface for bonding * Copyright (c) 2013 Jiri Pirko <jiri@resnulli.us> * Copyright (c) 2013 Scott Feldman <sfeldma@cumulusnetworks.com> */ #include 
<linux/module.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_link.h> #include <linux/if_ether.h> #include <net/netlink.h> #include <net/rtnetlink.h> #include <net/bonding.h> #include <net/ipv6.h> static size_t bond_get_slave_size(const struct net_device *bond_dev, const struct net_device *slave_dev) { return nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_STATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_MII_STATUS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_SLAVE_LINK_FAILURE_COUNT */ nla_total_size(MAX_ADDR_LEN) + /* IFLA_BOND_SLAVE_PERM_HWADDR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_QUEUE_ID */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */ nla_total_size(sizeof(s32)) + /* IFLA_BOND_SLAVE_PRIO */ 0; } static int bond_fill_slave_info(struct sk_buff *skb, const struct net_device *bond_dev, const struct net_device *slave_dev) { struct slave *slave = bond_slave_get_rtnl(slave_dev); if (nla_put_u8(skb, IFLA_BOND_SLAVE_STATE, bond_slave_state(slave))) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_SLAVE_MII_STATUS, slave->link)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_SLAVE_LINK_FAILURE_COUNT, slave->link_failure_count)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_SLAVE_PERM_HWADDR, slave_dev->addr_len, slave->perm_hwaddr)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_SLAVE_QUEUE_ID, READ_ONCE(slave->queue_id))) goto nla_put_failure; if (nla_put_s32(skb, IFLA_BOND_SLAVE_PRIO, slave->prio)) goto nla_put_failure; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { const struct aggregator *agg; const struct port *ad_port; ad_port = &SLAVE_AD_INFO(slave)->port; agg = SLAVE_AD_INFO(slave)->port.aggregator; if (agg) { if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID, 
agg->aggregator_identifier)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE, ad_port->actor_oper_port_state)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE, ad_port->partner_oper.port_state)) goto nla_put_failure; } } return 0; nla_put_failure: return -EMSGSIZE; } /* Limit the max delay range to 300s */ static const struct netlink_range_validation delay_range = { .max = 300000, }; static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = { [IFLA_BOND_MODE] = { .type = NLA_U8 }, [IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 }, [IFLA_BOND_MIIMON] = { .type = NLA_U32 }, [IFLA_BOND_UPDELAY] = { .type = NLA_U32 }, [IFLA_BOND_DOWNDELAY] = { .type = NLA_U32 }, [IFLA_BOND_USE_CARRIER] = { .type = NLA_U8 }, [IFLA_BOND_ARP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_ARP_IP_TARGET] = { .type = NLA_NESTED }, [IFLA_BOND_ARP_VALIDATE] = { .type = NLA_U32 }, [IFLA_BOND_ARP_ALL_TARGETS] = { .type = NLA_U32 }, [IFLA_BOND_PRIMARY] = { .type = NLA_U32 }, [IFLA_BOND_PRIMARY_RESELECT] = { .type = NLA_U8 }, [IFLA_BOND_FAIL_OVER_MAC] = { .type = NLA_U8 }, [IFLA_BOND_XMIT_HASH_POLICY] = { .type = NLA_U8 }, [IFLA_BOND_RESEND_IGMP] = { .type = NLA_U32 }, [IFLA_BOND_NUM_PEER_NOTIF] = { .type = NLA_U8 }, [IFLA_BOND_ALL_SLAVES_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_MIN_LINKS] = { .type = NLA_U32 }, [IFLA_BOND_LP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BOND_PACKETS_PER_SLAVE] = { .type = NLA_U32 }, [IFLA_BOND_AD_LACP_ACTIVE] = { .type = NLA_U8 }, [IFLA_BOND_AD_LACP_RATE] = { .type = NLA_U8 }, [IFLA_BOND_AD_SELECT] = { .type = NLA_U8 }, [IFLA_BOND_AD_INFO] = { .type = NLA_NESTED }, [IFLA_BOND_AD_ACTOR_SYS_PRIO] = { .type = NLA_U16 }, [IFLA_BOND_AD_USER_PORT_KEY] = { .type = NLA_U16 }, [IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY, .len = ETH_ALEN }, [IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 }, [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range), [IFLA_BOND_MISSED_MAX] = { .type = 
NLA_U8 }, [IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED }, [IFLA_BOND_COUPLED_CONTROL] = { .type = NLA_U8 }, }; static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = { [IFLA_BOND_SLAVE_QUEUE_ID] = { .type = NLA_U16 }, [IFLA_BOND_SLAVE_PRIO] = { .type = NLA_S32 }, }; static int bond_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) return -EINVAL; if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) return -EADDRNOTAVAIL; } return 0; } static int bond_slave_changelink(struct net_device *bond_dev, struct net_device *slave_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct bond_opt_value newval; int err; if (!data) return 0; if (data[IFLA_BOND_SLAVE_QUEUE_ID]) { u16 queue_id = nla_get_u16(data[IFLA_BOND_SLAVE_QUEUE_ID]); char queue_id_str[IFNAMSIZ + 7]; /* queue_id option setting expects slave_name:queue_id */ snprintf(queue_id_str, sizeof(queue_id_str), "%s:%u\n", slave_dev->name, queue_id); bond_opt_initstr(&newval, queue_id_str); err = __bond_opt_set(bond, BOND_OPT_QUEUE_ID, &newval, data[IFLA_BOND_SLAVE_QUEUE_ID], extack); if (err) return err; } if (data[IFLA_BOND_SLAVE_PRIO]) { int prio = nla_get_s32(data[IFLA_BOND_SLAVE_PRIO]); bond_opt_slave_initval(&newval, &slave_dev, prio); err = __bond_opt_set(bond, BOND_OPT_PRIO, &newval, data[IFLA_BOND_SLAVE_PRIO], extack); if (err) return err; } return 0; } static int bond_changelink(struct net_device *bond_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct bonding *bond = netdev_priv(bond_dev); struct bond_opt_value newval; int miimon = 0; int err; if (!data) return 0; if (data[IFLA_BOND_MODE]) { int mode = nla_get_u8(data[IFLA_BOND_MODE]); bond_opt_initval(&newval, mode); err = __bond_opt_set(bond, BOND_OPT_MODE, &newval, data[IFLA_BOND_MODE], extack); if (err) return 
err; } if (data[IFLA_BOND_ACTIVE_SLAVE]) { int ifindex = nla_get_u32(data[IFLA_BOND_ACTIVE_SLAVE]); struct net_device *slave_dev; char *active_slave = ""; if (ifindex != 0) { slave_dev = __dev_get_by_index(dev_net(bond_dev), ifindex); if (!slave_dev) return -ENODEV; active_slave = slave_dev->name; } bond_opt_initstr(&newval, active_slave); err = __bond_opt_set(bond, BOND_OPT_ACTIVE_SLAVE, &newval, data[IFLA_BOND_ACTIVE_SLAVE], extack); if (err) return err; } if (data[IFLA_BOND_MIIMON]) { miimon = nla_get_u32(data[IFLA_BOND_MIIMON]); bond_opt_initval(&newval, miimon); err = __bond_opt_set(bond, BOND_OPT_MIIMON, &newval, data[IFLA_BOND_MIIMON], extack); if (err) return err; } if (data[IFLA_BOND_UPDELAY]) { int updelay = nla_get_u32(data[IFLA_BOND_UPDELAY]); bond_opt_initval(&newval, updelay); err = __bond_opt_set(bond, BOND_OPT_UPDELAY, &newval, data[IFLA_BOND_UPDELAY], extack); if (err) return err; } if (data[IFLA_BOND_DOWNDELAY]) { int downdelay = nla_get_u32(data[IFLA_BOND_DOWNDELAY]); bond_opt_initval(&newval, downdelay); err = __bond_opt_set(bond, BOND_OPT_DOWNDELAY, &newval, data[IFLA_BOND_DOWNDELAY], extack); if (err) return err; } if (data[IFLA_BOND_PEER_NOTIF_DELAY]) { int delay = nla_get_u32(data[IFLA_BOND_PEER_NOTIF_DELAY]); bond_opt_initval(&newval, delay); err = __bond_opt_set(bond, BOND_OPT_PEER_NOTIF_DELAY, &newval, data[IFLA_BOND_PEER_NOTIF_DELAY], extack); if (err) return err; } if (data[IFLA_BOND_USE_CARRIER]) { int use_carrier = nla_get_u8(data[IFLA_BOND_USE_CARRIER]); bond_opt_initval(&newval, use_carrier); err = __bond_opt_set(bond, BOND_OPT_USE_CARRIER, &newval, data[IFLA_BOND_USE_CARRIER], extack); if (err) return err; } if (data[IFLA_BOND_ARP_INTERVAL]) { int arp_interval = nla_get_u32(data[IFLA_BOND_ARP_INTERVAL]); if (arp_interval && miimon) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], "ARP monitoring cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_interval); err = __bond_opt_set(bond, 
BOND_OPT_ARP_INTERVAL, &newval, data[IFLA_BOND_ARP_INTERVAL], extack); if (err) return err; } if (data[IFLA_BOND_ARP_IP_TARGET]) { struct nlattr *attr; int i = 0, rem; bond_option_arp_ip_targets_clear(bond); nla_for_each_nested(attr, data[IFLA_BOND_ARP_IP_TARGET], rem) { __be32 target; if (nla_len(attr) < sizeof(target)) return -EINVAL; target = nla_get_be32(attr); bond_opt_initval(&newval, (__force u64)target); err = __bond_opt_set(bond, BOND_OPT_ARP_TARGETS, &newval, data[IFLA_BOND_ARP_IP_TARGET], extack); if (err) break; i++; } if (i == 0 && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last arp target with arp_interval on\n"); if (err) return err; } #if IS_ENABLED(CONFIG_IPV6) if (data[IFLA_BOND_NS_IP6_TARGET]) { struct nlattr *attr; int i = 0, rem; bond_option_ns_ip6_targets_clear(bond); nla_for_each_nested(attr, data[IFLA_BOND_NS_IP6_TARGET], rem) { struct in6_addr addr6; if (nla_len(attr) < sizeof(addr6)) { NL_SET_ERR_MSG(extack, "Invalid IPv6 address"); return -EINVAL; } addr6 = nla_get_in6_addr(attr); bond_opt_initextra(&newval, &addr6, sizeof(addr6)); err = __bond_opt_set(bond, BOND_OPT_NS_TARGETS, &newval, data[IFLA_BOND_NS_IP6_TARGET], extack); if (err) break; i++; } if (i == 0 && bond->params.arp_interval) netdev_warn(bond->dev, "Removing last ns target with arp_interval on\n"); if (err) return err; } #endif if (data[IFLA_BOND_ARP_VALIDATE]) { int arp_validate = nla_get_u32(data[IFLA_BOND_ARP_VALIDATE]); if (arp_validate && miimon) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_BOND_ARP_INTERVAL], "ARP validating cannot be used with MII monitoring"); return -EINVAL; } bond_opt_initval(&newval, arp_validate); err = __bond_opt_set(bond, BOND_OPT_ARP_VALIDATE, &newval, data[IFLA_BOND_ARP_VALIDATE], extack); if (err) return err; } if (data[IFLA_BOND_ARP_ALL_TARGETS]) { int arp_all_targets = nla_get_u32(data[IFLA_BOND_ARP_ALL_TARGETS]); bond_opt_initval(&newval, arp_all_targets); err = __bond_opt_set(bond, BOND_OPT_ARP_ALL_TARGETS, &newval, 
data[IFLA_BOND_ARP_ALL_TARGETS], extack); if (err) return err; } if (data[IFLA_BOND_PRIMARY]) { int ifindex = nla_get_u32(data[IFLA_BOND_PRIMARY]); struct net_device *dev; char *primary = ""; dev = __dev_get_by_index(dev_net(bond_dev), ifindex); if (dev) primary = dev->name; bond_opt_initstr(&newval, primary); err = __bond_opt_set(bond, BOND_OPT_PRIMARY, &newval, data[IFLA_BOND_PRIMARY], extack); if (err) return err; } if (data[IFLA_BOND_PRIMARY_RESELECT]) { int primary_reselect = nla_get_u8(data[IFLA_BOND_PRIMARY_RESELECT]); bond_opt_initval(&newval, primary_reselect); err = __bond_opt_set(bond, BOND_OPT_PRIMARY_RESELECT, &newval, data[IFLA_BOND_PRIMARY_RESELECT], extack); if (err) return err; } if (data[IFLA_BOND_FAIL_OVER_MAC]) { int fail_over_mac = nla_get_u8(data[IFLA_BOND_FAIL_OVER_MAC]); bond_opt_initval(&newval, fail_over_mac); err = __bond_opt_set(bond, BOND_OPT_FAIL_OVER_MAC, &newval, data[IFLA_BOND_FAIL_OVER_MAC], extack); if (err) return err; } if (data[IFLA_BOND_XMIT_HASH_POLICY]) { int xmit_hash_policy = nla_get_u8(data[IFLA_BOND_XMIT_HASH_POLICY]); bond_opt_initval(&newval, xmit_hash_policy); err = __bond_opt_set(bond, BOND_OPT_XMIT_HASH, &newval, data[IFLA_BOND_XMIT_HASH_POLICY], extack); if (err) return err; } if (data[IFLA_BOND_RESEND_IGMP]) { int resend_igmp = nla_get_u32(data[IFLA_BOND_RESEND_IGMP]); bond_opt_initval(&newval, resend_igmp); err = __bond_opt_set(bond, BOND_OPT_RESEND_IGMP, &newval, data[IFLA_BOND_RESEND_IGMP], extack); if (err) return err; } if (data[IFLA_BOND_NUM_PEER_NOTIF]) { int num_peer_notif = nla_get_u8(data[IFLA_BOND_NUM_PEER_NOTIF]); bond_opt_initval(&newval, num_peer_notif); err = __bond_opt_set(bond, BOND_OPT_NUM_PEER_NOTIF, &newval, data[IFLA_BOND_NUM_PEER_NOTIF], extack); if (err) return err; } if (data[IFLA_BOND_ALL_SLAVES_ACTIVE]) { int all_slaves_active = nla_get_u8(data[IFLA_BOND_ALL_SLAVES_ACTIVE]); bond_opt_initval(&newval, all_slaves_active); err = __bond_opt_set(bond, BOND_OPT_ALL_SLAVES_ACTIVE, &newval, 
data[IFLA_BOND_ALL_SLAVES_ACTIVE], extack); if (err) return err; } if (data[IFLA_BOND_MIN_LINKS]) { int min_links = nla_get_u32(data[IFLA_BOND_MIN_LINKS]); bond_opt_initval(&newval, min_links); err = __bond_opt_set(bond, BOND_OPT_MINLINKS, &newval, data[IFLA_BOND_MIN_LINKS], extack); if (err) return err; } if (data[IFLA_BOND_LP_INTERVAL]) { int lp_interval = nla_get_u32(data[IFLA_BOND_LP_INTERVAL]); bond_opt_initval(&newval, lp_interval); err = __bond_opt_set(bond, BOND_OPT_LP_INTERVAL, &newval, data[IFLA_BOND_LP_INTERVAL], extack); if (err) return err; } if (data[IFLA_BOND_PACKETS_PER_SLAVE]) { int packets_per_slave = nla_get_u32(data[IFLA_BOND_PACKETS_PER_SLAVE]); bond_opt_initval(&newval, packets_per_slave); err = __bond_opt_set(bond, BOND_OPT_PACKETS_PER_SLAVE, &newval, data[IFLA_BOND_PACKETS_PER_SLAVE], extack); if (err) return err; } if (data[IFLA_BOND_AD_LACP_ACTIVE]) { int lacp_active = nla_get_u8(data[IFLA_BOND_AD_LACP_ACTIVE]); bond_opt_initval(&newval, lacp_active); err = __bond_opt_set(bond, BOND_OPT_LACP_ACTIVE, &newval, data[IFLA_BOND_AD_LACP_ACTIVE], extack); if (err) return err; } if (data[IFLA_BOND_AD_LACP_RATE]) { int lacp_rate = nla_get_u8(data[IFLA_BOND_AD_LACP_RATE]); bond_opt_initval(&newval, lacp_rate); err = __bond_opt_set(bond, BOND_OPT_LACP_RATE, &newval, data[IFLA_BOND_AD_LACP_RATE], extack); if (err) return err; } if (data[IFLA_BOND_AD_SELECT]) { int ad_select = nla_get_u8(data[IFLA_BOND_AD_SELECT]); bond_opt_initval(&newval, ad_select); err = __bond_opt_set(bond, BOND_OPT_AD_SELECT, &newval, data[IFLA_BOND_AD_SELECT], extack); if (err) return err; } if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) { int actor_sys_prio = nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]); bond_opt_initval(&newval, actor_sys_prio); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval, data[IFLA_BOND_AD_ACTOR_SYS_PRIO], extack); if (err) return err; } if (data[IFLA_BOND_AD_USER_PORT_KEY]) { int port_key = nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]); 
bond_opt_initval(&newval, port_key); err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval, data[IFLA_BOND_AD_USER_PORT_KEY], extack); if (err) return err; } if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) { if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN) return -EINVAL; bond_opt_initval(&newval, nla_get_u64(data[IFLA_BOND_AD_ACTOR_SYSTEM])); err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval, data[IFLA_BOND_AD_ACTOR_SYSTEM], extack); if (err) return err; } if (data[IFLA_BOND_TLB_DYNAMIC_LB]) { int dynamic_lb = nla_get_u8(data[IFLA_BOND_TLB_DYNAMIC_LB]); bond_opt_initval(&newval, dynamic_lb); err = __bond_opt_set(bond, BOND_OPT_TLB_DYNAMIC_LB, &newval, data[IFLA_BOND_TLB_DYNAMIC_LB], extack); if (err) return err; } if (data[IFLA_BOND_MISSED_MAX]) { int missed_max = nla_get_u8(data[IFLA_BOND_MISSED_MAX]); bond_opt_initval(&newval, missed_max); err = __bond_opt_set(bond, BOND_OPT_MISSED_MAX, &newval, data[IFLA_BOND_MISSED_MAX], extack); if (err) return err; } if (data[IFLA_BOND_COUPLED_CONTROL]) { int coupled_control = nla_get_u8(data[IFLA_BOND_COUPLED_CONTROL]); bond_opt_initval(&newval, coupled_control); err = __bond_opt_set(bond, BOND_OPT_COUPLED_CONTROL, &newval, data[IFLA_BOND_COUPLED_CONTROL], extack); if (err) return err; } return 0; } static int bond_newlink(struct net *src_net, struct net_device *bond_dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { int err; err = bond_changelink(bond_dev, tb, data, extack); if (err < 0) return err; err = register_netdevice(bond_dev); if (!err) { struct bonding *bond = netdev_priv(bond_dev); netif_carrier_off(bond_dev); bond_work_init_all(bond); } return err; } static size_t bond_get_size(const struct net_device *bond_dev) { return nla_total_size(sizeof(u8)) + /* IFLA_BOND_MODE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ACTIVE_SLAVE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIIMON */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_UPDELAY */ nla_total_size(sizeof(u32)) + 
/* IFLA_BOND_DOWNDELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_USE_CARRIER */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_INTERVAL */ /* IFLA_BOND_ARP_IP_TARGET */ nla_total_size(sizeof(struct nlattr)) + nla_total_size(sizeof(u32)) * BOND_MAX_ARP_TARGETS + nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_VALIDATE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_ARP_ALL_TARGETS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PRIMARY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_PRIMARY_RESELECT */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_FAIL_OVER_MAC */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_XMIT_HASH_POLICY */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_RESEND_IGMP */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_NUM_PEER_NOTIF */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_ALL_SLAVES_ACTIVE */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_MIN_LINKS */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_LP_INTERVAL */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PACKETS_PER_SLAVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_ACTIVE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_LACP_RATE */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_AD_SELECT */ nla_total_size(sizeof(struct nlattr)) + /* IFLA_BOND_AD_INFO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_AGGREGATOR */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_NUM_PORTS */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_INFO_PARTNER_MAC*/ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */ nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */ nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_TLB_DYNAMIC_LB */ nla_total_size(sizeof(u32)) + /* IFLA_BOND_PEER_NOTIF_DELAY */ nla_total_size(sizeof(u8)) + /* IFLA_BOND_MISSED_MAX */ /* IFLA_BOND_NS_IP6_TARGET */ nla_total_size(sizeof(struct nlattr)) + 
nla_total_size(sizeof(struct in6_addr)) * BOND_MAX_NS_TARGETS + nla_total_size(sizeof(u8)) + /* IFLA_BOND_COUPLED_CONTROL */ 0; } static int bond_option_active_slave_get_ifindex(struct bonding *bond) { const struct net_device *slave; int ifindex; rcu_read_lock(); slave = bond_option_active_slave_get_rcu(bond); ifindex = slave ? slave->ifindex : 0; rcu_read_unlock(); return ifindex; } static int bond_fill_info(struct sk_buff *skb, const struct net_device *bond_dev) { struct bonding *bond = netdev_priv(bond_dev); unsigned int packets_per_slave; int ifindex, i, targets_added; struct nlattr *targets; struct slave *primary; if (nla_put_u8(skb, IFLA_BOND_MODE, BOND_MODE(bond))) goto nla_put_failure; ifindex = bond_option_active_slave_get_ifindex(bond); if (ifindex && nla_put_u32(skb, IFLA_BOND_ACTIVE_SLAVE, ifindex)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_MIIMON, bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_UPDELAY, bond->params.updelay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_DOWNDELAY, bond->params.downdelay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_PEER_NOTIF_DELAY, bond->params.peer_notif_delay * bond->params.miimon)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_USE_CARRIER, bond->params.use_carrier)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_INTERVAL, bond->params.arp_interval)) goto nla_put_failure; targets = nla_nest_start_noflag(skb, IFLA_BOND_ARP_IP_TARGET); if (!targets) goto nla_put_failure; targets_added = 0; for (i = 0; i < BOND_MAX_ARP_TARGETS; i++) { if (bond->params.arp_targets[i]) { if (nla_put_be32(skb, i, bond->params.arp_targets[i])) goto nla_put_failure; targets_added = 1; } } if (targets_added) nla_nest_end(skb, targets); else nla_nest_cancel(skb, targets); if (nla_put_u32(skb, IFLA_BOND_ARP_VALIDATE, bond->params.arp_validate)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_ARP_ALL_TARGETS, 
bond->params.arp_all_targets)) goto nla_put_failure; #if IS_ENABLED(CONFIG_IPV6) targets = nla_nest_start(skb, IFLA_BOND_NS_IP6_TARGET); if (!targets) goto nla_put_failure; targets_added = 0; for (i = 0; i < BOND_MAX_NS_TARGETS; i++) { if (!ipv6_addr_any(&bond->params.ns_targets[i])) { if (nla_put_in6_addr(skb, i, &bond->params.ns_targets[i])) goto nla_put_failure; targets_added = 1; } } if (targets_added) nla_nest_end(skb, targets); else nla_nest_cancel(skb, targets); #endif primary = rtnl_dereference(bond->primary_slave); if (primary && nla_put_u32(skb, IFLA_BOND_PRIMARY, primary->dev->ifindex)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_PRIMARY_RESELECT, bond->params.primary_reselect)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_FAIL_OVER_MAC, bond->params.fail_over_mac)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_XMIT_HASH_POLICY, bond->params.xmit_policy)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_RESEND_IGMP, bond->params.resend_igmp)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_NUM_PEER_NOTIF, bond->params.num_peer_notif)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_ALL_SLAVES_ACTIVE, bond->params.all_slaves_active)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_MIN_LINKS, bond->params.min_links)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BOND_LP_INTERVAL, bond->params.lp_interval)) goto nla_put_failure; packets_per_slave = bond->params.packets_per_slave; if (nla_put_u32(skb, IFLA_BOND_PACKETS_PER_SLAVE, packets_per_slave)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_LACP_ACTIVE, bond->params.lacp_active)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_LACP_RATE, bond->params.lacp_fast)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_AD_SELECT, bond->params.ad_select)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_TLB_DYNAMIC_LB, bond->params.tlb_dynamic_lb)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_MISSED_MAX, bond->params.missed_max)) goto 
nla_put_failure; if (nla_put_u8(skb, IFLA_BOND_COUPLED_CONTROL, bond->params.coupled_control)) goto nla_put_failure; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info info; if (capable(CAP_NET_ADMIN)) { if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO, bond->params.ad_actor_sys_prio)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY, bond->params.ad_user_port_key)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, ETH_ALEN, &bond->params.ad_actor_system)) goto nla_put_failure; } if (!bond_3ad_get_active_agg_info(bond, &info)) { struct nlattr *nest; nest = nla_nest_start_noflag(skb, IFLA_BOND_AD_INFO); if (!nest) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_AGGREGATOR, info.aggregator_id)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_NUM_PORTS, info.ports)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_ACTOR_KEY, info.actor_key)) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BOND_AD_INFO_PARTNER_KEY, info.partner_key)) goto nla_put_failure; if (nla_put(skb, IFLA_BOND_AD_INFO_PARTNER_MAC, sizeof(info.partner_system), &info.partner_system)) goto nla_put_failure; nla_nest_end(skb, nest); } } return 0; nla_put_failure: return -EMSGSIZE; } static size_t bond_get_linkxstats_size(const struct net_device *dev, int attr) { switch (attr) { case IFLA_STATS_LINK_XSTATS: case IFLA_STATS_LINK_XSTATS_SLAVE: break; default: return 0; } return bond_3ad_stats_size() + nla_total_size(0); } static int bond_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, int *prividx, int attr) { struct nlattr *nla __maybe_unused; struct slave *slave = NULL; struct nlattr *nest, *nest2; struct bonding *bond; switch (attr) { case IFLA_STATS_LINK_XSTATS: bond = netdev_priv(dev); break; case IFLA_STATS_LINK_XSTATS_SLAVE: slave = bond_slave_get_rtnl(dev); if (!slave) return 0; bond = slave->bond; break; default: return -EINVAL; } nest = nla_nest_start_noflag(skb, LINK_XSTATS_TYPE_BOND); if 
(!nest) return -EMSGSIZE; if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct bond_3ad_stats *stats; if (slave) stats = &SLAVE_AD_INFO(slave)->stats; else stats = &BOND_AD_INFO(bond).stats; nest2 = nla_nest_start_noflag(skb, BOND_XSTATS_3AD); if (!nest2) { nla_nest_end(skb, nest); return -EMSGSIZE; } if (bond_3ad_stats_fill(skb, stats)) { nla_nest_cancel(skb, nest2); nla_nest_end(skb, nest); return -EMSGSIZE; } nla_nest_end(skb, nest2); } nla_nest_end(skb, nest); return 0; } struct rtnl_link_ops bond_link_ops __read_mostly = { .kind = "bond", .priv_size = sizeof(struct bonding), .setup = bond_setup, .maxtype = IFLA_BOND_MAX, .policy = bond_policy, .validate = bond_validate, .newlink = bond_newlink, .changelink = bond_changelink, .get_size = bond_get_size, .fill_info = bond_fill_info, .get_num_tx_queues = bond_get_num_tx_queues, .get_num_rx_queues = bond_get_num_tx_queues, /* Use the same number as for TX queues */ .fill_linkxstats = bond_fill_linkxstats, .get_linkxstats_size = bond_get_linkxstats_size, .slave_maxtype = IFLA_BOND_SLAVE_MAX, .slave_policy = bond_slave_policy, .slave_changelink = bond_slave_changelink, .get_slave_size = bond_get_slave_size, .fill_slave_info = bond_fill_slave_info, }; int __init bond_netlink_init(void) { return rtnl_link_register(&bond_link_ops); } void bond_netlink_fini(void) { rtnl_link_unregister(&bond_link_ops); } MODULE_ALIAS_RTNL_LINK("bond");
16 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 // SPDX-License-Identifier: GPL-2.0-only /* * crc16.c */ #include <linux/types.h> #include <linux/module.h> #include <linux/crc16.h> /** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ u16 const crc16_table[256] = { 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, 0x5000, 0x90C1, 0x9181, 0x5140, 
0x9301, 0x53C0, 0x5280, 0x9241, 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, 0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 }; EXPORT_SYMBOL(crc16_table); /** * crc16 - compute the CRC-16 for the data buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer * * Returns the updated CRC value. */ u16 crc16(u16 crc, u8 const *buffer, size_t len) { while (len--) crc = crc16_byte(crc, *buffer++); return crc; } EXPORT_SYMBOL(crc16); MODULE_DESCRIPTION("CRC16 calculations"); MODULE_LICENSE("GPL");
13 13 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 // SPDX-License-Identifier: GPL-2.0-only /* * VHT handling * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH * Copyright (C) 2018 - 2024 Intel Corporation */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "rate.h" static void __check_vhtcap_disable(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap, u32 flag) { __le32 le_flag = cpu_to_le32(flag); if (sdata->u.mgd.vht_capa_mask.vht_cap_info & le_flag && !(sdata->u.mgd.vht_capa.vht_cap_info & le_flag)) vht_cap->cap &= ~flag; } void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap) { int i; u16 rxmcs_mask, rxmcs_cap, rxmcs_n, txmcs_mask, txmcs_cap, txmcs_n; if (!vht_cap->vht_supported) return; if (sdata->vif.type != NL80211_IFTYPE_STATION) return; 
__check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_RXLDPC); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_SHORT_GI_80); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_SHORT_GI_160); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_TXSTBC); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN); __check_vhtcap_disable(sdata, vht_cap, IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN); /* Allow user to decrease AMPDU length exponent */ if (sdata->u.mgd.vht_capa_mask.vht_cap_info & cpu_to_le32(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK)) { u32 cap, n; n = le32_to_cpu(sdata->u.mgd.vht_capa.vht_cap_info) & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; n >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; cap = vht_cap->cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; cap >>= IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; if (n < cap) { vht_cap->cap &= ~IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; vht_cap->cap |= n << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT; } } /* Allow the user to decrease MCSes */ rxmcs_mask = le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.rx_mcs_map); rxmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.rx_mcs_map); rxmcs_n &= rxmcs_mask; rxmcs_cap = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); txmcs_mask = le16_to_cpu(sdata->u.mgd.vht_capa_mask.supp_mcs.tx_mcs_map); txmcs_n = le16_to_cpu(sdata->u.mgd.vht_capa.supp_mcs.tx_mcs_map); txmcs_n &= txmcs_mask; txmcs_cap = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); for (i = 0; i < 8; i++) { u8 m, n, c; m = (rxmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; n = (rxmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; c = (rxmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || n == 
IEEE80211_VHT_MCS_NOT_SUPPORTED)) { rxmcs_cap &= ~(3 << 2*i); rxmcs_cap |= (rxmcs_n & (3 << 2*i)); } m = (txmcs_mask >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; n = (txmcs_n >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; c = (txmcs_cap >> 2*i) & IEEE80211_VHT_MCS_NOT_SUPPORTED; if (m && ((c != IEEE80211_VHT_MCS_NOT_SUPPORTED && n < c) || n == IEEE80211_VHT_MCS_NOT_SUPPORTED)) { txmcs_cap &= ~(3 << 2*i); txmcs_cap |= (txmcs_n & (3 << 2*i)); } } vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(rxmcs_cap); vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(txmcs_cap); } void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const struct ieee80211_vht_cap *vht_cap_ie, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta) { struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; struct ieee80211_sta_vht_cap own_cap; u32 cap_info, i; bool have_80mhz; u32 mpdu_len; memset(vht_cap, 0, sizeof(*vht_cap)); if (!link_sta->pub->ht_cap.ht_supported) return; if (!vht_cap_ie || !sband->vht_cap.vht_supported) return; /* Allow VHT if at least one channel on the sband supports 80 MHz */ have_80mhz = false; for (i = 0; i < sband->n_channels; i++) { if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | IEEE80211_CHAN_NO_80MHZ)) continue; have_80mhz = true; break; } if (!have_80mhz) return; /* * A VHT STA must support 40 MHz, but if we verify that here * then we break a few things - some APs (e.g. Netgear R6300v2 * and others based on the BCM4360 chipset) will unset this * capability bit when operating in 20 MHz. */ vht_cap->vht_supported = true; own_cap = sband->vht_cap; /* * If user has specified capability overrides, take care * of that if the station we're setting up is the AP that * we advertised a restricted capability set to. Override * our own capabilities and then use those below. 
*/ if (sdata->vif.type == NL80211_IFTYPE_STATION && !test_sta_flag(link_sta->sta, WLAN_STA_TDLS_PEER)) ieee80211_apply_vhtcap_overrides(sdata, &own_cap); /* take some capabilities as-is */ cap_info = le32_to_cpu(vht_cap_ie->vht_cap_info); vht_cap->cap = cap_info; vht_cap->cap &= IEEE80211_VHT_CAP_RXLDPC | IEEE80211_VHT_CAP_VHT_TXOP_PS | IEEE80211_VHT_CAP_HTC_VHT | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB | IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB | IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN | IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN; vht_cap->cap |= min_t(u32, cap_info & IEEE80211_VHT_CAP_MAX_MPDU_MASK, own_cap.cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK); /* and some based on our own capabilities */ switch (own_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ; break; case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; break; default: /* nothing */ break; } /* symmetric capabilities */ vht_cap->cap |= cap_info & own_cap.cap & (IEEE80211_VHT_CAP_SHORT_GI_80 | IEEE80211_VHT_CAP_SHORT_GI_160); /* remaining ones */ if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE | IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK); if (own_cap.cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & (IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK); if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE) vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; if (own_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE; if (own_cap.cap & IEEE80211_VHT_CAP_TXSTBC) vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_RXSTBC_MASK; if (own_cap.cap & 
IEEE80211_VHT_CAP_RXSTBC_MASK) vht_cap->cap |= cap_info & IEEE80211_VHT_CAP_TXSTBC; /* Copy peer MCS info, the driver might need them. */ memcpy(&vht_cap->vht_mcs, &vht_cap_ie->supp_mcs, sizeof(struct ieee80211_vht_mcs_info)); /* copy EXT_NSS_BW Support value or remove the capability */ if (ieee80211_hw_check(&sdata->local->hw, SUPPORTS_VHT_EXT_NSS_BW)) vht_cap->cap |= (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK); else vht_cap->vht_mcs.tx_highest &= ~cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE); /* but also restrict MCSes */ for (i = 0; i < 8; i++) { u16 own_rx, own_tx, peer_rx, peer_tx; own_rx = le16_to_cpu(own_cap.vht_mcs.rx_mcs_map); own_rx = (own_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; own_tx = le16_to_cpu(own_cap.vht_mcs.tx_mcs_map); own_tx = (own_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; peer_rx = le16_to_cpu(vht_cap->vht_mcs.rx_mcs_map); peer_rx = (peer_rx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; peer_tx = le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map); peer_tx = (peer_tx >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; if (peer_tx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { if (own_rx == IEEE80211_VHT_MCS_NOT_SUPPORTED) peer_tx = IEEE80211_VHT_MCS_NOT_SUPPORTED; else if (own_rx < peer_tx) peer_tx = own_rx; } if (peer_rx != IEEE80211_VHT_MCS_NOT_SUPPORTED) { if (own_tx == IEEE80211_VHT_MCS_NOT_SUPPORTED) peer_rx = IEEE80211_VHT_MCS_NOT_SUPPORTED; else if (own_tx < peer_rx) peer_rx = own_tx; } vht_cap->vht_mcs.rx_mcs_map &= ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); vht_cap->vht_mcs.rx_mcs_map |= cpu_to_le16(peer_rx << i * 2); vht_cap->vht_mcs.tx_mcs_map &= ~cpu_to_le16(IEEE80211_VHT_MCS_NOT_SUPPORTED << i * 2); vht_cap->vht_mcs.tx_mcs_map |= cpu_to_le16(peer_tx << i * 2); } /* * This is a workaround for VHT-enabled STAs which break the spec * and have the VHT-MCS Rx map filled in with value 3 for all eight * spatial streams, an example is AR9462. 
* * As per spec, in section 22.1.1 Introduction to the VHT PHY * A VHT STA shall support at least single spatial stream VHT-MCSs * 0 to 7 (transmit and receive) in all supported channel widths. */ if (vht_cap->vht_mcs.rx_mcs_map == cpu_to_le16(0xFFFF)) { vht_cap->vht_supported = false; sdata_info(sdata, "Ignoring VHT IE from %pM (link:%pM) due to invalid rx_mcs_map\n", link_sta->sta->addr, link_sta->addr); return; } /* finally set up the bandwidth */ switch (vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) { case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ: case IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ: link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; default: link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; if (!(vht_cap->vht_mcs.tx_highest & cpu_to_le16(IEEE80211_VHT_EXT_NSS_BW_CAPABLE))) break; /* * If this is non-zero, then it does support 160 MHz after all, * in one form or the other. We don't distinguish here (or even * above) between 160 and 80+80 yet. */ if (cap_info & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; } link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); /* * Work around the Cisco 9115 FW 17.3 bug by taking the min of * both reported MPDU lengths. */ mpdu_len = vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK; if (vht_cap_ie2) mpdu_len = min_t(u32, mpdu_len, le32_get_bits(vht_cap_ie2->vht_cap_info, IEEE80211_VHT_CAP_MAX_MPDU_MASK)); /* * FIXME - should the amsdu len be per link? store per link * and maintain a minimum? 
*/ switch (mpdu_len) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: default: link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; break; } ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); } /* FIXME: move this to some better location - parses HE/EHT now */ static enum ieee80211_sta_rx_bandwidth __ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta, struct cfg80211_chan_def *chandef) { unsigned int link_id = link_sta->link_id; struct ieee80211_sub_if_data *sdata = link_sta->sta->sdata; struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap; struct ieee80211_sta_eht_cap *eht_cap = &link_sta->pub->eht_cap; u32 cap_width; if (he_cap->has_he) { enum nl80211_band band; u8 info; if (chandef) { band = chandef->chan->band; } else { struct ieee80211_bss_conf *link_conf; rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_id]); band = link_conf->chanreq.oper.chan->band; rcu_read_unlock(); } if (eht_cap->has_eht && band == NL80211_BAND_6GHZ) { info = eht_cap->eht_cap_elem.phy_cap_info[0]; if (info & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) return IEEE80211_STA_RX_BW_320; } info = he_cap->he_cap_elem.phy_cap_info[0]; if (band == NL80211_BAND_2GHZ) { if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) return IEEE80211_STA_RX_BW_40; return IEEE80211_STA_RX_BW_20; } if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G || info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) return IEEE80211_STA_RX_BW_160; if (info & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) return IEEE80211_STA_RX_BW_80; return IEEE80211_STA_RX_BW_20; } if (!vht_cap->vht_supported) return link_sta->pub->ht_cap.cap & 
IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ || cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return IEEE80211_STA_RX_BW_160; /* * If this is non-zero, then it does support 160 MHz after all, * in one form or the other. We don't distinguish here (or even * above) between 160 and 80+80 yet. */ if (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) return IEEE80211_STA_RX_BW_160; return IEEE80211_STA_RX_BW_80; } enum ieee80211_sta_rx_bandwidth _ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta, struct cfg80211_chan_def *chandef) { /* * With RX OMI, also pretend that the STA's capability changed. * Of course this isn't really true, it didn't change, only our * RX capability was changed by notifying RX OMI to the STA. * The purpose, however, is to save power, and that requires * changing also transmissions to the AP and the chanctx. The * transmissions depend on link_sta->bandwidth which is set in * _ieee80211_sta_cur_vht_bw() below, but the chanctx depends * on the result of this function which is also called by * _ieee80211_sta_cur_vht_bw(), so we need to do that here as * well. This is sufficient for the steady state, but during * the transition we already need to change TX/RX separately, * so _ieee80211_sta_cur_vht_bw() below applies the _tx one. */ return min(__ieee80211_sta_cap_rx_bw(link_sta, chandef), link_sta->rx_omi_bw_rx); } enum nl80211_chan_width ieee80211_sta_cap_chan_bw(struct link_sta_info *link_sta) { struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; u32 cap_width; if (!vht_cap->vht_supported) { if (!link_sta->pub->ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20_NOHT; return link_sta->pub->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40 ? 
NL80211_CHAN_WIDTH_40 : NL80211_CHAN_WIDTH_20; } cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) return NL80211_CHAN_WIDTH_160; else if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return NL80211_CHAN_WIDTH_80P80; return NL80211_CHAN_WIDTH_80; } enum nl80211_chan_width ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *link_sta) { enum ieee80211_sta_rx_bandwidth cur_bw = link_sta->pub->bandwidth; struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; u32 cap_width; switch (cur_bw) { case IEEE80211_STA_RX_BW_20: if (!link_sta->pub->ht_cap.ht_supported) return NL80211_CHAN_WIDTH_20_NOHT; else return NL80211_CHAN_WIDTH_20; case IEEE80211_STA_RX_BW_40: return NL80211_CHAN_WIDTH_40; case IEEE80211_STA_RX_BW_80: return NL80211_CHAN_WIDTH_80; case IEEE80211_STA_RX_BW_160: cap_width = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ) return NL80211_CHAN_WIDTH_160; return NL80211_CHAN_WIDTH_80P80; default: return NL80211_CHAN_WIDTH_20; } } /* FIXME: rename/move - this deals with everything not just VHT */ enum ieee80211_sta_rx_bandwidth _ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, struct cfg80211_chan_def *chandef) { struct sta_info *sta = link_sta->sta; enum nl80211_chan_width bss_width; enum ieee80211_sta_rx_bandwidth bw; if (chandef) { bss_width = chandef->width; } else { struct ieee80211_bss_conf *link_conf; rcu_read_lock(); link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]); if (WARN_ON_ONCE(!link_conf)) { rcu_read_unlock(); return IEEE80211_STA_RX_BW_20; } bss_width = link_conf->chanreq.oper.width; rcu_read_unlock(); } /* intentionally do not take rx_bw_omi_rx into account */ bw = __ieee80211_sta_cap_rx_bw(link_sta, chandef); bw = min(bw, link_sta->cur_max_bandwidth); /* but do apply rx_omi_bw_tx */ bw = min(bw, link_sta->rx_omi_bw_tx); /* Don't consider AP's 
bandwidth for TDLS peers, section 11.23.1 of * IEEE80211-2016 specification makes higher bandwidth operation * possible on the TDLS link if the peers have wider bandwidth * capability. * * However, in this case, and only if the TDLS peer is authorized, * limit to the tdls_chandef so that the configuration here isn't * wider than what's actually requested on the channel context. */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) && test_sta_flag(sta, WLAN_STA_AUTHORIZED) && sta->tdls_chandef.chan) bw = min(bw, ieee80211_chan_width_to_rx_bw(sta->tdls_chandef.width)); else bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; } void ieee80211_sta_init_nss(struct link_sta_info *link_sta) { u8 ht_rx_nss = 0, vht_rx_nss = 0, he_rx_nss = 0, eht_rx_nss = 0, rx_nss; bool support_160; if (link_sta->pub->eht_cap.has_eht) { int i; const u8 *rx_nss_mcs = (void *)&link_sta->pub->eht_cap.eht_mcs_nss_supp; /* get the max nss for EHT over all possible bandwidths and mcs */ for (i = 0; i < sizeof(struct ieee80211_eht_mcs_nss_supp); i++) eht_rx_nss = max_t(u8, eht_rx_nss, u8_get_bits(rx_nss_mcs[i], IEEE80211_EHT_MCS_NSS_RX)); } if (link_sta->pub->he_cap.has_he) { int i; u8 rx_mcs_80 = 0, rx_mcs_160 = 0; const struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap; u16 mcs_160_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_160); u16 mcs_80_map = le16_to_cpu(he_cap->he_mcs_nss_supp.rx_mcs_80); for (i = 7; i >= 0; i--) { u8 mcs_160 = (mcs_160_map >> (2 * i)) & 3; if (mcs_160 != IEEE80211_HE_MCS_NOT_SUPPORTED) { rx_mcs_160 = i + 1; break; } } for (i = 7; i >= 0; i--) { u8 mcs_80 = (mcs_80_map >> (2 * i)) & 3; if (mcs_80 != IEEE80211_HE_MCS_NOT_SUPPORTED) { rx_mcs_80 = i + 1; break; } } support_160 = he_cap->he_cap_elem.phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G; if (support_160) he_rx_nss = min(rx_mcs_80, rx_mcs_160); else he_rx_nss = rx_mcs_80; } if (link_sta->pub->ht_cap.ht_supported) { if 
(link_sta->pub->ht_cap.mcs.rx_mask[0]) ht_rx_nss++; if (link_sta->pub->ht_cap.mcs.rx_mask[1]) ht_rx_nss++; if (link_sta->pub->ht_cap.mcs.rx_mask[2]) ht_rx_nss++; if (link_sta->pub->ht_cap.mcs.rx_mask[3]) ht_rx_nss++; /* FIXME: consider rx_highest? */ } if (link_sta->pub->vht_cap.vht_supported) { int i; u16 rx_mcs_map; rx_mcs_map = le16_to_cpu(link_sta->pub->vht_cap.vht_mcs.rx_mcs_map); for (i = 7; i >= 0; i--) { u8 mcs = (rx_mcs_map >> (2 * i)) & 3; if (mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED) { vht_rx_nss = i + 1; break; } } /* FIXME: consider rx_highest? */ } rx_nss = max(vht_rx_nss, ht_rx_nss); rx_nss = max(he_rx_nss, rx_nss); rx_nss = max(eht_rx_nss, rx_nss); rx_nss = max_t(u8, 1, rx_nss); link_sta->capa_nss = rx_nss; /* that shouldn't be set yet, but we can handle it anyway */ if (link_sta->op_mode_nss) link_sta->pub->rx_nss = min_t(u8, rx_nss, link_sta->op_mode_nss); else link_sta->pub->rx_nss = rx_nss; } u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta, u8 opmode, enum nl80211_band band) { enum ieee80211_sta_rx_bandwidth new_bw; struct sta_opmode_info sta_opmode = {}; u32 changed = 0; u8 nss; /* ignore - no support for BF yet */ if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF) return 0; nss = opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_MASK; nss >>= IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT; nss += 1; if (link_sta->op_mode_nss != nss) { if (nss <= link_sta->capa_nss) { link_sta->op_mode_nss = nss; if (nss != link_sta->pub->rx_nss) { link_sta->pub->rx_nss = nss; changed |= IEEE80211_RC_NSS_CHANGED; sta_opmode.rx_nss = link_sta->pub->rx_nss; sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED; } } else { pr_warn_ratelimited("Ignoring NSS change in VHT Operating Mode Notification from %pM with invalid nss %d", link_sta->pub->addr, nss); } } switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) { case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ 
link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: /* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80) link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; else link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80; break; case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: /* legacy only, no longer used by newer spec */ link_sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160; break; } new_bw = ieee80211_sta_cur_vht_bw(link_sta); if (new_bw != link_sta->pub->bandwidth) { link_sta->pub->bandwidth = new_bw; sta_opmode.bw = ieee80211_sta_rx_bw_to_chan_width(link_sta); changed |= IEEE80211_RC_BW_CHANGED; sta_opmode.changed |= STA_OPMODE_MAX_BW_CHANGED; } if (sta_opmode.changed) cfg80211_sta_opmode_change_notify(sdata->dev, link_sta->addr, &sta_opmode, GFP_KERNEL); return changed; } void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, struct ieee80211_link_data *link, struct ieee80211_mgmt *mgmt) { struct ieee80211_bss_conf *link_conf = link->conf; if (!link_conf->mu_mimo_owner) return; if (!memcmp(mgmt->u.action.u.vht_group_notif.position, link_conf->mu_group.position, WLAN_USER_POSITION_LEN) && !memcmp(mgmt->u.action.u.vht_group_notif.membership, link_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN)) return; memcpy(link_conf->mu_group.membership, mgmt->u.action.u.vht_group_notif.membership, WLAN_MEMBERSHIP_LEN); memcpy(link_conf->mu_group.position, mgmt->u.action.u.vht_group_notif.position, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, link, BSS_CHANGED_MU_GROUPS); } void ieee80211_update_mu_groups(struct ieee80211_vif *vif, unsigned int link_id, const u8 *membership, const u8 *position) { struct ieee80211_bss_conf *link_conf; rcu_read_lock(); link_conf = rcu_dereference(vif->link_conf[link_id]); if 
(!WARN_ON_ONCE(!link_conf || !link_conf->mu_mimo_owner)) { memcpy(link_conf->mu_group.membership, membership, WLAN_MEMBERSHIP_LEN); memcpy(link_conf->mu_group.position, position, WLAN_USER_POSITION_LEN); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_update_mu_groups); void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata, struct link_sta_info *link_sta, u8 opmode, enum nl80211_band band) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band]; u32 changed = __ieee80211_vht_handle_opmode(sdata, link_sta, opmode, band); if (changed > 0) { ieee80211_recalc_min_chandef(sdata, link_sta->link_id); rate_control_rate_update(local, sband, link_sta, changed); } } void ieee80211_get_vht_mask_from_cap(__le16 vht_cap, u16 vht_mask[NL80211_VHT_NSS_MAX]) { int i; u16 mask, cap = le16_to_cpu(vht_cap); for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { mask = (cap >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED; switch (mask) { case IEEE80211_VHT_MCS_SUPPORT_0_7: vht_mask[i] = 0x00FF; break; case IEEE80211_VHT_MCS_SUPPORT_0_8: vht_mask[i] = 0x01FF; break; case IEEE80211_VHT_MCS_SUPPORT_0_9: vht_mask[i] = 0x03FF; break; case IEEE80211_VHT_MCS_NOT_SUPPORTED: default: vht_mask[i] = 0; break; } } }
331 327 327 1 3560 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_MMU_CONTEXT_H #define _ASM_X86_MMU_CONTEXT_H #include <asm/desc.h> #include <linux/atomic.h> #include <linux/mm_types.h> #include <linux/pkeys.h> #include <trace/events/tlb.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> #include <asm/debugreg.h> #include <asm/gsseg.h> extern atomic64_t last_mm_ctx_id; #ifdef CONFIG_PERF_EVENTS DECLARE_STATIC_KEY_FALSE(rdpmc_never_available_key); DECLARE_STATIC_KEY_FALSE(rdpmc_always_available_key); void cr4_update_pce(void *ignored); #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL /* * ldt_structs can be allocated, used, and freed, but they are never * modified while live. */ struct ldt_struct { /* * Xen requires page-aligned LDTs with special permissions. This is * needed to prevent us from installing evil descriptors such as * call gates. On native, we could merge the ldt_struct and LDT * allocations, but it's not worth trying to optimize. 
*/ struct desc_struct *entries; unsigned int nr_entries; /* * If PTI is in use, then the entries array is not mapped while we're * in user mode. The whole array will be aliased at the addressed * given by ldt_slot_va(slot). We use two slots so that we can allocate * and map, and enable a new LDT without invalidating the mapping * of an older, still-in-use LDT. * * slot will be -1 if this LDT doesn't have an alias mapping. */ int slot; }; /* * Used for LDT copy/destruction. */ static inline void init_new_context_ldt(struct mm_struct *mm) { mm->context.ldt = NULL; init_rwsem(&mm->context.ldt_usr_sem); } int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm); void destroy_context_ldt(struct mm_struct *mm); void ldt_arch_exit_mmap(struct mm_struct *mm); #else /* CONFIG_MODIFY_LDT_SYSCALL */ static inline void init_new_context_ldt(struct mm_struct *mm) { } static inline int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm) { return 0; } static inline void destroy_context_ldt(struct mm_struct *mm) { } static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { } #endif #ifdef CONFIG_MODIFY_LDT_SYSCALL extern void load_mm_ldt(struct mm_struct *mm); extern void switch_ldt(struct mm_struct *prev, struct mm_struct *next); #else static inline void load_mm_ldt(struct mm_struct *mm) { clear_LDT(); } static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) { DEBUG_LOCKS_WARN_ON(preemptible()); } #endif #ifdef CONFIG_ADDRESS_MASKING static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) { /* * When switch_mm_irqs_off() is called for a kthread, it may race with * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it * reads a single value for both. 
*/ return READ_ONCE(mm->context.lam_cr3_mask); } static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask; mm->context.untag_mask = oldmm->context.untag_mask; } #define mm_untag_mask mm_untag_mask static inline unsigned long mm_untag_mask(struct mm_struct *mm) { return mm->context.untag_mask; } static inline void mm_reset_untag_mask(struct mm_struct *mm) { mm->context.untag_mask = -1UL; } #define arch_pgtable_dma_compat arch_pgtable_dma_compat static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) { return !mm_lam_cr3_mask(mm) || test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); } #else static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) { return 0; } static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm) { } static inline void mm_reset_untag_mask(struct mm_struct *mm) { } #endif #define enter_lazy_tlb enter_lazy_tlb extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); /* * Init a new mm. Used on mm copies, like at fork() * and on mm's that are brand-new, like at execve(). 
*/ #define init_new_context init_new_context static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { mutex_init(&mm->context.lock); mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id); atomic64_set(&mm->context.tlb_gen, 0); mm->context.next_trim_cpumask = jiffies + HZ; #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; } #endif mm_reset_untag_mask(mm); init_new_context_ldt(mm); return 0; } #define destroy_context destroy_context static inline void destroy_context(struct mm_struct *mm) { destroy_context_ldt(mm); } extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk); #define switch_mm_irqs_off switch_mm_irqs_off #define activate_mm(prev, next) \ do { \ paravirt_enter_mmap(next); \ switch_mm((prev), (next), NULL); \ } while (0); #ifdef CONFIG_X86_32 #define deactivate_mm(tsk, mm) \ do { \ loadsegment(gs, 0); \ } while (0) #else #define deactivate_mm(tsk, mm) \ do { \ shstk_free(tsk); \ load_gs_index(0); \ loadsegment(fs, 0); \ } while (0) #endif static inline void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm) { #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) return; /* Duplicate the oldmm pkey state in mm: */ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; #endif } static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) { arch_dup_pkeys(oldmm, mm); paravirt_enter_mmap(mm); dup_lam(oldmm, mm); return ldt_dup_context(oldmm, mm); } static inline void arch_exit_mmap(struct mm_struct *mm) { paravirt_arch_exit_mmap(mm); 
ldt_arch_exit_mmap(mm); } #ifdef CONFIG_X86_64 static inline bool is_64bit_mm(struct mm_struct *mm) { return !IS_ENABLED(CONFIG_IA32_EMULATION) || !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags); } #else static inline bool is_64bit_mm(struct mm_struct *mm) { return false; } #endif /* * We only want to enforce protection keys on the current process * because we effectively have no access to PKRU for other * processes or any way to tell *which * PKRU in a threaded * process we could use. * * So do not enforce things if the VMA is not from the current * mm, or if we are in a kernel thread. */ static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write, bool execute, bool foreign) { /* pkeys never affect instruction fetches */ if (execute) return true; /* allow access if the VMA is not one from this process */ if (foreign || vma_is_foreign(vma)) return true; return __pkru_allows_pkey(vma_pkey(vma), write); } unsigned long __get_current_cr3_fast(void); #include <asm-generic/mmu_context.h> #endif /* _ASM_X86_MMU_CONTEXT_H */
4 5 15 29 71 69 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth kernel library. */ #define pr_fmt(fmt) "Bluetooth: " fmt #include <linux/export.h> #include <net/bluetooth/bluetooth.h> /** * baswap() - Swaps the order of a bd address * @dst: Pointer to a bdaddr_t struct that will store the swapped * bd address. * @src: Pointer to the bdaddr_t struct to be swapped. * * This function reverses the byte order of a Bluetooth device * address. */ void baswap(bdaddr_t *dst, const bdaddr_t *src) { const unsigned char *s = (const unsigned char *)src; unsigned char *d = (unsigned char *)dst; unsigned int i; for (i = 0; i < 6; i++) d[i] = s[5 - i]; } EXPORT_SYMBOL(baswap); /** * bt_to_errno() - Bluetooth error codes to standard errno * @code: Bluetooth error code to be converted * * This function takes a Bluetooth error code as input and convets * it to an equivalent Unix/standard errno value. * * Return: * * If the bt error code is known, an equivalent Unix errno value * is returned. * If the given bt error code is not known, ENOSYS is returned. 
*/
int bt_to_errno(__u16 code)
{
	int err;

	/* Codes that share an errno are grouped under one assignment. */
	switch (code) {
	case 0:
		err = 0;
		break;

	case 0x01:
		err = EBADRQC;
		break;

	case 0x02:
		err = ENOTCONN;
		break;

	case 0x03:
		err = EIO;
		break;

	case 0x04:
	case 0x3c:
		err = EHOSTDOWN;
		break;

	case 0x05:
	case 0x18:
		err = EACCES;
		break;

	case 0x06:
		err = EBADE;
		break;

	case 0x07:
		err = ENOMEM;
		break;

	case 0x08:
	case 0x10:
		err = ETIMEDOUT;
		break;

	case 0x09:
	case 0x0a:
		err = EMLINK;
		break;

	case 0x0b:
		err = EALREADY;
		break;

	case 0x0c:
		err = EBUSY;
		break;

	case 0x0d:
	case 0x0e:
	case 0x0f:
	case 0x1b:
		err = ECONNREFUSED;
		break;

	case 0x11:
	case 0x20:
	case 0x27:
	case 0x29:
		err = EOPNOTSUPP;
		break;

	case 0x12:
		err = EINVAL;
		break;

	case 0x13:
	case 0x14:
	case 0x15:
		err = ECONNRESET;
		break;

	case 0x16:
		err = ECONNABORTED;
		break;

	case 0x17:
		err = ELOOP;
		break;

	case 0x1a:
		err = EPROTONOSUPPORT;
		break;

	case 0x19:
	case 0x1e:
	case 0x23:
	case 0x24:
	case 0x25:
		err = EPROTO;
		break;

	default:
		err = ENOSYS;
		break;
	}

	return err;
}
EXPORT_SYMBOL(bt_to_errno);

/**
 * bt_status() - Standard errno value to Bluetooth error code
 * @err: Unix/standard errno value to be converted
 *
 * This function converts a negative standard/Unix errno value to an
 * equivalent Bluetooth error code.
 *
 * Return: Bluetooth error code.
 *
 * If the given errno is not found, 0x1f is returned by default
 * which indicates an unspecified error.
 * For err >= 0, no conversion is performed, and the same value
 * is immediately returned.
*/ __u8 bt_status(int err) { if (err >= 0) return err; switch (err) { case -EBADRQC: return 0x01; case -ENOTCONN: return 0x02; case -EIO: return 0x03; case -EHOSTDOWN: return 0x04; case -EACCES: return 0x05; case -EBADE: return 0x06; case -ENOMEM: return 0x07; case -ETIMEDOUT: return 0x08; case -EMLINK: return 0x09; case -EALREADY: return 0x0b; case -EBUSY: return 0x0c; case -ECONNREFUSED: return 0x0d; case -EOPNOTSUPP: return 0x11; case -EINVAL: return 0x12; case -ECONNRESET: return 0x13; case -ECONNABORTED: return 0x16; case -ELOOP: return 0x17; case -EPROTONOSUPPORT: return 0x1a; case -EPROTO: return 0x19; default: return 0x1f; } } EXPORT_SYMBOL(bt_status); /** * bt_info() - Log Bluetooth information message * @format: Message's format string */ void bt_info(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_info("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_info); /** * bt_warn() - Log Bluetooth warning message * @format: Message's format string */ void bt_warn(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_warn("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_warn); /** * bt_err() - Log Bluetooth error message * @format: Message's format string */ void bt_err(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_err("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_err); #ifdef CONFIG_BT_FEATURE_DEBUG static bool debug_enable; void bt_dbg_set(bool enable) { debug_enable = enable; } bool bt_dbg_get(void) { return debug_enable; } /** * bt_dbg() - Log Bluetooth debugging message * @format: Message's format string */ void bt_dbg(const char *format, ...) 
{ struct va_format vaf; va_list args; if (likely(!debug_enable)) return; va_start(args, format); vaf.fmt = format; vaf.va = &args; printk(KERN_DEBUG pr_fmt("%pV"), &vaf); va_end(args); } EXPORT_SYMBOL(bt_dbg); #endif /** * bt_warn_ratelimited() - Log rate-limited Bluetooth warning message * @format: Message's format string * * This functions works like bt_warn, but it uses rate limiting * to prevent the message from being logged too often. */ void bt_warn_ratelimited(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_warn_ratelimited("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_warn_ratelimited); /** * bt_err_ratelimited() - Log rate-limited Bluetooth error message * @format: Message's format string * * This functions works like bt_err, but it uses rate limiting * to prevent the message from being logged too often. */ void bt_err_ratelimited(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_err_ratelimited("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_err_ratelimited);
50 35 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443
// SPDX-License-Identifier: GPL-2.0
/*
 *  file.c - part of debugfs, a tiny little debug file system
 *
 *  Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
 *  Copyright (C) 2004 IBM Inc.
 *
 *  debugfs is for people to use instead of /proc or /sys.
 *  See Documentation/filesystems/ for more details.
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/atomic.h>
#include <linux/device.h>
#include <linux/pm_runtime.h>
#include <linux/poll.h>
#include <linux/security.h>

#include "internal.h"

struct poll_table_struct;

/* No-op read: always reports end of file (0 bytes read). */
static ssize_t default_read_file(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
{
	return 0;
}

/* No-op write: discards the data but reports all @count bytes as written. */
static ssize_t default_write_file(struct file *file, const char __user *buf,
				   size_t count, loff_t *ppos)
{
	return count;
}

/* File operations built from the no-op read/write handlers above. */
const struct file_operations debugfs_noop_file_operations = {
	.read =		default_read_file,
	.write =	default_write_file,
	.open =		simple_open,
	.llseek =	noop_llseek,
};

/* Shorthand for the dentry behind an open file. */
#define F_DENTRY(filp) ((filp)->f_path.dentry)

/* Return the ->aux pointer stored in the debugfs inode info of @file. */
const void *debugfs_get_aux(const struct file *file)
{
	return DEBUGFS_I(file_inode(file))->aux;
}
EXPORT_SYMBOL_GPL(debugfs_get_aux);

/*
 * Return the real file_operations recorded in the dentry's fsdata.
 * Warns and returns NULL when no fsdata has been attached yet.
 */
const struct file_operations *debugfs_real_fops(const struct file *filp)
{
	struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata;

	if (!fsd) {
		/*
		 * Urgh, we've been called w/o a protecting
		 * debugfs_file_get().
		 */
		WARN_ON(1);
		return NULL;
	}
	return fsd->real_fops;
}
EXPORT_SYMBOL_GPL(debugfs_real_fops);

/*
 * How __debugfs_file_get() should treat a dentry without fsdata:
 * DBGFS_GET_ALREADY - fsdata must already exist (warn and fail otherwise),
 * DBGFS_GET_REGULAR - allocate fsdata from the inode's real_fops,
 * DBGFS_GET_SHORT   - allocate fsdata from the inode's short_fops.
 */
enum dbgfs_get_mode {
	DBGFS_GET_ALREADY,
	DBGFS_GET_REGULAR,
	DBGFS_GET_SHORT,
};

/*
 * Take an active-user reference on @dentry's fsdata, allocating and
 * publishing the fsdata first if none is attached and @mode allows it.
 *
 * Return: 0 on success, -EINVAL if @dentry is not a regular file or
 * fsdata is missing in DBGFS_GET_ALREADY mode, -ENOMEM if allocation
 * fails, -EIO if the dentry is unlinked or the user count already
 * dropped to zero.
 */
static int __debugfs_file_get(struct dentry *dentry, enum dbgfs_get_mode mode)
{
	struct debugfs_fsdata *fsd;
	void *d_fsd;

	/*
	 * This could only happen if some debugfs user erroneously calls
	 * debugfs_file_get() on a dentry that isn't even a file, let
	 * them know about it.
	 */
	if (WARN_ON(!d_is_reg(dentry)))
		return -EINVAL;

	d_fsd = READ_ONCE(dentry->d_fsdata);
	if (d_fsd) {
		fsd = d_fsd;
	} else {
		struct inode *inode = dentry->d_inode;
		unsigned int methods = 0;

		if (WARN_ON(mode == DBGFS_GET_ALREADY))
			return -EINVAL;

		fsd = kmalloc(sizeof(*fsd), GFP_KERNEL);
		if (!fsd)
			return -ENOMEM;

		/* Record which handlers exist so the proxies can test a bit. */
		if (mode == DBGFS_GET_SHORT) {
			const struct debugfs_short_fops *ops;
			ops = fsd->short_fops = DEBUGFS_I(inode)->short_fops;
			if (ops->llseek)
				methods |= HAS_LSEEK;
			if (ops->read)
				methods |= HAS_READ;
			if (ops->write)
				methods |= HAS_WRITE;
			fsd->real_fops = NULL;
		} else {
			const struct file_operations *ops;
			ops = fsd->real_fops = DEBUGFS_I(inode)->real_fops;
			if (ops->llseek)
				methods |= HAS_LSEEK;
			if (ops->read)
				methods |= HAS_READ;
			if (ops->write)
				methods |= HAS_WRITE;
			if (ops->unlocked_ioctl)
				methods |= HAS_IOCTL;
			if (ops->poll)
				methods |= HAS_POLL;
			fsd->short_fops = NULL;
		}
		fsd->methods = methods;
		refcount_set(&fsd->active_users, 1);
		init_completion(&fsd->active_users_drained);
		INIT_LIST_HEAD(&fsd->cancellations);
		mutex_init(&fsd->cancellations_mtx);

		/*
		 * Publish our fsdata only if nobody else attached one in
		 * the meantime; otherwise drop ours and use the winner's.
		 */
		d_fsd = cmpxchg(&dentry->d_fsdata, NULL, fsd);
		if (d_fsd) {
			mutex_destroy(&fsd->cancellations_mtx);
			kfree(fsd);
			fsd = d_fsd;
		}
	}

	/*
	 * In case of a successful cmpxchg() above, this check is
	 * strictly necessary and must follow it, see the comment in
	 * __debugfs_remove_file().
	 * OTOH, if the cmpxchg() hasn't been executed or wasn't
	 * successful, this serves the purpose of not starving
	 * removers.
	 */
	if (d_unlinked(dentry))
		return -EIO;

	if (!refcount_inc_not_zero(&fsd->active_users))
		return -EIO;

	return 0;
}

/**
 * debugfs_file_get - mark the beginning of file data access
 * @dentry: the dentry object whose data is being accessed.
 *
 * Up to a matching call to debugfs_file_put(), any successive call
 * into the file removing functions debugfs_remove() and
 * debugfs_remove_recursive() will block. Since associated private
 * file data may only get freed after a successful return of any of
 * the removal functions, you may safely access it after a successful
 * call to debugfs_file_get() without worrying about lifetime issues.
 *
 * If -%EIO is returned, the file has already been removed and thus,
 * it is not safe to access any of its data. If, on the other hand,
 * it is allowed to access the file data, zero is returned.
 *
 * -%EINVAL is returned (with a warning) if @dentry is not a regular
 * file or has no fsdata attached yet.
 */
int debugfs_file_get(struct dentry *dentry)
{
	return __debugfs_file_get(dentry, DBGFS_GET_ALREADY);
}
EXPORT_SYMBOL_GPL(debugfs_file_get);

/**
 * debugfs_file_put - mark the end of file data access
 * @dentry: the dentry object formerly passed to
 *          debugfs_file_get().
 *
 * Allow any ongoing concurrent call into debugfs_remove() or
 * debugfs_remove_recursive() blocked by a former call to
 * debugfs_file_get() to proceed and return to its caller.
 */
void debugfs_file_put(struct dentry *dentry)
{
	struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);

	/* The last user to leave signals the completion. */
	if (refcount_dec_and_test(&fsd->active_users))
		complete(&fsd->active_users_drained);
}
EXPORT_SYMBOL_GPL(debugfs_file_put);

/**
 * debugfs_enter_cancellation - enter a debugfs cancellation
 * @file: the file being accessed
 * @cancellation: the cancellation object, the cancel callback
 *	inside of it must be initialized
 *
 * When a debugfs file is removed it needs to wait for all active
 * operations to complete. However, the operation itself may need
 * to wait for hardware or completion of some asynchronous process
 * or similar. As such, it may need to be cancelled to avoid long
 * waits or even deadlocks.
* * This function can be used inside a debugfs handler that may * need to be cancelled. As soon as this function is called, the * cancellation's 'cancel' callback may be called, at which point * the caller should proceed to call debugfs_leave_cancellation() * and leave the debugfs handler function as soon as possible. * Note that the 'cancel' callback is only ever called in the * context of some kind of debugfs_remove(). * * This function must be paired with debugfs_leave_cancellation(). */ void debugfs_enter_cancellation(struct file *file, struct debugfs_cancellation *cancellation) { struct debugfs_fsdata *fsd; struct dentry *dentry = F_DENTRY(file); INIT_LIST_HEAD(&cancellation->list); if (WARN_ON(!d_is_reg(dentry))) return; if (WARN_ON(!cancellation->cancel)) return; fsd = READ_ONCE(dentry->d_fsdata); if (WARN_ON(!fsd)) return; mutex_lock(&fsd->cancellations_mtx); list_add(&cancellation->list, &fsd->cancellations); mutex_unlock(&fsd->cancellations_mtx); /* if we're already removing wake it up to cancel */ if (d_unlinked(dentry)) complete(&fsd->active_users_drained); } EXPORT_SYMBOL_GPL(debugfs_enter_cancellation); /** * debugfs_leave_cancellation - leave cancellation section * @file: the file being accessed * @cancellation: the cancellation previously registered with * debugfs_enter_cancellation() * * See the documentation of debugfs_enter_cancellation(). */ void debugfs_leave_cancellation(struct file *file, struct debugfs_cancellation *cancellation) { struct debugfs_fsdata *fsd; struct dentry *dentry = F_DENTRY(file); if (WARN_ON(!d_is_reg(dentry))) return; fsd = READ_ONCE(dentry->d_fsdata); if (WARN_ON(!fsd)) return; mutex_lock(&fsd->cancellations_mtx); if (!list_empty(&cancellation->list)) list_del(&cancellation->list); mutex_unlock(&fsd->cancellations_mtx); } EXPORT_SYMBOL_GPL(debugfs_leave_cancellation); /* * Only permit access to world-readable files when the kernel is locked down. 
* We also need to exclude any file that has ways to write or alter it as root
 * can bypass the permissions check.
 *
 * Return: 0 if the open may proceed, -EPERM if lockdown forbids it.
 */
static int debugfs_locked_down(struct inode *inode,
			       struct file *filp,
			       const struct file_operations *real_fops)
{
	/*
	 * Harmless case: mode has nothing beyond read bits, the file is
	 * not opened for writing, and there is no ioctl/mmap handler.
	 */
	if ((inode->i_mode & 07777 & ~0444) == 0 &&
	    !(filp->f_mode & FMODE_WRITE) &&
	    (!real_fops ||
	     (!real_fops->unlocked_ioctl &&
	      !real_fops->compat_ioctl &&
	      !real_fops->mmap)))
		return 0;

	if (security_locked_down(LOCKDOWN_DEBUGFS))
		return -EPERM;

	return 0;
}

/*
 * ->open() of the open-only proxy: after the lockdown check and taking
 * a reference on the real fops, the file's fops are replaced with the
 * real ones and the real ->open(), if any, is called.
 *
 * Return: 0 or the real ->open() result on success; -ENOENT if the
 * file is already removed, -EPERM under lockdown, -ENXIO if the fops
 * owner cannot be pinned.
 */
static int open_proxy_open(struct inode *inode, struct file *filp)
{
	struct dentry *dentry = F_DENTRY(filp);
	const struct file_operations *real_fops = NULL;
	int r;

	r = __debugfs_file_get(dentry, DBGFS_GET_REGULAR);
	if (r)
		return r == -EIO ? -ENOENT : r;

	real_fops = debugfs_real_fops(filp);

	r = debugfs_locked_down(inode, filp, real_fops);
	if (r)
		goto out;

	if (!fops_get(real_fops)) {
#ifdef CONFIG_MODULES
		if (real_fops->owner &&
		    real_fops->owner->state == MODULE_STATE_GOING) {
			r = -ENXIO;
			goto out;
		}
#endif

		/* Huh? Module did not clean up after itself at exit? */
		WARN(1, "debugfs file owner did not clean up at exit: %pd",
			dentry);
		r = -ENXIO;
		goto out;
	}
	replace_fops(filp, real_fops);

	if (real_fops->open)
		r = real_fops->open(inode, filp);

out:
	debugfs_file_put(dentry);
	return r;
}

/* Proxy that only intercepts ->open(); see open_proxy_open(). */
const struct file_operations debugfs_open_proxy_file_operations = {
	.open = open_proxy_open,
};

/* Pass a parenthesised, comma-separated list through as one macro argument. */
#define PROTO(args...) args
#define ARGS(args...) args

/*
 * Generate full_proxy_<name>(): return @ret if the fsdata's method mask
 * lacks @bit, otherwise call the real fops' ->name() between
 * debugfs_file_get() and debugfs_file_put().
 */
#define FULL_PROXY_FUNC(name, ret_type, filp, proto, args, bit, ret)	\
static ret_type full_proxy_ ## name(proto)				\
{									\
	struct dentry *dentry = F_DENTRY(filp);				\
	struct debugfs_fsdata *fsd = dentry->d_fsdata;			\
	const struct file_operations *real_fops;			\
	ret_type r;							\
									\
	if (!(fsd->methods & bit))					\
		return ret;						\
	r = debugfs_file_get(dentry);					\
	if (unlikely(r))						\
		return r;						\
	real_fops = debugfs_real_fops(filp);				\
	r = real_fops->name(args);					\
	debugfs_file_put(dentry);					\
	return r;							\
}

/*
 * Like FULL_PROXY_FUNC(), but dispatches to fsd->real_fops when it is
 * set and to fsd->short_fops otherwise.
 */
#define FULL_PROXY_FUNC_BOTH(name, ret_type, filp, proto, args, bit, ret)	\
static ret_type full_proxy_ ## name(proto)				\
{									\
	struct dentry *dentry = F_DENTRY(filp);				\
	struct debugfs_fsdata *fsd = dentry->d_fsdata;			\
	ret_type r;							\
									\
	if (!(fsd->methods & bit))					\
		return ret;						\
	r = debugfs_file_get(dentry);					\
	if (unlikely(r))						\
		return r;						\
	if (fsd->real_fops)						\
		r = fsd->real_fops->name(args);				\
	else								\
		r = fsd->short_fops->name(args);			\
	debugfs_file_put(dentry);					\
	return r;							\
}

FULL_PROXY_FUNC_BOTH(llseek, loff_t, filp,
		     PROTO(struct file *filp, loff_t offset, int whence),
		     ARGS(filp, offset, whence), HAS_LSEEK, -ESPIPE);

FULL_PROXY_FUNC_BOTH(read, ssize_t, filp,
		     PROTO(struct file *filp, char __user *buf, size_t size,
			   loff_t *ppos),
		     ARGS(filp, buf, size, ppos), HAS_READ, -EINVAL);

FULL_PROXY_FUNC_BOTH(write, ssize_t, filp,
		     PROTO(struct file *filp, const char __user *buf,
			   size_t size, loff_t *ppos),
		     ARGS(filp, buf, size, ppos), HAS_WRITE, -EINVAL);

FULL_PROXY_FUNC(unlocked_ioctl, long, filp,
		PROTO(struct file *filp, unsigned int cmd, unsigned long arg),
		ARGS(filp, cmd, arg), HAS_IOCTL, -ENOTTY);

/*
 * ->poll() proxy. Written out by hand because its failure values are
 * poll masks rather than errnos: DEFAULT_POLLMASK when the real fops
 * have no ->poll(), EPOLLHUP when debugfs_file_get() fails.
 */
static __poll_t full_proxy_poll(struct file *filp,
				struct poll_table_struct *wait)
{
	struct dentry *dentry = F_DENTRY(filp);
	struct debugfs_fsdata *fsd = dentry->d_fsdata;
	__poll_t r = 0;
	const struct file_operations *real_fops;

	if (!(fsd->methods & HAS_POLL))
		return DEFAULT_POLLMASK;
	if (debugfs_file_get(dentry))
		return EPOLLHUP;

	real_fops = debugfs_real_fops(filp);
	r = real_fops->poll(filp, wait);
	debugfs_file_put(dentry);
	return r;
}

/*
 * ->release() proxy: call the real ->release(), if any, and drop the
 * fops reference.
 */
static int full_proxy_release(struct inode *inode, struct file *filp)
{
	const struct file_operations *real_fops = debugfs_real_fops(filp);
	int r = 0;

	/*
	 * We must not protect this against removal races here: the
	 * original releaser should be called unconditionally in order
	 * not to leak any resources. Releasers must not assume that
	 * ->i_private is still being meaningful here.
	 */
	if (real_fops->release)
		r = real_fops->release(inode, filp);

	fops_put(real_fops);
	return r;
}

/*
 * ->open() of the full proxy for regular file_operations. Unlike
 * open_proxy_open() the file keeps the proxy fops. The reference taken
 * with fops_get() is dropped again here when the real ->open() fails
 * or replaced the file's fops.
 *
 * Return: 0 or the real ->open() result on success; -ENOENT if the
 * file is already removed, -EPERM under lockdown, -ENXIO if the fops
 * owner cannot be pinned.
 */
static int full_proxy_open_regular(struct inode *inode, struct file *filp)
{
	struct dentry *dentry = F_DENTRY(filp);
	const struct file_operations *real_fops;
	struct debugfs_fsdata *fsd;
	int r;

	r = __debugfs_file_get(dentry, DBGFS_GET_REGULAR);
	if (r)
		return r == -EIO ? -ENOENT : r;

	fsd = dentry->d_fsdata;
	real_fops = fsd->real_fops;
	r = debugfs_locked_down(inode, filp, real_fops);
	if (r)
		goto out;

	if (!fops_get(real_fops)) {
#ifdef CONFIG_MODULES
		if (real_fops->owner &&
		    real_fops->owner->state == MODULE_STATE_GOING) {
			r = -ENXIO;
			goto out;
		}
#endif

		/* Huh? Module did not cleanup after itself at exit? */
		WARN(1, "debugfs file owner did not clean up at exit: %pd",
			dentry);
		r = -ENXIO;
		goto out;
	}

	if (real_fops->open) {
		r = real_fops->open(inode, filp);
		if (r) {
			fops_put(real_fops);
		} else if (filp->f_op != &debugfs_full_proxy_file_operations) {
			/* No protection against file removal anymore. */
			WARN(1, "debugfs file owner replaced proxy fops: %pd",
				dentry);
			fops_put(real_fops);
		}
	}
out:
	debugfs_file_put(dentry);
	return r;
}

/* Full proxy: every handler goes through a full_proxy_*() wrapper. */
const struct file_operations debugfs_full_proxy_file_operations = {
	.open = full_proxy_open_regular,
	.release = full_proxy_release,
	.llseek = full_proxy_llseek,
	.read = full_proxy_read,
	.write = full_proxy_write,
	.poll = full_proxy_poll,
	.unlocked_ioctl = full_proxy_unlocked_ioctl
};

/*
 * ->open() of the full proxy for short fops: run the lockdown check
 * (without real fops) and then simple_open().
 *
 * Return: 0 on success; -ENOENT if the file is already removed,
 * otherwise the error from the lockdown check or simple_open().
 */
static int full_proxy_open_short(struct inode *inode, struct file *filp)
{
	struct dentry *dentry = F_DENTRY(filp);
	int r;

	r = __debugfs_file_get(dentry, DBGFS_GET_SHORT);
	if (r)
		return r == -EIO ? -ENOENT : r;
	r = debugfs_locked_down(inode, filp, NULL);
	if (!r)
		r = simple_open(inode, filp);
	debugfs_file_put(dentry);
	return r;
}

/* Full proxy for short fops; only llseek, read and write are proxied. */
const struct file_operations debugfs_full_short_proxy_file_operations = {
	.open = full_proxy_open_short,
	.llseek = full_proxy_llseek,
	.read = full_proxy_read,
	.write = full_proxy_write,
};

/*
 * simple_attr_read() bracketed by debugfs_file_get()/debugfs_file_put().
 * Returns the debugfs_file_get() error if the file cannot be accessed.
 */
ssize_t debugfs_attr_read(struct file *file, char __user *buf,
			size_t len, loff_t *ppos)
{
	struct dentry *dentry = F_DENTRY(file);
	ssize_t ret;

	ret = debugfs_file_get(dentry);
	if (unlikely(ret))
		return ret;
	ret = simple_attr_read(file, buf, len, ppos);
	debugfs_file_put(dentry);
	return ret;
}
EXPORT_SYMBOL_GPL(debugfs_attr_read);

/*
 * Common helper for the two write variants: @is_signed selects
 * simple_attr_write_signed() over simple_attr_write(), bracketed by
 * debugfs_file_get()/debugfs_file_put().
 */
static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf,
			 size_t len, loff_t *ppos, bool is_signed)
{
	struct dentry *dentry = F_DENTRY(file);
	ssize_t ret;

	ret = debugfs_file_get(dentry);
	if (unlikely(ret))
		return ret;
	if (is_signed)
		ret = simple_attr_write_signed(file, buf, len, ppos);
	else
		ret = simple_attr_write(file, buf, len, ppos);
	debugfs_file_put(dentry);
	return ret;
}

/* Removal-safe write using simple_attr_write(). */
ssize_t debugfs_attr_write(struct file *file, const char __user *buf,
			 size_t len, loff_t *ppos)
{
	return debugfs_attr_write_xsigned(file, buf, len, ppos, false);
}
EXPORT_SYMBOL_GPL(debugfs_attr_write);

/* Removal-safe write using simple_attr_write_signed(). */
ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf,
			 size_t len, loff_t *ppos)
{
	return debugfs_attr_write_xsigned(file, buf, len, ppos, true);
}
EXPORT_SYMBOL_GPL(debugfs_attr_write_signed);

/*
 * Create a file with debugfs_create_file_unsafe(), choosing the fops by
 * the permission bits in @mode: @fops_ro if no write bit is set,
 * @fops_wo if no read bit is set, @fops otherwise.
 */
static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode,
					struct dentry *parent, void *value,
					const struct file_operations *fops,
					const struct file_operations *fops_ro,
					const struct file_operations *fops_wo)
{
	/* if there are no write bits set, make read only */
	if (!(mode & S_IWUGO))
		return debugfs_create_file_unsafe(name, mode, parent, value,
						fops_ro);
	/* if there are no read bits set, make write only */
	if (!(mode & S_IRUGO))
		return debugfs_create_file_unsafe(name, mode, parent, value,
						fops_wo);

	return debugfs_create_file_unsafe(name, mode, parent, value, fops);
}

/* Store @val into the u8 that @data points to; always returns 0. */
static int debugfs_u8_set(void *data, u64 val)
{
	*(u8 *)data = val;
	return 0;
}
/* Read the u8 that @data points to into *@val; always returns 0. */
static int debugfs_u8_get(void *data, u64 *val)
{
	*val = *(u8 *)data;
	return 0;
}
DEFINE_DEBUGFS_ATTRIBUTE(fops_u8, debugfs_u8_get, debugfs_u8_set, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_ro, debugfs_u8_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_u8_wo, NULL, debugfs_u8_set, "%llu\n");

/**
 * debugfs_create_u8 - create a debugfs file that is used to read and write an unsigned 8-bit value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read from and
 *         write to.
 *
 * This function creates a file in debugfs with the given name that
 * contains the value of the variable @value.  If the @mode variable is so
 * set, it can be read from, and written to.
*/ void debugfs_create_u8(const char *name, umode_t mode, struct dentry *parent, u8 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u8, &fops_u8_ro, &fops_u8_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u8); static int debugfs_u16_set(void *data, u64 val) { *(u16 *)data = val; return 0; } static int debugfs_u16_get(void *data, u64 *val) { *val = *(u16 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u16, debugfs_u16_get, debugfs_u16_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_ro, debugfs_u16_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u16_wo, NULL, debugfs_u16_set, "%llu\n"); /** * debugfs_create_u16 - create a debugfs file that is used to read and write an unsigned 16-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. 
*/ void debugfs_create_u16(const char *name, umode_t mode, struct dentry *parent, u16 *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u16, &fops_u16_ro, &fops_u16_wo); } EXPORT_SYMBOL_GPL(debugfs_create_u16); static int debugfs_u32_set(void *data, u64 val) { *(u32 *)data = val; return 0; } static int debugfs_u32_get(void *data, u64 *val) { *val = *(u32 *)data; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(fops_u32, debugfs_u32_get, debugfs_u32_set, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_ro, debugfs_u32_get, NULL, "%llu\n"); DEFINE_DEBUGFS_ATTRIBUTE(fops_u32_wo, NULL, debugfs_u32_set, "%llu\n"); /** * debugfs_create_u32 - create a debugfs file that is used to read and write an unsigned 32-bit value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. * @value: a pointer to the variable that the file should read to and write * from. * * This function creates a file in debugfs with the given name that * contains the value of the variable @value. If the @mode variable is so * set, it can be read from, and written to. 
*/
void debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent,
			u32 *value)
{
	/* Same pattern as the other integer helpers: rw, ro and wo fops. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u32,
				   &fops_u32_ro, &fops_u32_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_u32);

/* Attribute setter: store @val in the u64 that @data points at. */
static int debugfs_u64_set(void *data, u64 val)
{
	*(u64 *)data = val;
	return 0;
}

/* Attribute getter: report the u64 that @data points at through @val. */
static int debugfs_u64_get(void *data, u64 *val)
{
	*val = *(u64 *)data;
	return 0;
}

/* Decimal ("%llu\n") attributes: read-write, read-only and write-only. */
DEFINE_DEBUGFS_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_ro, debugfs_u64_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");

/**
 * debugfs_create_u64 - create a debugfs file that is used to read and write an unsigned 64-bit value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 *
 * This function creates a file in debugfs with the given name that
 * contains the value of the variable @value.  If the @mode variable is so
 * set, it can be read from, and written to.
*/
void debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent,
			u64 *value)
{
	/* Same pattern as the other integer helpers: rw, ro and wo fops. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_u64,
				   &fops_u64_ro, &fops_u64_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_u64);

/* Attribute setter: store @val in the unsigned long that @data points at. */
static int debugfs_ulong_set(void *data, u64 val)
{
	/* Narrows implicitly wherever unsigned long is smaller than u64. */
	*(unsigned long *)data = val;
	return 0;
}

/* Attribute getter: report the unsigned long at @data through @val. */
static int debugfs_ulong_get(void *data, u64 *val)
{
	*val = *(unsigned long *)data;
	return 0;
}

/* Decimal ("%llu\n") attributes: read-write, read-only and write-only. */
DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong, debugfs_ulong_get, debugfs_ulong_set,
			"%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_ro, debugfs_ulong_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_ulong_wo, NULL, debugfs_ulong_set, "%llu\n");

/**
 * debugfs_create_ulong - create a debugfs file that is used to read and write
 * an unsigned long value.
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 *
 * This function creates a file in debugfs with the given name that
 * contains the value of the variable @value.  If the @mode variable is so
 * set, it can be read from, and written to.
*/
void debugfs_create_ulong(const char *name, umode_t mode, struct dentry *parent,
			  unsigned long *value)
{
	/* Same pattern as the other integer helpers: rw, ro and wo fops. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_ulong,
				   &fops_ulong_ro, &fops_ulong_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_ulong);

/*
 * Hexadecimal attributes.  They reuse the u8/u16/u32/u64 getters and setters
 * and differ from the decimal ones only in the format string, which
 * zero-pads the output to the full width of the type.
 */
DEFINE_DEBUGFS_ATTRIBUTE(fops_x8, debugfs_u8_get, debugfs_u8_set, "0x%02llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_ro, debugfs_u8_get, NULL, "0x%02llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x8_wo, NULL, debugfs_u8_set, "0x%02llx\n");

DEFINE_DEBUGFS_ATTRIBUTE(fops_x16, debugfs_u16_get, debugfs_u16_set,
			"0x%04llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_ro, debugfs_u16_get, NULL, "0x%04llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x16_wo, NULL, debugfs_u16_set, "0x%04llx\n");

DEFINE_DEBUGFS_ATTRIBUTE(fops_x32, debugfs_u32_get, debugfs_u32_set,
			"0x%08llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_ro, debugfs_u32_get, NULL, "0x%08llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x32_wo, NULL, debugfs_u32_set, "0x%08llx\n");

DEFINE_DEBUGFS_ATTRIBUTE(fops_x64, debugfs_u64_get, debugfs_u64_set,
			"0x%016llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_ro, debugfs_u64_get, NULL, "0x%016llx\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_x64_wo, NULL, debugfs_u64_set, "0x%016llx\n");

/*
 * debugfs_create_x{8,16,32,64} - create a debugfs file that is used to read
 * and write an unsigned {8,16,32,64}-bit value
 *
 * These functions are exactly the same as the decimal functions above, except
 * that the value is shown in hexadecimal.  For details look at the unsigned
 * decimal functions.
 */

/**
 * debugfs_create_x8 - create a debugfs file that is used to read and write an unsigned 8-bit value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
* @value: a pointer to the variable that the file should read to and write
 *         from.
 */
void debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent,
		       u8 *value)
{
	/* Hex flavour of debugfs_create_u8(): same storage, fops_x8* format. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x8,
				   &fops_x8_ro, &fops_x8_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_x8);

/**
 * debugfs_create_x16 - create a debugfs file that is used to read and write an unsigned 16-bit value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
void debugfs_create_x16(const char *name, umode_t mode, struct dentry *parent,
			u16 *value)
{
	/* Hex flavour of debugfs_create_u16(): same storage, fops_x16* format. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x16,
				   &fops_x16_ro, &fops_x16_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_x16);

/**
 * debugfs_create_x32 - create a debugfs file that is used to read and write an unsigned 32-bit value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
void debugfs_create_x32(const char *name, umode_t mode, struct dentry *parent,
			u32 *value)
{
	/* Hex flavour of debugfs_create_u32(): same storage, fops_x32* format. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x32,
				   &fops_x32_ro, &fops_x32_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_x32);

/**
 * debugfs_create_x64 - create a debugfs file that is used to read and write an unsigned 64-bit value
 * @name: a pointer to a string containing the name of the file to create.
* @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 */
void debugfs_create_x64(const char *name, umode_t mode, struct dentry *parent,
			u64 *value)
{
	/* Hex flavour of debugfs_create_u64(): same storage, fops_x64* format. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_x64,
				   &fops_x64_ro, &fops_x64_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_x64);

/* Attribute setter: store @val in the size_t that @data points at. */
static int debugfs_size_t_set(void *data, u64 val)
{
	/* Narrows implicitly wherever size_t is smaller than u64. */
	*(size_t *)data = val;
	return 0;
}

/* Attribute getter: report the size_t that @data points at through @val. */
static int debugfs_size_t_get(void *data, u64 *val)
{
	*val = *(size_t *)data;
	return 0;
}

/*
 * The value always travels through a u64, so it is printed with %llu rather
 * than %zu.
 */
DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t, debugfs_size_t_get, debugfs_size_t_set,
			"%llu\n"); /* %llu and %zu are more or less the same */
DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_ro, debugfs_size_t_get, NULL, "%llu\n");
DEFINE_DEBUGFS_ATTRIBUTE(fops_size_t_wo, NULL, debugfs_size_t_set, "%llu\n");

/**
 * debugfs_create_size_t - create a debugfs file that is used to read and write an size_t value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
*/
void debugfs_create_size_t(const char *name, umode_t mode,
			   struct dentry *parent, size_t *value)
{
	/* Same pattern as the other integer helpers: rw, ro and wo fops. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_size_t,
				   &fops_size_t_ro, &fops_size_t_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_size_t);

/* Attribute setter: atomic_set() the atomic_t at @data to @val. */
static int debugfs_atomic_t_set(void *data, u64 val)
{
	atomic_set((atomic_t *)data, val);
	return 0;
}

/* Attribute getter: atomic_read() the atomic_t at @data into @val. */
static int debugfs_atomic_t_get(void *data, u64 *val)
{
	*val = atomic_read((atomic_t *)data);
	return 0;
}

/*
 * Unlike the unsigned helpers above, these use the _SIGNED attribute macro
 * together with a "%lld\n" format.
 */
DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get,
			debugfs_atomic_t_set, "%lld\n");
DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL,
			"%lld\n");
DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set,
			"%lld\n");

/**
 * debugfs_create_atomic_t - create a debugfs file that is used to read and
 * write an atomic_t value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
*/ void debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value) { debugfs_create_mode_unsafe(name, mode, parent, value, &fops_atomic_t, &fops_atomic_t_ro, &fops_atomic_t_wo); } EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { char buf[2]; bool val; int r; struct dentry *dentry = F_DENTRY(file); r = debugfs_file_get(dentry); if (unlikely(r)) return r; val = *(bool *)file->private_data; debugfs_file_put(dentry); if (val) buf[0] = 'Y'; else buf[0] = 'N'; buf[1] = '\n'; return simple_read_from_buffer(user_buf, count, ppos, buf, 2); } EXPORT_SYMBOL_GPL(debugfs_read_file_bool); ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { bool bv; int r; bool *val = file->private_data; struct dentry *dentry = F_DENTRY(file); r = kstrtobool_from_user(user_buf, count, &bv); if (!r) { r = debugfs_file_get(dentry); if (unlikely(r)) return r; *val = bv; debugfs_file_put(dentry); } return count; } EXPORT_SYMBOL_GPL(debugfs_write_file_bool); static const struct file_operations fops_bool = { .read = debugfs_read_file_bool, .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_bool_ro = { .read = debugfs_read_file_bool, .open = simple_open, .llseek = default_llseek, }; static const struct file_operations fops_bool_wo = { .write = debugfs_write_file_bool, .open = simple_open, .llseek = default_llseek, }; /** * debugfs_create_bool - create a debugfs file that is used to read and write a boolean value * @name: a pointer to a string containing the name of the file to create. * @mode: the permission that the file should have * @parent: a pointer to the parent dentry for this file. This should be a * directory dentry if set. If this parameter is %NULL, then the * file will be created in the root of the debugfs filesystem. 
* @value: a pointer to the variable that the file should read to and write
 *         from.
 *
 * This function creates a file in debugfs with the given name that
 * contains the value of the variable @value.  If the @mode variable is so
 * set, it can be read from, and written to.
 */
void debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent,
			 bool *value)
{
	/* Boolean files use the hand-written fops_bool* tables. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_bool,
				   &fops_bool_ro, &fops_bool_wo);
}
EXPORT_SYMBOL_GPL(debugfs_create_bool);

/*
 * Read handler for string files.  file->private_data points at a char
 * pointer; the string it refers to is copied into a temporary buffer and a
 * trailing newline is put in place of the terminating NUL before the copy is
 * handed to user space.
 *
 * Returns the simple_read_from_buffer() result, the debugfs_file_get() error,
 * -ENOMEM if the temporary buffer cannot be allocated, or the negative
 * strscpy() result.
 */
ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf,
			      size_t count, loff_t *ppos)
{
	struct dentry *dentry = F_DENTRY(file);
	char *str, *copy = NULL;
	int copy_len, len;
	ssize_t ret;

	ret = debugfs_file_get(dentry);
	if (unlikely(ret))
		return ret;

	/* NOTE(review): strlen() is applied unconditionally, so this relies on
	 * the stored pointer being non-NULL -- confirm against callers.
	 */
	str = *(char **)file->private_data;
	len = strlen(str) + 1;
	copy = kmalloc(len, GFP_KERNEL);
	if (!copy) {
		debugfs_file_put(dentry);
		return -ENOMEM;
	}

	/* Take the snapshot while the debugfs_file_get() reference is held. */
	copy_len = strscpy(copy, str, len);
	debugfs_file_put(dentry);
	if (copy_len < 0) {
		kfree(copy);
		return copy_len;
	}

	/* Overwrite the NUL with '\n'; @len bytes are exposed to the reader. */
	copy[copy_len] = '\n';

	ret = simple_read_from_buffer(user_buf, count, ppos, copy, len);
	kfree(copy);

	return ret;
}
/* NOTE(review): this exports debugfs_create_str although it directly follows
 * debugfs_read_file_str() -- confirm the placement is intentional.
 */
EXPORT_SYMBOL_GPL(debugfs_create_str);

/*
 * Write handler for string files.  A write either starts at offset 0 and
 * replaces the string, or starts exactly at the end of the current string
 * and appends to it.  The new string is built in a fresh allocation,
 * published with rcu_assign_pointer() and the old buffer is freed after
 * synchronize_rcu().
 *
 * Returns @count on success, the debugfs_file_get() error, -EINVAL for a
 * write at any other offset, -E2BIG if the result would not fit in
 * PAGE_SIZE, -ENOMEM on allocation failure or -EFAULT if the user copy
 * fails.
 */
static ssize_t debugfs_write_file_str(struct file *file, const char __user *user_buf,
				      size_t count, loff_t *ppos)
{
	struct dentry *dentry = F_DENTRY(file);
	char *old, *new = NULL;
	int pos = *ppos;	/* loff_t narrowed to int */
	int r;

	r = debugfs_file_get(dentry);
	if (unlikely(r))
		return r;

	old = *(char **)file->private_data;

	/* only allow strict concatenation */
	r = -EINVAL;
	if (pos && pos != strlen(old))
		goto error;

	/* Room is needed for the existing prefix, the new data and the NUL. */
	r = -E2BIG;
	if (pos + count + 1 > PAGE_SIZE)
		goto error;

	r = -ENOMEM;
	new = kmalloc(pos + count + 1, GFP_KERNEL);
	if (!new)
		goto error;

	/* Appending: carry the first @pos bytes of the old string over. */
	if (pos)
		memcpy(new, old, pos);

	r = -EFAULT;
	if (copy_from_user(new + pos, user_buf, count))
		goto error;

	new[pos + count] = '\0';
	/* The return value of strim() is not used here. */
	strim(new);

	rcu_assign_pointer(*(char __rcu **)file->private_data, new);
	synchronize_rcu();
	kfree(old);

	debugfs_file_put(dentry);
	return count;

error:
	/* @new is still NULL if the failure happened before the allocation. */
	kfree(new);
	debugfs_file_put(dentry);
	return r;
}

/* Read-write string file. */
static const struct file_operations fops_str = {
	.read =		debugfs_read_file_str,
	.write =	debugfs_write_file_str,
	.open =		simple_open,
	.llseek =	default_llseek,
};

/* Read-only string file. */
static const struct file_operations fops_str_ro = {
	.read =		debugfs_read_file_str,
	.open =		simple_open,
	.llseek =	default_llseek,
};

/* Write-only string file. */
static const struct file_operations fops_str_wo = {
	.write =	debugfs_write_file_str,
	.open =		simple_open,
	.llseek =	default_llseek,
};

/**
 * debugfs_create_str - create a debugfs file that is used to read and write a string value
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @value: a pointer to the variable that the file should read to and write
 *         from.
 *
 * This function creates a file in debugfs with the given name that
 * contains the value of the variable @value.  If the @mode variable is so
 * set, it can be read from, and written to.
*/
void debugfs_create_str(const char *name, umode_t mode,
			struct dentry *parent, char **value)
{
	/* String files use the hand-written fops_str* tables. */
	debugfs_create_mode_unsafe(name, mode, parent, value, &fops_str,
				   &fops_str_ro, &fops_str_wo);
}

/*
 * Read handler for blob files: copies from blob->data, bounded by
 * blob->size, while a debugfs_file_get() reference is held.  Returns the
 * simple_read_from_buffer() result or the debugfs_file_get() error.
 */
static ssize_t read_file_blob(struct file *file, char __user *user_buf,
			      size_t count, loff_t *ppos)
{
	struct debugfs_blob_wrapper *blob = file->private_data;
	struct dentry *dentry = F_DENTRY(file);
	ssize_t r;

	r = debugfs_file_get(dentry);
	if (unlikely(r))
		return r;
	r = simple_read_from_buffer(user_buf, count, ppos, blob->data,
				blob->size);
	debugfs_file_put(dentry);
	return r;
}

/*
 * Write handler for blob files: copies user data into blob->data, bounded by
 * blob->size, while a debugfs_file_get() reference is held.  Returns the
 * simple_write_to_buffer() result or the debugfs_file_get() error.
 */
static ssize_t write_file_blob(struct file *file, const char __user *user_buf,
			       size_t count, loff_t *ppos)
{
	struct debugfs_blob_wrapper *blob = file->private_data;
	struct dentry *dentry = F_DENTRY(file);
	ssize_t r;

	r = debugfs_file_get(dentry);
	if (unlikely(r))
		return r;
	r = simple_write_to_buffer(blob->data, blob->size, ppos, user_buf,
				   count);

	debugfs_file_put(dentry);
	return r;
}

/* Blob files have a single table offering both read and write. */
static const struct file_operations fops_blob = {
	.read =		read_file_blob,
	.write =	write_file_blob,
	.open =		simple_open,
	.llseek =	default_llseek,
};

/**
 * debugfs_create_blob - create a debugfs file that is used to read and write
 * a binary blob
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @blob: a pointer to a struct debugfs_blob_wrapper which contains a pointer
 *        to the blob data and the size of the data.
 *
 * This function creates a file in debugfs with the given name that exports
 * @blob->data as a binary blob. If the @mode variable is so set it can be
 * read from and written to.
 *
 * This function will return a pointer to a dentry if it succeeds.
This * pointer must be passed to the debugfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here.) If an error occurs, ERR_PTR(-ERROR) will be * returned. * * If debugfs is not enabled in the kernel, the value ERR_PTR(-ENODEV) will * be returned. */ struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) { return debugfs_create_file_unsafe(name, mode & 0644, parent, blob, &fops_blob); } EXPORT_SYMBOL_GPL(debugfs_create_blob); static size_t u32_format_array(char *buf, size_t bufsize, u32 *array, int array_size) { size_t ret = 0; while (--array_size >= 0) { size_t len; char term = array_size ? ' ' : '\n'; len = snprintf(buf, bufsize, "%u%c", *array++, term); ret += len; buf += len; bufsize -= len; } return ret; } static int u32_array_open(struct inode *inode, struct file *file) { struct debugfs_u32_array *data = inode->i_private; int size, elements = data->n_elements; char *buf; /* * Max size: * - 10 digits + ' '/'\n' = 11 bytes per number * - terminating NUL character */ size = elements*11; buf = kmalloc(size+1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[size] = 0; file->private_data = buf; u32_format_array(buf, size, data->array, data->n_elements); return nonseekable_open(inode, file); } static ssize_t u32_array_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { size_t size = strlen(file->private_data); return simple_read_from_buffer(buf, len, ppos, file->private_data, size); } static int u32_array_release(struct inode *inode, struct file *file) { kfree(file->private_data); return 0; } static const struct file_operations u32_array_fops = { .owner = THIS_MODULE, .open = u32_array_open, .release = u32_array_release, .read = u32_array_read, }; /** * debugfs_create_u32_array - create a debugfs file that is used to read u32 * array. 
* @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have.
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @array: wrapper struct containing data pointer and size of the array.
 *
 * This function creates a file in debugfs with the given name that exports
 * @array as data. If the @mode variable is so set it can be read from.
 * Writing is not supported. Seek within the file is also not supported.
 * Once array is created its size can not be changed.
 */
void debugfs_create_u32_array(const char *name, umode_t mode,
			      struct dentry *parent,
			      struct debugfs_u32_array *array)
{
	/* @array is the file's private data; u32_array_fops formats it on open. */
	debugfs_create_file_unsafe(name, mode, parent, array, &u32_array_fops);
}
EXPORT_SYMBOL_GPL(debugfs_create_u32_array);

#ifdef CONFIG_HAS_IOMEM

/*
 * The regset32 stuff is used to print 32-bit registers using the
 * seq_file utilities. We offer printing a register set in an already-opened
 * sequential file or create a debugfs file that only prints a regset32.
 */

/**
 * debugfs_print_regs32 - use seq_print to describe a set of registers
 * @s: the seq_file structure being used to generate output
 * @regs: an array if struct debugfs_reg32 structures
 * @nregs: the length of the above array
 * @base: the base address to be used in reading the registers
 * @prefix: a string to be prefixed to every output line
 *
 * This function outputs a text block describing the current values of
 * some 32-bit hardware registers. It is meant to be used within debugfs
 * files based on seq_file that need to show registers, intermixed with other
 * information.
The prefix argument may be used to specify a leading string,
 * because some peripherals have several blocks of identical registers,
 * for example configuration of dma channels
 */
void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs,
			  int nregs, void __iomem *base, char *prefix)
{
	int i;

	/* One "<prefix><name> = 0x<value>" line per register, read via readl(). */
	for (i = 0; i < nregs; i++, regs++) {
		if (prefix)
			seq_printf(s, "%s", prefix);
		seq_printf(s, "%s = 0x%08x\n", regs->name,
			   readl(base + regs->offset));
		/* Stop reading registers once the seq_file buffer is full. */
		if (seq_has_overflowed(s))
			break;
	}
}
EXPORT_SYMBOL_GPL(debugfs_print_regs32);

/*
 * seq_file show callback for regset32 files: prints every register of the
 * set stored in s->private with an empty prefix.  When the set names a
 * device, the dump is bracketed by pm_runtime_get_sync()/pm_runtime_put();
 * the result of pm_runtime_get_sync() is not checked.  Always returns 0.
 */
static int debugfs_regset32_show(struct seq_file *s, void *data)
{
	struct debugfs_regset32 *regset = s->private;

	if (regset->dev)
		pm_runtime_get_sync(regset->dev);

	debugfs_print_regs32(s, regset->regs, regset->nregs, regset->base, "");

	if (regset->dev)
		pm_runtime_put(regset->dev);

	return 0;
}

/* Provides debugfs_regset32_fops, which debugfs_create_regset32() uses. */
DEFINE_SHOW_ATTRIBUTE(debugfs_regset32);

/**
 * debugfs_create_regset32 - create a debugfs file that returns register values
 * @name: a pointer to a string containing the name of the file to create.
 * @mode: the permission that the file should have
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *          directory dentry if set.  If this parameter is %NULL, then the
 *          file will be created in the root of the debugfs filesystem.
 * @regset: a pointer to a struct debugfs_regset32, which contains a pointer
 *          to an array of register definitions, the array size and the base
 *          address where the register bank is to be found.
 *
 * This function creates a file in debugfs with the given name that reports
 * the names and values of a set of 32-bit registers. If the @mode variable
 * is so set it can be read from. Writing is not supported.
*/
void debugfs_create_regset32(const char *name, umode_t mode,
			     struct dentry *parent,
			     struct debugfs_regset32 *regset)
{
	/* @regset becomes the private data that debugfs_regset32_show() reads. */
	debugfs_create_file(name, mode, parent, regset, &debugfs_regset32_fops);
}
EXPORT_SYMBOL_GPL(debugfs_create_regset32);

#endif /* CONFIG_HAS_IOMEM */

/* Per-file state of a device-bound seq_file entry. */
struct debugfs_devm_entry {
	/* show callback passed to single_open() */
	int (*read)(struct seq_file *seq, void *data);
	/* device passed to single_open() as the seq_file private data */
	struct device *dev;
};

/* Open handler: start a single-shot seq_file around the stored callback. */
static int debugfs_devm_entry_open(struct inode *inode, struct file *f)
{
	struct debugfs_devm_entry *entry = inode->i_private;

	return single_open(f, entry->read, entry->dev);
}

/* Plain seq_file operations around debugfs_devm_entry_open(). */
static const struct file_operations debugfs_devm_entry_ops = {
	.owner = THIS_MODULE,
	.open = debugfs_devm_entry_open,
	.release = single_release,
	.read = seq_read,
	.llseek = seq_lseek
};

/**
 * debugfs_create_devm_seqfile - create a debugfs file that is bound to device.
 *
 * @dev: device related to this debugfs file.
 * @name: name of the debugfs file.
 * @parent: a pointer to the parent dentry for this file.  This should be a
 *	directory dentry if set.  If this parameter is %NULL, then the
 *	file will be created in the root of the debugfs filesystem.
 * @read_fn: function pointer called to print the seq_file content.
 *
 * Nothing is created, and no error is reported, when @parent is an error
 * pointer or when the entry cannot be allocated.  The file is created
 * with mode S_IRUGO.
 */
void debugfs_create_devm_seqfile(struct device *dev, const char *name,
				 struct dentry *parent,
				 int (*read_fn)(struct seq_file *s, void *data))
{
	struct debugfs_devm_entry *entry;

	if (IS_ERR(parent))
		return;

	/* Allocated with devm_kzalloc() against @dev. */
	entry = devm_kzalloc(dev, sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return;

	entry->read = read_fn;
	entry->dev = dev;

	debugfs_create_file(name, S_IRUGO, parent, entry,
			    &debugfs_devm_entry_ops);
}
EXPORT_SYMBOL_GPL(debugfs_create_devm_seqfile);
102 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 // SPDX-License-Identifier: GPL-2.0 /* RTT/RTO calculation. * * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) * * https://tools.ietf.org/html/rfc6298 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf */ #include <linux/net.h> #include "ar-internal.h" #define RXRPC_RTO_MAX (120 * USEC_PER_SEC) #define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * USEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ static u32 rxrpc_rto_min_us(struct rxrpc_call *call) { return 200; } static u32 __rxrpc_set_rto(const struct rxrpc_call *call) { return (call->srtt_us >> 3) + call->rttvar_us; } static u32 rxrpc_bound_rto(u32 rto) { return clamp(200000, rto + 100000, RXRPC_RTO_MAX); } /* * Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 * piece by Van Jacobson. * NOTE: the next three routines used to be one big routine. * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. 
-- erics
 */
static void rxrpc_rtt_estimator(struct rxrpc_call *call, long sample_rtt_us)
{
	long m = sample_rtt_us; /* RTT */
	u32 srtt = call->srtt_us;

	/*
	 * Scaling used below: call->srtt_us is shifted right by 3 before it is
	 * compared with a sample and is seeded with m << 3, so it carries
	 * eight times the smoothed RTT.  call->mdev_us is handled the same way
	 * with a shift of 2.  A stored srtt of 0 means "no sample yet"; the
	 * final umax(srtt, 1) keeps the stored value non-zero afterwards.
	 */

	/*	The following amusing code comes from Jacobson's
	 *	article in SIGCOMM '88.  Note that rtt and mdev
	 *	are scaled versions of rtt and mean deviation.
	 *	This is designed to be as fast as possible
	 *	m stands for "measurement".
	 *
	 *	On a 1990 paper the rto value is changed to:
	 *	RTO = rtt + 4 * mdev
	 *
	 * Funny. This algorithm seems to be very broken.
	 * These formulae increase RTO, when it should be decreased, increase
	 * too slowly, when it should be increased quickly, decrease too quickly
	 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
	 * does not matter how to _calculate_ it. Seems, it was trap
	 * that VJ failed to avoid. 8)
	 */
	if (srtt != 0) {
		m -= (srtt >> 3);	/* m is now error in rtt est */
		srtt += m;		/* rtt = 7/8 rtt + 1/8 new */
		if (m < 0) {
			m = -m;		/* m is now abs(error) */
			m -= (call->mdev_us >> 2);   /* similar update on mdev */
			/* This is similar to one of Eifel findings.
			 * Eifel blocks mdev updates when rtt decreases.
			 * This solution is a bit different: we use finer gain
			 * for mdev in this case (alpha*beta).
			 * Like Eifel it also prevents growth of rto,
			 * but also it limits too fast rto decreases,
			 * happening in pure Eifel.
			 */
			if (m > 0)
				m >>= 3;
		} else {
			m -= (call->mdev_us >> 2);   /* similar update on mdev */
		}

		call->mdev_us += m;		/* mdev = 3/4 mdev + 1/4 new */
		/* Track the largest deviation seen and raise rttvar_us to it. */
		if (call->mdev_us > call->mdev_max_us) {
			call->mdev_max_us = call->mdev_us;
			if (call->mdev_max_us > call->rttvar_us)
				call->rttvar_us = call->mdev_max_us;
		}
	} else {
		/* no previous measure. */
		srtt = m << 3;		/* take the measured time to be rtt */
		call->mdev_us = m << 1;	/* make sure rto = 3*rtt */
		call->rttvar_us = umax(call->mdev_us, rxrpc_rto_min_us(call));
		call->mdev_max_us = call->rttvar_us;
	}

	call->srtt_us = umax(srtt, 1);
}

/*
 * Calculate rto without backoff.  This is the second half of Van Jacobson's
 * routine referred to above.
*/
static void rxrpc_set_rto(struct rxrpc_call *call)
{
	/*
	 * The raw estimate from __rxrpc_set_rto() is used as it stands; the
	 * only adjustment applied to it is the bounding done by
	 * rxrpc_bound_rto().
	 */
	call->rto_us = rxrpc_bound_rto(__rxrpc_set_rto(call));
}

/*
 * Feed one RTT sample into the call's windowed-minimum RTT filter.
 */
static void rxrpc_update_rtt_min(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
{
	/*
	 * Window length and timestamp are both divided by 1024 to
	 * approximate microseconds.
	 * NOTE(review): 5 * NSEC_PER_SEC / 1024 corresponds to about five
	 * seconds, whereas the comment this replaces spoke of a five minute
	 * window (ipv4.sysctl_tcp_min_rtt_wlen) -- confirm which is intended.
	 */
	const u32 window = 5ULL * NSEC_PER_SEC / 1024;
	u32 sample = (u32)rtt_us;

	/* A zero sample is replaced by the length of one jiffy. */
	if (!sample)
		sample = jiffies_to_usecs(1);

	minmax_running_min(&call->min_rtt, window, resp_time / 1024, sample);
}

/*
 * Apply a valid RTT sample: refresh the minimum-RTT filter, the smoothed
 * estimate and the RTO, then clear the backoff.  Negative samples are
 * ignored.
 */
static void rxrpc_ack_update_rtt(struct rxrpc_call *call, ktime_t resp_time, long rtt_us)
{
	if (rtt_us >= 0) {
		/* Update RACK min RTT [RFC8985 6.1 Step 1]. */
		rxrpc_update_rtt_min(call, resp_time, rtt_us);

		rxrpc_rtt_estimator(call, rtt_us);
		rxrpc_set_rto(call);

		/* Only reset backoff on valid RTT measurement [RFC6298]. */
		call->backoff = 0;
	}
}

/*
 * Add RTT information to cache.  This is called in softirq mode and has
 * exclusive access to the call RTT data.
*/ void rxrpc_call_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int rtt_slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, ktime_t send_time, ktime_t resp_time) { s64 rtt_us; rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); if (rtt_us < 0) return; rxrpc_ack_update_rtt(call, resp_time, rtt_us); if (call->rtt_count < 3) call->rtt_count++; call->rtt_taken++; WRITE_ONCE(call->peer->recent_srtt_us, call->srtt_us / 8); WRITE_ONCE(call->peer->recent_rto_us, call->rto_us); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, rtt_us, call->srtt_us, call->rto_us); } /* * Get the retransmission timeout to set in nanoseconds, backing it off each * time we retransmit. */ ktime_t rxrpc_get_rto_backoff(struct rxrpc_call *call, bool retrans) { u64 timo_us; u32 backoff = READ_ONCE(call->backoff); timo_us = call->rto_us; timo_us <<= backoff; if (retrans && timo_us * 2 <= RXRPC_RTO_MAX) WRITE_ONCE(call->backoff, backoff + 1); if (timo_us < 1) timo_us = 1; return ns_to_ktime(timo_us * NSEC_PER_USEC); } void rxrpc_call_init_rtt(struct rxrpc_call *call) { call->rtt_last_req = KTIME_MIN; call->rto_us = RXRPC_TIMEOUT_INIT; call->mdev_us = RXRPC_TIMEOUT_INIT; call->backoff = 0; //minmax_reset(&call->rtt_min, rxrpc_jiffies32, ~0U); }
6 8 8 8 8 8 8 8 5 2 1 1 3 1 25 2 8 1093 1084 26 2 1090 13 238 44 1092 1050 205 54 3 20 3 3 7 1 1 99 3 54 1 11 54 11 54 19 4 4 5 5 27 2 25 19 3 1 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET 802.1Q VLAN * Ethernet-type device handling. * * Authors: Ben Greear <greearb@candelatech.com> * Please send support related email to: netdev@vger.kernel.org * VLAN Home Page: http://www.candelatech.com/~greear/vlan.html * * Fixes: * Fix for packet capture - Nick Eggleston <nick@dccinc.com>; * Add HW acceleration hooks - David S. Miller <davem@redhat.com>; * Correct all the locking - David S. Miller <davem@redhat.com>; * Use hash table for VLAN groups - David S. 
Miller <davem@redhat.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/capability.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/rculist.h> #include <net/p8022.h> #include <net/arp.h> #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <net/rtnetlink.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/uaccess.h> #include <linux/if_vlan.h> #include "vlan.h" #include "vlanproc.h" #define DRV_VERSION "1.8" /* Global VLAN variables */ unsigned int vlan_net_id __read_mostly; const char vlan_fullname[] = "802.1Q VLAN Support"; const char vlan_version[] = DRV_VERSION; /* End of global variables definitions. */ static int vlan_group_prealloc_vid(struct vlan_group *vg, __be16 vlan_proto, u16 vlan_id) { struct net_device **array; unsigned int vidx; unsigned int size; int pidx; ASSERT_RTNL(); pidx = vlan_proto_idx(vlan_proto); if (pidx < 0) return -EINVAL; vidx = vlan_id / VLAN_GROUP_ARRAY_PART_LEN; array = vg->vlan_devices_arrays[pidx][vidx]; if (array != NULL) return 0; size = sizeof(struct net_device *) * VLAN_GROUP_ARRAY_PART_LEN; array = kzalloc(size, GFP_KERNEL_ACCOUNT); if (array == NULL) return -ENOBUFS; /* paired with smp_rmb() in __vlan_group_get_device() */ smp_wmb(); vg->vlan_devices_arrays[pidx][vidx] = array; return 0; } static void vlan_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev, struct vlan_dev_priv *vlan) { if (!(vlan->flags & VLAN_FLAG_BRIDGE_BINDING)) netif_stacked_transfer_operstate(rootdev, dev); } void unregister_vlan_dev(struct net_device *dev, struct list_head *head) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; struct vlan_info *vlan_info; struct vlan_group *grp; u16 vlan_id = vlan->vlan_id; ASSERT_RTNL(); vlan_info = rtnl_dereference(real_dev->vlan_info); BUG_ON(!vlan_info); grp = &vlan_info->grp; 
grp->nr_vlan_devs--; if (vlan->flags & VLAN_FLAG_MVRP) vlan_mvrp_request_leave(dev); if (vlan->flags & VLAN_FLAG_GVRP) vlan_gvrp_request_leave(dev); vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, NULL); netdev_upper_dev_unlink(real_dev, dev); /* Because unregister_netdevice_queue() makes sure at least one rcu * grace period is respected before device freeing, * we dont need to call synchronize_net() here. */ unregister_netdevice_queue(dev, head); if (grp->nr_vlan_devs == 0) { vlan_mvrp_uninit_applicant(real_dev); vlan_gvrp_uninit_applicant(real_dev); } vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); } int vlan_check_real_dev(struct net_device *real_dev, __be16 protocol, u16 vlan_id, struct netlink_ext_ack *extack) { const char *name = real_dev->name; if (real_dev->features & NETIF_F_VLAN_CHALLENGED) { pr_info("VLANs not supported on %s\n", name); NL_SET_ERR_MSG_MOD(extack, "VLANs not supported on device"); return -EOPNOTSUPP; } if (vlan_find_dev(real_dev, protocol, vlan_id) != NULL) { NL_SET_ERR_MSG_MOD(extack, "VLAN device already exists"); return -EEXIST; } return 0; } int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; u16 vlan_id = vlan->vlan_id; struct vlan_info *vlan_info; struct vlan_group *grp; int err; err = vlan_vid_add(real_dev, vlan->vlan_proto, vlan_id); if (err) return err; vlan_info = rtnl_dereference(real_dev->vlan_info); /* vlan_info should be there now. 
vlan_vid_add took care of it */ BUG_ON(!vlan_info); grp = &vlan_info->grp; if (grp->nr_vlan_devs == 0) { err = vlan_gvrp_init_applicant(real_dev); if (err < 0) goto out_vid_del; err = vlan_mvrp_init_applicant(real_dev); if (err < 0) goto out_uninit_gvrp; } err = vlan_group_prealloc_vid(grp, vlan->vlan_proto, vlan_id); if (err < 0) goto out_uninit_mvrp; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; err = netdev_upper_dev_link(real_dev, dev, extack); if (err) goto out_unregister_netdev; vlan_stacked_transfer_operstate(real_dev, dev, vlan); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ /* So, got the sucker initialized, now lets place * it into our local structure. */ vlan_group_set_device(grp, vlan->vlan_proto, vlan_id, dev); grp->nr_vlan_devs++; return 0; out_unregister_netdev: unregister_netdevice(dev); out_uninit_mvrp: if (grp->nr_vlan_devs == 0) vlan_mvrp_uninit_applicant(real_dev); out_uninit_gvrp: if (grp->nr_vlan_devs == 0) vlan_gvrp_uninit_applicant(real_dev); out_vid_del: vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); return err; } /* Attach a VLAN device to a mac address (ie Ethernet Card). * Returns 0 if the device was created or a negative error code otherwise. */ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) { struct net_device *new_dev; struct vlan_dev_priv *vlan; struct net *net = dev_net(real_dev); struct vlan_net *vn = net_generic(net, vlan_net_id); char name[IFNAMSIZ]; int err; if (vlan_id >= VLAN_VID_MASK) return -ERANGE; err = vlan_check_real_dev(real_dev, htons(ETH_P_8021Q), vlan_id, NULL); if (err < 0) return err; /* Gotta set up the fields for the device. */ switch (vn->name_type) { case VLAN_NAME_TYPE_RAW_PLUS_VID: /* name will look like: eth1.0005 */ snprintf(name, IFNAMSIZ, "%s.%.4i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. 
* Name will look like: vlan5 */ snprintf(name, IFNAMSIZ, "vlan%i", vlan_id); break; case VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD: /* Put our vlan.VID in the name. * Name will look like: eth0.5 */ snprintf(name, IFNAMSIZ, "%s.%i", real_dev->name, vlan_id); break; case VLAN_NAME_TYPE_PLUS_VID: /* Put our vlan.VID in the name. * Name will look like: vlan0005 */ default: snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id); } new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, NET_NAME_UNKNOWN, vlan_setup); if (new_dev == NULL) return -ENOBUFS; dev_net_set(new_dev, net); /* need 4 bytes for extra VLAN header info, * hope the underlying device can handle it. */ new_dev->mtu = real_dev->mtu; vlan = vlan_dev_priv(new_dev); vlan->vlan_proto = htons(ETH_P_8021Q); vlan->vlan_id = vlan_id; vlan->real_dev = real_dev; vlan->dent = NULL; vlan->flags = VLAN_FLAG_REORDER_HDR; new_dev->rtnl_link_ops = &vlan_link_ops; err = register_vlan_dev(new_dev, NULL); if (err < 0) goto out_free_newdev; return 0; out_free_newdev: free_netdev(new_dev); return err; } static void vlan_sync_address(struct net_device *dev, struct net_device *vlandev) { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); /* May be called without an actual change */ if (ether_addr_equal(vlan->real_dev_addr, dev->dev_addr)) return; /* vlan continues to inherit address of lower device */ if (vlan_dev_inherit_address(vlandev, dev)) goto out; /* vlan address was different from the old address and is equal to * the new address */ if (!ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) && ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_del(dev, vlandev->dev_addr); /* vlan address was equal to the old address and is different from * the new address */ if (ether_addr_equal(vlandev->dev_addr, vlan->real_dev_addr) && !ether_addr_equal(vlandev->dev_addr, dev->dev_addr)) dev_uc_add(dev, vlandev->dev_addr); out: ether_addr_copy(vlan->real_dev_addr, dev->dev_addr); } static void vlan_transfer_features(struct net_device 
*dev, struct net_device *vlandev) { struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev); netif_inherit_tso_max(vlandev, dev); if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto)) vlandev->hard_header_len = dev->hard_header_len; else vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN; #if IS_ENABLED(CONFIG_FCOE) vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE; vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE); vlandev->hw_enc_features = vlan_tnl_features(vlan->real_dev); netdev_update_features(vlandev); } static int __vlan_device_event(struct net_device *dev, unsigned long event) { int err = 0; switch (event) { case NETDEV_CHANGENAME: vlan_proc_rem_dev(dev); err = vlan_proc_add_dev(dev); break; case NETDEV_REGISTER: err = vlan_proc_add_dev(dev); break; case NETDEV_UNREGISTER: vlan_proc_rem_dev(dev); break; } return err; } static int vlan_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(ptr); struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vlan_group *grp; struct vlan_info *vlan_info; int i, flgs; struct net_device *vlandev; struct vlan_dev_priv *vlan; bool last = false; LIST_HEAD(list); int err; if (is_vlan_dev(dev)) { int err = __vlan_device_event(dev, event); if (err) return notifier_from_errno(err); } if ((event == NETDEV_UP) && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) { pr_info("adding VLAN 0 to HW filter on device %s\n", dev->name); vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } if (event == NETDEV_DOWN && (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) vlan_vid_del(dev, htons(ETH_P_8021Q), 0); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) goto out; grp = &vlan_info->grp; /* It is OK that we do not hold the group lock right now, * as we run under the RTNL lock. 
*/ switch (event) { case NETDEV_CHANGE: /* Propagate real device state to vlan devices */ vlan_group_for_each_dev(grp, i, vlandev) vlan_stacked_transfer_operstate(dev, vlandev, vlan_dev_priv(vlandev)); break; case NETDEV_CHANGEADDR: /* Adjust unicast filters on underlying device */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; vlan_sync_address(dev, vlandev); } break; case NETDEV_CHANGEMTU: vlan_group_for_each_dev(grp, i, vlandev) { if (vlandev->mtu <= dev->mtu) continue; dev_set_mtu(vlandev, dev->mtu); } break; case NETDEV_FEAT_CHANGE: /* Propagate device features to underlying device */ vlan_group_for_each_dev(grp, i, vlandev) vlan_transfer_features(dev, vlandev); break; case NETDEV_DOWN: { struct net_device *tmp; LIST_HEAD(close_list); /* Put all VLANs for this dev in the down state too. */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; if (!(flgs & IFF_UP)) continue; vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) list_add(&vlandev->close_list, &close_list); } dev_close_many(&close_list, false); list_for_each_entry_safe(vlandev, tmp, &close_list, close_list) { vlan_stacked_transfer_operstate(dev, vlandev, vlan_dev_priv(vlandev)); list_del_init(&vlandev->close_list); } list_del(&close_list); break; } case NETDEV_UP: /* Put all VLANs for this dev in the up state too. 
*/ vlan_group_for_each_dev(grp, i, vlandev) { flgs = dev_get_flags(vlandev); if (flgs & IFF_UP) continue; vlan = vlan_dev_priv(vlandev); if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING)) dev_change_flags(vlandev, flgs | IFF_UP, extack); vlan_stacked_transfer_operstate(dev, vlandev, vlan); } break; case NETDEV_UNREGISTER: /* twiddle thumbs on netns device moves */ if (dev->reg_state != NETREG_UNREGISTERING) break; vlan_group_for_each_dev(grp, i, vlandev) { /* removal of last vid destroys vlan_info, abort * afterwards */ if (vlan_info->nr_vids == 1) last = true; unregister_vlan_dev(vlandev, &list); if (last) break; } unregister_netdevice_many(&list); break; case NETDEV_PRE_TYPE_CHANGE: /* Forbid underlaying device to change its type. */ if (vlan_uses_dev(dev)) return NOTIFY_BAD; break; case NETDEV_NOTIFY_PEERS: case NETDEV_BONDING_FAILOVER: case NETDEV_RESEND_IGMP: /* Propagate to vlan devices */ vlan_group_for_each_dev(grp, i, vlandev) call_netdevice_notifiers(event, vlandev); break; case NETDEV_CVLAN_FILTER_PUSH_INFO: err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021Q)); if (err) return notifier_from_errno(err); break; case NETDEV_CVLAN_FILTER_DROP_INFO: vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021Q)); break; case NETDEV_SVLAN_FILTER_PUSH_INFO: err = vlan_filter_push_vids(vlan_info, htons(ETH_P_8021AD)); if (err) return notifier_from_errno(err); break; case NETDEV_SVLAN_FILTER_DROP_INFO: vlan_filter_drop_vids(vlan_info, htons(ETH_P_8021AD)); break; } out: return NOTIFY_DONE; } static struct notifier_block vlan_notifier_block __read_mostly = { .notifier_call = vlan_device_event, }; /* * VLAN IOCTL handler. * o execute requested action or pass command to the device driver * arg is really a struct vlan_ioctl_args __user *. 
*/ static int vlan_ioctl_handler(struct net *net, void __user *arg) { int err; struct vlan_ioctl_args args; struct net_device *dev = NULL; if (copy_from_user(&args, arg, sizeof(struct vlan_ioctl_args))) return -EFAULT; /* Null terminate this sucker, just in case. */ args.device1[sizeof(args.device1) - 1] = 0; args.u.device2[sizeof(args.u.device2) - 1] = 0; rtnl_lock(); switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: case SET_VLAN_EGRESS_PRIORITY_CMD: case SET_VLAN_FLAG_CMD: case ADD_VLAN_CMD: case DEL_VLAN_CMD: case GET_VLAN_REALDEV_NAME_CMD: case GET_VLAN_VID_CMD: err = -ENODEV; dev = __dev_get_by_name(net, args.device1); if (!dev) goto out; err = -EINVAL; if (args.cmd != ADD_VLAN_CMD && !is_vlan_dev(dev)) goto out; } switch (args.cmd) { case SET_VLAN_INGRESS_PRIORITY_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; vlan_dev_set_ingress_priority(dev, args.u.skb_priority, args.vlan_qos); err = 0; break; case SET_VLAN_EGRESS_PRIORITY_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = vlan_dev_set_egress_priority(dev, args.u.skb_priority, args.vlan_qos); break; case SET_VLAN_FLAG_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = vlan_dev_change_flags(dev, args.vlan_qos ? 
args.u.flag : 0, args.u.flag); break; case SET_VLAN_NAME_TYPE_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; if (args.u.name_type < VLAN_NAME_TYPE_HIGHEST) { struct vlan_net *vn; vn = net_generic(net, vlan_net_id); vn->name_type = args.u.name_type; err = 0; } else { err = -EINVAL; } break; case ADD_VLAN_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; err = register_vlan_device(dev, args.u.VID); break; case DEL_VLAN_CMD: err = -EPERM; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) break; unregister_vlan_dev(dev, NULL); err = 0; break; case GET_VLAN_REALDEV_NAME_CMD: err = 0; vlan_dev_get_realdev_name(dev, args.u.device2, sizeof(args.u.device2)); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) err = -EFAULT; break; case GET_VLAN_VID_CMD: err = 0; args.u.VID = vlan_dev_vlan_id(dev); if (copy_to_user(arg, &args, sizeof(struct vlan_ioctl_args))) err = -EFAULT; break; default: err = -EOPNOTSUPP; break; } out: rtnl_unlock(); return err; } static int __net_init vlan_init_net(struct net *net) { struct vlan_net *vn = net_generic(net, vlan_net_id); int err; vn->name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD; err = vlan_proc_init(net); return err; } static void __net_exit vlan_exit_net(struct net *net) { vlan_proc_cleanup(net); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit = vlan_exit_net, .id = &vlan_net_id, .size = sizeof(struct vlan_net), }; static int __init vlan_proto_init(void) { int err; pr_info("%s v%s\n", vlan_fullname, vlan_version); err = register_pernet_subsys(&vlan_net_ops); if (err < 0) goto err0; err = register_netdevice_notifier(&vlan_notifier_block); if (err < 0) goto err2; err = vlan_gvrp_init(); if (err < 0) goto err3; err = vlan_mvrp_init(); if (err < 0) goto err4; err = vlan_netlink_init(); if (err < 0) goto err5; vlan_ioctl_set(vlan_ioctl_handler); return 0; err5: vlan_mvrp_uninit(); err4: vlan_gvrp_uninit(); err3: 
unregister_netdevice_notifier(&vlan_notifier_block); err2: unregister_pernet_subsys(&vlan_net_ops); err0: return err; } static void __exit vlan_cleanup_module(void) { vlan_ioctl_set(NULL); vlan_netlink_fini(); unregister_netdevice_notifier(&vlan_notifier_block); unregister_pernet_subsys(&vlan_net_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ vlan_mvrp_uninit(); vlan_gvrp_uninit(); } module_init(vlan_proto_init); module_exit(vlan_cleanup_module); MODULE_DESCRIPTION("802.1Q/802.1ad VLAN Protocol"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_VERSION);
330 1 501 502 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* delayacct.h - per-task delay accounting * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 */ #ifndef _LINUX_DELAYACCT_H #define _LINUX_DELAYACCT_H #include <uapi/linux/taskstats.h> #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info { raw_spinlock_t lock; /* For each stat XXX, add following, aligned appropriately * * struct timespec XXX_start, XXX_end; * u64 XXX_delay; * u32 XXX_count; * * Atomicity of updates to XXX_delay, XXX_count protected by * single lock above (split into XXX_lock if contention is an issue). */ /* * XXX_count is incremented on every XXX operation, the delay * associated with the operation is added to XXX_delay. * XXX_delay contains the accumulated delay time in nanoseconds. 
*/ u64 blkio_start; u64 blkio_delay_max; u64 blkio_delay_min; u64 blkio_delay; /* wait for sync block io completion */ u64 swapin_start; u64 swapin_delay_max; u64 swapin_delay_min; u64 swapin_delay; /* wait for swapin */ u32 blkio_count; /* total count of the number of sync block */ /* io operations performed */ u32 swapin_count; /* total count of swapin */ u64 freepages_start; u64 freepages_delay_max; u64 freepages_delay_min; u64 freepages_delay; /* wait for memory reclaim */ u64 thrashing_start; u64 thrashing_delay_max; u64 thrashing_delay_min; u64 thrashing_delay; /* wait for thrashing page */ u64 compact_start; u64 compact_delay_max; u64 compact_delay_min; u64 compact_delay; /* wait for memory compact */ u64 wpcopy_start; u64 wpcopy_delay_max; u64 wpcopy_delay_min; u64 wpcopy_delay; /* wait for write-protect copy */ u64 irq_delay_max; u64 irq_delay_min; u64 irq_delay; /* wait for IRQ/SOFTIRQ */ u32 freepages_count; /* total count of memory reclaim */ u32 thrashing_count; /* total count of thrash waits */ u32 compact_count; /* total count of memory compact */ u32 wpcopy_count; /* total count of write-protect copy */ u32 irq_count; /* total count of IRQ/SOFTIRQ */ }; #endif #include <linux/sched.h> #include <linux/slab.h> #include <linux/jump_label.h> #ifdef CONFIG_TASK_DELAY_ACCT DECLARE_STATIC_KEY_FALSE(delayacct_key); extern int delayacct_on; /* Delay accounting turned on/off */ extern struct kmem_cache *delayacct_cache; extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); extern void __delayacct_blkio_end(struct task_struct *); extern int delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); extern void __delayacct_freepages_end(void); extern void __delayacct_thrashing_start(bool *in_thrashing); extern void 
__delayacct_thrashing_end(bool *in_thrashing); extern void __delayacct_swapin_start(void); extern void __delayacct_swapin_end(void); extern void __delayacct_compact_start(void); extern void __delayacct_compact_end(void); extern void __delayacct_wpcopy_start(void); extern void __delayacct_wpcopy_end(void); extern void __delayacct_irq(struct task_struct *task, u32 delta); static inline void delayacct_tsk_init(struct task_struct *tsk) { /* reinitialize in case parent's non-null pointer was dup'ed*/ tsk->delays = NULL; if (delayacct_on) __delayacct_tsk_init(tsk); } /* Free tsk->delays. Called from bad fork and __put_task_struct * where there's no risk of tsk->delays being accessed elsewhere */ static inline void delayacct_tsk_free(struct task_struct *tsk) { if (tsk->delays) kmem_cache_free(delayacct_cache, tsk->delays); tsk->delays = NULL; } static inline void delayacct_blkio_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_blkio_start(); } static inline void delayacct_blkio_end(struct task_struct *p) { if (!static_branch_unlikely(&delayacct_key)) return; if (p->delays) __delayacct_blkio_end(p); } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { if (tsk->delays) return __delayacct_blkio_ticks(tsk); return 0; } static inline void delayacct_freepages_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_freepages_start(); } static inline void delayacct_freepages_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_freepages_end(); } static inline void delayacct_thrashing_start(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_thrashing_start(in_thrashing); } static inline void delayacct_thrashing_end(bool *in_thrashing) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_thrashing_end(in_thrashing); } static inline void 
delayacct_swapin_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_swapin_start(); } static inline void delayacct_swapin_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_swapin_end(); } static inline void delayacct_compact_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_compact_start(); } static inline void delayacct_compact_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_compact_end(); } static inline void delayacct_wpcopy_start(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_wpcopy_start(); } static inline void delayacct_wpcopy_end(void) { if (!static_branch_unlikely(&delayacct_key)) return; if (current->delays) __delayacct_wpcopy_end(); } static inline void delayacct_irq(struct task_struct *task, u32 delta) { if (!static_branch_unlikely(&delayacct_key)) return; if (task->delays) __delayacct_irq(task, delta); } #else static inline void delayacct_init(void) {} static inline void delayacct_tsk_init(struct task_struct *tsk) {} static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} static inline void delayacct_blkio_end(struct task_struct *p) {} static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { return 0; } static inline __u64 delayacct_blkio_ticks(struct task_struct *tsk) { return 0; } static inline int delayacct_is_task_waiting_on_io(struct task_struct *p) { return 0; } static inline void delayacct_freepages_start(void) {} static inline void delayacct_freepages_end(void) {} static inline void delayacct_thrashing_start(bool *in_thrashing) {} static inline void delayacct_thrashing_end(bool *in_thrashing) {} static inline void delayacct_swapin_start(void) {} static inline void delayacct_swapin_end(void) {} static inline void 
delayacct_compact_start(void) {} static inline void delayacct_compact_end(void) {} static inline void delayacct_wpcopy_start(void) {} static inline void delayacct_wpcopy_end(void) {} static inline void delayacct_irq(struct task_struct *task, u32 delta) {} #endif /* CONFIG_TASK_DELAY_ACCT */ #endif
337 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_GENERIC_LOCAL64_H #define _ASM_GENERIC_LOCAL64_H #include <linux/percpu.h> #include <asm/types.h> /* * A signed long type for operations which are atomic for a single CPU. * Usually used in combination with per-cpu variables. * * This is the default implementation, which uses atomic64_t. Which is * rather pointless. The whole point behind local64_t is that some processors * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs * running on this CPU. local64_t allows exploitation of such capabilities. */ /* Implement in terms of atomics. */ #if BITS_PER_LONG == 64 #include <asm/local.h> typedef struct { local_t a; } local64_t; #define LOCAL64_INIT(i) { LOCAL_INIT(i) } #define local64_read(l) local_read(&(l)->a) #define local64_set(l,i) local_set((&(l)->a),(i)) #define local64_inc(l) local_inc(&(l)->a) #define local64_dec(l) local_dec(&(l)->a) #define local64_add(i,l) local_add((i),(&(l)->a)) #define local64_sub(i,l) local_sub((i),(&(l)->a)) #define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a)) #define local64_dec_and_test(l) local_dec_and_test(&(l)->a) #define local64_inc_and_test(l) local_inc_and_test(&(l)->a) #define local64_add_negative(i, l) local_add_negative((i), (&(l)->a)) #define local64_add_return(i, l) local_add_return((i), (&(l)->a)) #define local64_sub_return(i, l) local_sub_return((i), (&(l)->a)) #define local64_inc_return(l) local_inc_return(&(l)->a) static inline s64 local64_cmpxchg(local64_t *l, s64 old, s64 new) { return local_cmpxchg(&l->a, old, new); } static inline bool local64_try_cmpxchg(local64_t *l, s64 *old, s64 new) { return 
local_try_cmpxchg(&l->a, (long *)old, new); } #define local64_xchg(l, n) local_xchg((&(l)->a), (n)) #define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u)) #define local64_inc_not_zero(l) local_inc_not_zero(&(l)->a) /* Non-atomic variants, ie. preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. */ #define __local64_inc(l) local64_set((l), local64_read(l) + 1) #define __local64_dec(l) local64_set((l), local64_read(l) - 1) #define __local64_add(i,l) local64_set((l), local64_read(l) + (i)) #define __local64_sub(i,l) local64_set((l), local64_read(l) - (i)) #else /* BITS_PER_LONG != 64 */ #include <linux/atomic.h> /* Don't use typedef: don't want them to be mixed with atomic_t's. */ typedef struct { atomic64_t a; } local64_t; #define LOCAL64_INIT(i) { ATOMIC_LONG_INIT(i) } #define local64_read(l) atomic64_read(&(l)->a) #define local64_set(l,i) atomic64_set((&(l)->a),(i)) #define local64_inc(l) atomic64_inc(&(l)->a) #define local64_dec(l) atomic64_dec(&(l)->a) #define local64_add(i,l) atomic64_add((i),(&(l)->a)) #define local64_sub(i,l) atomic64_sub((i),(&(l)->a)) #define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a)) #define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a) #define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a) #define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a)) #define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a)) #define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a)) #define local64_inc_return(l) atomic64_inc_return(&(l)->a) #define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n)) #define local64_try_cmpxchg(l, po, n) atomic64_try_cmpxchg((&(l)->a), (po), (n)) #define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n)) #define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u)) #define local64_inc_not_zero(l) atomic64_inc_not_zero(&(l)->a) /* Non-atomic variants, ie. 
preemption disabled and won't be touched * in interrupt, etc. Some archs can optimize this case well. */ #define __local64_inc(l) local64_set((l), local64_read(l) + 1) #define __local64_dec(l) local64_set((l), local64_read(l) - 1) #define __local64_add(i,l) local64_set((l), local64_read(l) + (i)) #define __local64_sub(i,l) local64_set((l), local64_read(l) - (i)) #endif /* BITS_PER_LONG != 64 */ #endif /* _ASM_GENERIC_LOCAL64_H */
17 5 5 7 1 17 17 22 2 25 24 1 29 29 16 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 
1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 
1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 
2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 
2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 
3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 
3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 
3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2007-2017 Nicira, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "flow.h" #include "datapath.h" #include <linux/uaccess.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <net/llc_pdu.h> #include <linux/kernel.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/llc.h> #include <linux/module.h> #include <linux/in.h> #include <linux/rcupdate.h> #include <linux/if_arp.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/sctp.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <linux/icmpv6.h> #include <linux/rculist.h> #include <net/geneve.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/mpls.h> #include <net/vxlan.h> #include <net/tun_proto.h> #include <net/erspan.h> #include "drop.h" #include "flow_netlink.h" /* One entry of an attribute length table: 'len' is the expected payload size in bytes, or one of the OVS_ATTR_* sentinels defined right after this struct. 'next' is set by the tables in this file for nested attributes and points at the table of the nested attribute types. */ struct ovs_len_tbl { int len; const struct ovs_len_tbl *next; }; /* Sentinel 'len' values: check_attr_len() accepts any payload length when the expected length is one of these. */ #define OVS_ATTR_NESTED -1 #define OVS_ATTR_VARIABLE -2 #define OVS_COPY_ACTIONS_MAX_DEPTH 16 /* Walk the action attributes nested in 'actions'. Returns false only if every action is one of OUTPUT, RECIRC, TRUNC, USERSPACE, DROP or PSAMPLE (an empty list also yields false). Any other action type, including values not listed here (the default case), makes the function return true immediately. */ static bool actions_may_change_flow(const struct nlattr *actions) { struct nlattr *nla; int rem; nla_for_each_nested(nla, actions, rem) { u16 action = nla_type(nla); switch (action) { case OVS_ACTION_ATTR_OUTPUT: case OVS_ACTION_ATTR_RECIRC: case OVS_ACTION_ATTR_TRUNC: case OVS_ACTION_ATTR_USERSPACE: case OVS_ACTION_ATTR_DROP: case OVS_ACTION_ATTR_PSAMPLE: /* keep scanning the remaining actions */ break; case OVS_ACTION_ATTR_CT: case OVS_ACTION_ATTR_CT_CLEAR: case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_POP_ETH: case OVS_ACTION_ATTR_POP_MPLS: case OVS_ACTION_ATTR_POP_NSH: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_ETH: case OVS_ACTION_ATTR_PUSH_MPLS: case OVS_ACTION_ATTR_PUSH_NSH: case 
OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_METER: case OVS_ACTION_ATTR_CHECK_PKT_LEN: case OVS_ACTION_ATTR_ADD_MPLS: case OVS_ACTION_ATTR_DEC_TTL: default: return true; } } return false; } /* Extend the byte range tracked for the key (match->range) or, when is_mask is set, for the mask (match->mask->range) so that it covers [offset, offset + size). Both ends are first rounded outward to a multiple of sizeof(long). A range whose start equals its end is treated as empty and is simply replaced by the new span. */ static void update_range(struct sw_flow_match *match, size_t offset, size_t size, bool is_mask) { struct sw_flow_key_range *range; size_t start = rounddown(offset, sizeof(long)); size_t end = roundup(offset + size, sizeof(long)); if (!is_mask) range = &match->range; else range = &match->mask->range; if (range->start == range->end) { range->start = start; range->end = end; return; } if (range->start > start) range->start = start; if (range->end < end) range->end = end; } /* Assign 'value' to 'field' of the match key, or of the mask key when is_mask is true, after extending the corresponding range to cover that field. */ #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ do { \ update_range(match, offsetof(struct sw_flow_key, field), \ sizeof((match)->key->field), is_mask); \ if (is_mask) \ (match)->mask->key.field = value; \ else \ (match)->key->field = value; \ } while (0) /* Copy 'len' bytes from value_p to byte position 'offset' inside the key (or the mask key) and extend the corresponding range by the same span. */ #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ do { \ update_range(match, offset, len, is_mask); \ if (is_mask) \ memcpy((u8 *)&(match)->mask->key + offset, value_p, \ len); \ else \ memcpy((u8 *)(match)->key + offset, value_p, len); \ } while (0) /* Field-name form of SW_FLOW_KEY_MEMCPY_OFFSET: the offset is taken from offsetof(struct sw_flow_key, field). */ #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask) \ SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ value_p, len, is_mask) /* Fill every byte of 'field' in the key (or the mask key) with 'value' and extend the corresponding range to cover the field. */ #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ do { \ update_range(match, offsetof(struct sw_flow_key, field), \ sizeof((match)->key->field), is_mask); \ if (is_mask) \ memset((u8 *)&(match)->mask->key.field, value, \ sizeof((match)->mask->key.field)); \ else \ memset((u8 *)&(match)->key->field, value, \ sizeof((match)->key->field)); \ } while (0) /* Copy an nbits-wide bitmap from value_p into 'field' of the key (or the mask key); the range is extended by bitmap_size(nbits) bytes starting at the field. */ #define SW_FLOW_KEY_BITMAP_COPY(match, field, value_p, nbits, is_mask) ({ \ update_range(match, offsetof(struct sw_flow_key, field), \ bitmap_size(nbits), is_mask); \ 
bitmap_copy(is_mask ? (match)->mask->key.field : (match)->key->field, \ value_p, nbits); \ }) /* Cross-check the parsed attribute bitmaps against the key contents. key_expected collects the attributes implied by the key's ethertype, IP protocol and (for ICMPv6) type; all of them must be present in key_attrs. mask_allowed starts as key_attrs minus the protocol-specific attributes, which are re-enabled one by one only when a mask exists and the field that selects them is matched exactly (eth.type == 0xffff, ip.proto == 0xff, tp.src == 0xff). mask_attrs must be a subset of mask_allowed. On failure the reason is reported through OVS_NLERR and false is returned; otherwise true. NOTE(review): most bits are built with a plain int '1 <<' while one place uses '1ULL <<' - this presumably relies on these attribute numbers staying below 31; confirm against the enum. */ static bool match_validate(const struct sw_flow_match *match, u64 key_attrs, u64 mask_attrs, bool log) { u64 key_expected = 0; u64 mask_allowed = key_attrs; /* At most allow all key attributes */ /* The following mask attributes allowed only if they * pass the validation tests. */ mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4) | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4) | (1 << OVS_KEY_ATTR_IPV6) | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6) | (1 << OVS_KEY_ATTR_TCP) | (1 << OVS_KEY_ATTR_TCP_FLAGS) | (1 << OVS_KEY_ATTR_UDP) | (1 << OVS_KEY_ATTR_SCTP) | (1 << OVS_KEY_ATTR_ICMP) | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) | (1 << OVS_KEY_ATTR_ND) | (1 << OVS_KEY_ATTR_MPLS) | (1 << OVS_KEY_ATTR_NSH)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) | (1 << OVS_KEY_ATTR_IN_PORT) | (1 << OVS_KEY_ATTR_ETHERTYPE)); /* Check key attributes. */ if (match->key->eth.type == htons(ETH_P_ARP) || match->key->eth.type == htons(ETH_P_RARP)) { key_expected |= 1 << OVS_KEY_ATTR_ARP; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } if (eth_p_mpls(match->key->eth.type)) { key_expected |= 1 << OVS_KEY_ATTR_MPLS; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; } if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1 << OVS_KEY_ATTR_IPV4; if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_IPV4; mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4; } /* Transport-layer attributes are only considered when the key is not marked as a later fragment. */ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { key_expected |= 1 << OVS_KEY_ATTR_UDP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_UDP; } if (match->key->ip.proto == IPPROTO_SCTP) { key_expected |= 1 << OVS_KEY_ATTR_SCTP; if (match->mask && 
(match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; } if (match->key->ip.proto == IPPROTO_TCP) { key_expected |= 1 << OVS_KEY_ATTR_TCP; key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; if (match->mask && (match->mask->key.ip.proto == 0xff)) { mask_allowed |= 1 << OVS_KEY_ATTR_TCP; mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; } } if (match->key->ip.proto == IPPROTO_ICMP) { key_expected |= 1 << OVS_KEY_ATTR_ICMP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_ICMP; } } } if (match->key->eth.type == htons(ETH_P_IPV6)) { key_expected |= 1 << OVS_KEY_ATTR_IPV6; if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_IPV6; mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6; } /* Same fragment rule as in the IPv4 branch. */ if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) { if (match->key->ip.proto == IPPROTO_UDP) { key_expected |= 1 << OVS_KEY_ATTR_UDP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_UDP; } if (match->key->ip.proto == IPPROTO_SCTP) { key_expected |= 1 << OVS_KEY_ATTR_SCTP; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_SCTP; } if (match->key->ip.proto == IPPROTO_TCP) { key_expected |= 1 << OVS_KEY_ATTR_TCP; key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS; if (match->mask && (match->mask->key.ip.proto == 0xff)) { mask_allowed |= 1 << OVS_KEY_ATTR_TCP; mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS; } } if (match->key->ip.proto == IPPROTO_ICMPV6) { key_expected |= 1 << OVS_KEY_ATTR_ICMPV6; if (match->mask && (match->mask->key.ip.proto == 0xff)) mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6; /* tp.src is compared against the ICMPv6 neighbour solicitation/advertisement type values here. */ if (match->key->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) || match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { key_expected |= 1 << OVS_KEY_ATTR_ND; /* Original direction conntrack tuple * uses the same space as the ND fields * in the key, so both are not allowed * at the same time. 
*/ mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); if (match->mask && (match->mask->key.tp.src == htons(0xff))) mask_allowed |= 1 << OVS_KEY_ATTR_ND; } } } } if (match->key->eth.type == htons(ETH_P_NSH)) { key_expected |= 1 << OVS_KEY_ATTR_NSH; if (match->mask && match->mask->key.eth.type == htons(0xffff)) { mask_allowed |= 1 << OVS_KEY_ATTR_NSH; } } if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", (unsigned long long)key_attrs, (unsigned long long)key_expected); return false; } if ((mask_attrs & mask_allowed) != mask_attrs) { /* Mask attributes check failed. */ OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)", (unsigned long long)mask_attrs, (unsigned long long)mask_allowed); return false; } return true; } /* Returns the sum of the nla_total_size() values of the tunnel key attributes enumerated in the expression below. */ size_t ovs_tun_key_attr_size(void) { /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider * updating this function. */ return nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and * OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS is mutually exclusive with * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } /* Returns the nla_total_size() of the NSH base header attribute plus that of the largest NSH context-header attribute (NSH_CTX_HDRS_MAX_LEN). */ static size_t ovs_nsh_key_attr_size(void) { /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider * updating this function. 
*/ return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */ /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are * mutually exclusive, so the bigger one can cover * the small one. */ + nla_total_size(NSH_CTX_HDRS_MAX_LEN); } /* Returns the sum of the nla_total_size() values of the flow key attributes enumerated below, including the nested tunnel and NSH sizes from the two helpers above. The BUILD_BUG_ON breaks the build when OVS_KEY_ATTR_MAX changes so that this list gets revisited. */ size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ BUILD_BUG_ON(OVS_KEY_ATTR_MAX != 32); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + ovs_tun_key_attr_size() + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_STATE */ + nla_total_size(2) /* OVS_KEY_ATTR_CT_ZONE */ + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + nla_total_size(0) /* OVS_KEY_ATTR_NSH */ + ovs_nsh_key_attr_size() + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ + nla_total_size(28) /* OVS_KEY_ATTR_ND */ + nla_total_size(2); /* OVS_KEY_ATTR_IPV6_EXTHDRS */ } /* Expected payload length of each OVS_VXLAN_EXT_* attribute. */ static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = { [OVS_VXLAN_EXT_GBP] = { .len = sizeof(u32) }, }; /* Expected payload length of each OVS_TUNNEL_KEY_ATTR_* attribute; flag-style attributes have length 0. */ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = { [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) }, [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) }, [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) }, [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 }, [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 }, [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 }, [OVS_TUNNEL_KEY_ATTR_CSUM] 
= { .len = 0 }, [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE }, [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED, .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = OVS_ATTR_VARIABLE }, [OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE] = { .len = 0 }, }; /* Expected payload length of each OVS_NSH_KEY_ATTR_* attribute; MD2 is variable length. */ static const struct ovs_len_tbl ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = { [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) }, [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) }, [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. Nested attributes use OVS_ATTR_NESTED and, for TUNNEL and NSH, link to their sub-table through .next; types without an initializer here get len 0 from static zero-initialization. */ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED }, [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) }, [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) }, [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) }, [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) }, [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) }, [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) }, [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) }, [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) }, [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) }, [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) }, [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) }, [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) }, [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) }, 
[OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED, .next = ovs_tunnel_key_lens, }, [OVS_KEY_ATTR_MPLS] = { .len = OVS_ATTR_VARIABLE }, [OVS_KEY_ATTR_CT_STATE] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_ZONE] = { .len = sizeof(u16) }, [OVS_KEY_ATTR_CT_MARK] = { .len = sizeof(u32) }, [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) }, [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = { .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, .next = ovs_nsh_key_attr_lens, }, [OVS_KEY_ATTR_IPV6_EXTHDRS] = { .len = sizeof(struct ovs_key_ipv6_exthdrs) }, }; /* True when attr_len matches expected_len exactly, or when expected_len is one of the OVS_ATTR_NESTED / OVS_ATTR_VARIABLE sentinels. The sentinels are negative ints; the comparison is done on unsigned int, so a table length passed in as -1 or -2 still compares equal after conversion. */ static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) { return expected_len == attr_len || expected_len == OVS_ATTR_NESTED || expected_len == OVS_ATTR_VARIABLE; } /* True when all 'size' bytes starting at fp are zero (vacuously true for size 0); a NULL fp yields false. NOTE(review): the index is an int compared against a size_t bound - presumably fine for attribute-sized buffers; confirm callers never pass more than INT_MAX. */ static bool is_all_zero(const u8 *fp, size_t size) { int i; if (!fp) return false; for (i = 0; i < size; i++) if (fp[i]) return false; return true; } /* Walk the attributes nested in 'attr' and validate each one: the type must not exceed OVS_KEY_ATTR_MAX, must not be one of the three types rejected as unsupported below, must not already be set in the attribute bitmap, and its payload length must satisfy check_attr_len() against ovs_key_lens. Accepted attributes are stored in a[type] and their bit is set in the bitmap, which is seeded from *attrsp. When 'nz' is true, attributes whose payload is entirely zero are neither stored nor flagged. Leftover bytes after the walk are an error. Returns 0 on success or -EINVAL; *attrsp is written back only on success, while a[] may already hold entries when an error is returned. */ static int __parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[], u64 *attrsp, bool log, bool nz) { const struct nlattr *nla; u64 attrs; int rem; attrs = *attrsp; nla_for_each_nested(nla, attr, rem) { u16 type = nla_type(nla); int expected_len; if (type > OVS_KEY_ATTR_MAX) { OVS_NLERR(log, "Key type %d is out of range max %d", type, OVS_KEY_ATTR_MAX); return -EINVAL; } if (type == OVS_KEY_ATTR_PACKET_TYPE || type == OVS_KEY_ATTR_ND_EXTENSIONS || type == OVS_KEY_ATTR_TUNNEL_INFO) { OVS_NLERR(log, "Key type %d is not supported", type); return -EINVAL; } if (attrs & (1ULL << type)) { OVS_NLERR(log, "Duplicate key (type %d).", type); return -EINVAL; } expected_len = ovs_key_lens[type].len; if (!check_attr_len(nla_len(nla), expected_len)) { OVS_NLERR(log, "Key %d has unexpected len %d expected %d", type, nla_len(nla), expected_len); return -EINVAL; } 
/* Record the attribute unless zero-skipping is on and its payload is all zero. */
		if (!nz || !is_all_zero(nla_data(nla), nla_len(nla))) {
			attrs |= 1ULL << type;
			a[type] = nla;
		}
	}
	if (rem) {
		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
		return -EINVAL;
	}

	*attrsp = attrs;
	return 0;
}

/* Mask variant of the attribute parser: calls __parse_flow_nlattrs() with
 * nz == true, so attributes whose payload is all zero are not recorded.
 */
static int parse_flow_mask_nlattrs(const struct nlattr *attr,
				   const struct nlattr *a[], u64 *attrsp,
				   bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, true);
}

/* Key variant of the attribute parser: calls __parse_flow_nlattrs() with
 * nz == false, so every valid attribute is recorded in @a and *@attrsp.
 */
int parse_flow_nlattrs(const struct nlattr *attr,
		       const struct nlattr *a[], u64 *attrsp,
		       bool log)
{
	return __parse_flow_nlattrs(attr, a, attrsp, log, false);
}

/* Copies a Geneve option attribute @a into the key or mask of @match.
 *
 * The payload must fit in match->key->tun_opts and be a multiple of 4 bytes
 * long.  For a key the payload length is stored in tun_opts_len; for a mask
 * the length must equal the key's already-stored tun_opts_len, and the mask's
 * tun_opts_len is set to 0xff.  The bytes are then copied at
 * TUN_METADATA_OFFSET(len).
 *
 * Returns 0 on success or -EINVAL on a length error.
 */
static int genev_tun_opt_from_nlattr(const struct nlattr *a,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	unsigned long opt_key_offset;

	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	if (nla_len(a) % 4 != 0) {
		OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
			  nla_len(a));
		return -EINVAL;
	}

	/* We need to record the length of the options passed
	 * down, otherwise packets with the same format but
	 * additional options will be silently matched.
	 */
	if (!is_mask) {
		SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
				false);
	} else {
		/* This is somewhat unusual because it looks at
		 * both the key and mask while parsing the
		 * attributes (and by extension assumes the key
		 * is parsed first). Normally, we would verify
		 * that each is the correct length and that the
		 * attributes line up in the validate function.
		 * However, that is difficult because this is
		 * variable length and we won't have the
		 * information later.
		 */
		if (match->key->tun_opts_len != nla_len(a)) {
			OVS_NLERR(log, "Geneve option len %d != mask len %d",
				  match->key->tun_opts_len, nla_len(a));
			return -EINVAL;
		}

		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
	}

	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}

/* Parses the nested VXLAN extension attributes in @attr into a zeroed
 * struct vxlan_metadata and copies it into the key or mask of @match.
 *
 * Each nested attribute must have a type no greater than OVS_VXLAN_EXT_MAX
 * and a length accepted for its ovs_vxlan_ext_key_lens[] entry; only
 * OVS_VXLAN_EXT_GBP is handled, anything else is an error.  tun_opts_len is
 * set to sizeof(opts) for a key and to 0xff for a mask.
 *
 * Returns 0 on success or -EINVAL on any validation failure.
 */
static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
				     struct sw_flow_match *match, bool is_mask,
				     bool log)
{
	struct nlattr *a;
	int rem;
	unsigned long opt_key_offset;
	struct vxlan_metadata opts;

	BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));

	memset(&opts, 0, sizeof(opts));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);

		if (type > OVS_VXLAN_EXT_MAX) {
			OVS_NLERR(log, "VXLAN extension %d out of range max %d",
				  type, OVS_VXLAN_EXT_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_vxlan_ext_key_lens[type].len)) {
			OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
				  type, nla_len(a),
				  ovs_vxlan_ext_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_VXLAN_EXT_GBP:
			opts.gbp = nla_get_u32(a);
			break;
		default:
			OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
				  type);
			return -EINVAL;
		}
	}
	if (rem) {
		OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
				  is_mask);
	return 0;
}

/* Copies an ERSPAN option attribute @a into the key or mask of @match.
 *
 * The payload must not exceed sizeof(match->key->tun_opts).  tun_opts_len is
 * set to sizeof(struct erspan_metadata) for a key and to 0xff for a mask,
 * while the copy itself uses nla_len(a) for both offset and length.
 *
 * Returns 0 on success or -EINVAL if the payload is too long.
 */
static int erspan_tun_opt_from_nlattr(const struct nlattr *a,
				      struct sw_flow_match *match, bool is_mask,
				      bool log)
{
	unsigned long opt_key_offset;

	BUILD_BUG_ON(sizeof(struct erspan_metadata) >
		     sizeof(match->key->tun_opts));

	if (nla_len(a) > sizeof(match->key->tun_opts)) {
		OVS_NLERR(log, "ERSPAN option length err (len %d, max %zu).",
			  nla_len(a), sizeof(match->key->tun_opts));
		return -EINVAL;
	}

	if
(!is_mask)
		SW_FLOW_KEY_PUT(match, tun_opts_len,
				sizeof(struct erspan_metadata), false);
	else
		SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);

	opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
	SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
				  nla_len(a), is_mask);
	return 0;
}

/* Parses the nested OVS_TUNNEL_KEY_ATTR_* attributes in @attr into the
 * tunnel part of the key or mask of @match.
 *
 * Every attribute is range- and length-checked against ovs_tunnel_key_lens[].
 * Scalar fields are stored with SW_FLOW_KEY_PUT(); flag-like attributes set
 * bits in a local tun_flags bitmap that is copied into tun_key.tun_flags
 * after the loop.  At most one option block (Geneve, VXLAN or ERSPAN) is
 * accepted.  tun_proto is set to AF_INET6 or AF_INET for a key, and filled
 * with 0xff for a mask.
 *
 * For a key (not a mask) the function additionally requires an IPv4 or IPv6
 * attribute, a non-zero destination address (or, in IPv4 info-bridge mode,
 * that addresses, ports, ttl, tos and all flags other than the key bit are
 * zero/empty), and a TTL unless info-bridge mode is set.
 *
 * Returns the attribute type of the option block that was parsed (0 if none)
 * on success, or a negative errno on failure.
 */
static int ip_tun_from_nlattr(const struct nlattr *attr,
			      struct sw_flow_match *match, bool is_mask,
			      bool log)
{
	bool ttl = false, ipv4 = false, ipv6 = false;
	IP_TUNNEL_DECLARE_FLAGS(tun_flags) = { };
	bool info_bridge_mode = false;
	int opts_type = 0;
	struct nlattr *a;
	int rem;

	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		int err;

		if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
			OVS_NLERR(log, "Tunnel attr %d out of range max %d",
				  type, OVS_TUNNEL_KEY_ATTR_MAX);
			return -EINVAL;
		}

		if (!check_attr_len(nla_len(a),
				    ovs_tunnel_key_lens[type].len)) {
			OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
				  type, nla_len(a), ovs_tunnel_key_lens[type].len);
			return -EINVAL;
		}

		switch (type) {
		case OVS_TUNNEL_KEY_ATTR_ID:
			SW_FLOW_KEY_PUT(match, tun_key.tun_id,
					nla_get_be64(a), is_mask);
			__set_bit(IP_TUNNEL_KEY_BIT, tun_flags);
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
					nla_get_in_addr(a), is_mask);
			ipv4 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
			SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
					nla_get_in6_addr(a), is_mask);
			ipv6 = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_TOS:
			SW_FLOW_KEY_PUT(match, tun_key.tos,
					nla_get_u8(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TTL:
			SW_FLOW_KEY_PUT(match, tun_key.ttl,
					nla_get_u8(a), is_mask);
			ttl = true;
			break;
		case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
			__set_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, tun_flags);
			break;
		case OVS_TUNNEL_KEY_ATTR_CSUM:
			__set_bit(IP_TUNNEL_CSUM_BIT, tun_flags);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_SRC:
			SW_FLOW_KEY_PUT(match, tun_key.tp_src,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_TP_DST:
			SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
					nla_get_be16(a), is_mask);
			break;
		case OVS_TUNNEL_KEY_ATTR_OAM:
			__set_bit(IP_TUNNEL_OAM_BIT, tun_flags);
			break;
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			/* Only one option block may be supplied. */
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, tun_flags);
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
			if (err)
				return err;

			__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_flags);
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_PAD:
			/* Padding carries no data; nothing to record. */
			break;
		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
			if (opts_type) {
				OVS_NLERR(log, "Multiple metadata blocks provided");
				return -EINVAL;
			}

			err = erspan_tun_opt_from_nlattr(a, match, is_mask,
							 log);
			if (err)
				return err;

			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_flags);
			opts_type = type;
			break;
		case OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE:
			info_bridge_mode = true;
			ipv4 = true;
			break;
		default:
			OVS_NLERR(log, "Unknown IP tunnel attribute %d",
				  type);
			return -EINVAL;
		}
	}

	SW_FLOW_KEY_BITMAP_COPY(match, tun_key.tun_flags, tun_flags,
				__IP_TUNNEL_FLAG_NUM, is_mask);
	if (is_mask)
		SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
	else
		SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
				false);

	if (rem > 0) {
		OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
			  rem);
		return -EINVAL;
	}

	if (ipv4 && ipv6) {
		OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
		return -EINVAL;
	}

	/* The remaining consistency checks apply to keys only. */
	if (!is_mask) {
		if (!ipv4 && !ipv6) {
			OVS_NLERR(log, "IP tunnel dst address not specified");
			return -EINVAL;
		}
		if (ipv4) {
			if (info_bridge_mode) {
				/* The key bit is cleared in the local copy
				 * only, so that it is ignored by the
				 * emptiness test below.
				 */
				__clear_bit(IP_TUNNEL_KEY_BIT, tun_flags);
				if (match->key->tun_key.u.ipv4.src ||
				    match->key->tun_key.u.ipv4.dst ||
				    match->key->tun_key.tp_src ||
				    match->key->tun_key.tp_dst ||
				    match->key->tun_key.ttl ||
				    match->key->tun_key.tos ||
				    !ip_tunnel_flags_empty(tun_flags)) {
					OVS_NLERR(log, "IPv4 tun info is not correct");
					return -EINVAL;
				}
			} else if (!match->key->tun_key.u.ipv4.dst) {
				OVS_NLERR(log, "IPv4 tunnel dst address is zero");
				return -EINVAL;
			}
		}
		if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
			OVS_NLERR(log, "IPv6 tunnel dst address is zero");
			return -EINVAL;
		}

		if (!ttl && !info_bridge_mode) {
			OVS_NLERR(log, "IP tunnel TTL not specified.");
			return -EINVAL;
		}
	}

	return opts_type;
}

/* Emits @tun_opts, interpreted as a struct vxlan_metadata, as a nested
 * OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS attribute holding OVS_VXLAN_EXT_GBP.
 * @swkey_tun_opts_len is not used.  Returns 0 or -EMSGSIZE.
 */
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
			       const void *tun_opts, int swkey_tun_opts_len)
{
	const struct vxlan_metadata *opts = tun_opts;
	struct nlattr *nla;

	nla = nla_nest_start_noflag(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
	if (!nla)
		return -EMSGSIZE;

	if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
		return -EMSGSIZE;

	nla_nest_end(skb, nla);
	return 0;
}

/* Serializes the tunnel key @output (and optional @tun_opts) into @skb as a
 * flat sequence of OVS_TUNNEL_KEY_ATTR_* attributes, without opening a nest.
 *
 * The tunnel ID is written when IP_TUNNEL_KEY_BIT is set.  If @mode has
 * IP_TUNNEL_INFO_BRIDGE, only the OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE flag
 * follows and the function returns.  Otherwise addresses are written per
 * @tun_proto (zero addresses are skipped), tos and ports only when non-zero,
 * ttl always, flag attributes per @output->tun_flags, and one option block
 * when @swkey_tun_opts_len is non-zero.
 *
 * Returns 0 on success or -EMSGSIZE when an attribute cannot be added.
 */
static int __ip_tun_to_nlattr(struct sk_buff *skb,
			      const struct ip_tunnel_key *output,
			      const void *tun_opts, int swkey_tun_opts_len,
			      unsigned short tun_proto, u8 mode)
{
	if (test_bit(IP_TUNNEL_KEY_BIT, output->tun_flags) &&
	    nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
			 OVS_TUNNEL_KEY_ATTR_PAD))
		return -EMSGSIZE;

	if (mode & IP_TUNNEL_INFO_BRIDGE)
		return nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE) ?
-EMSGSIZE : 0;

	switch (tun_proto) {
	case AF_INET:
		if (output->u.ipv4.src &&
		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
				    output->u.ipv4.src))
			return -EMSGSIZE;
		if (output->u.ipv4.dst &&
		    nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
				    output->u.ipv4.dst))
			return -EMSGSIZE;
		break;
	case AF_INET6:
		if (!ipv6_addr_any(&output->u.ipv6.src) &&
		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
				     &output->u.ipv6.src))
			return -EMSGSIZE;
		if (!ipv6_addr_any(&output->u.ipv6.dst) &&
		    nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
				     &output->u.ipv6.dst))
			return -EMSGSIZE;
		break;
	}
	if (output->tos &&
	    nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
		return -EMSGSIZE;
	/* TTL is emitted unconditionally, even when it is zero. */
	if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
		return -EMSGSIZE;
	if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, output->tun_flags) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
		return -EMSGSIZE;
	if (test_bit(IP_TUNNEL_CSUM_BIT, output->tun_flags) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
		return -EMSGSIZE;
	if (output->tp_src &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
		return -EMSGSIZE;
	if (output->tp_dst &&
	    nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
		return -EMSGSIZE;
	if (test_bit(IP_TUNNEL_OAM_BIT, output->tun_flags) &&
	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
		return -EMSGSIZE;
	if (swkey_tun_opts_len) {
		/* The first option flag that is set selects the encoding. */
		if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, output->tun_flags) &&
		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
			    swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
		else if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT,
				  output->tun_flags) &&
			 vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
			return -EMSGSIZE;
		else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
				  output->tun_flags) &&
			 nla_put(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
				 swkey_tun_opts_len, tun_opts))
			return -EMSGSIZE;
	}

	return 0;
}

/* Wraps __ip_tun_to_nlattr() in a nested OVS_KEY_ATTR_TUNNEL attribute.
 * Returns 0 on success, -EMSGSIZE if the nest cannot be started, or the
 * error returned by __ip_tun_to_nlattr() (the nest is not closed then).
 */
static int ip_tun_to_nlattr(struct sk_buff *skb,
			    const struct ip_tunnel_key *output,
			    const void *tun_opts, int swkey_tun_opts_len,
			    unsigned short tun_proto, u8 mode)
{
	struct nlattr *nla;
	int err;

	nla = nla_nest_start_noflag(skb, OVS_KEY_ATTR_TUNNEL);
	if (!nla)
		return -EMSGSIZE;

	err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
				 tun_proto, mode);
	if (err)
		return err;

	nla_nest_end(skb, nla);
	return 0;
}

/* Serializes @tun_info into @skb as flat OVS_TUNNEL_KEY_ATTR_* attributes
 * (no enclosing nest), taking key, options, address family and mode from
 * @tun_info.  Returns the result of __ip_tun_to_nlattr().
 */
int ovs_nla_put_tunnel_info(struct sk_buff *skb,
			    struct ip_tunnel_info *tun_info)
{
	return __ip_tun_to_nlattr(skb, &tun_info->key,
				  ip_tunnel_info_opts(tun_info),
				  tun_info->options_len,
				  ip_tunnel_info_af(tun_info), tun_info->mode);
}

/* Stores the VLAN TCI and TPID found in @a into @match: into eth.vlan when
 * @inner is false, into eth.cvlan when it is true.  A missing
 * OVS_KEY_ATTR_VLAN or OVS_KEY_ATTR_ETHERTYPE entry is stored as 0.
 * Always returns 0.
 */
static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
				    const struct nlattr *a[],
				    bool is_mask, bool inner)
{
	__be16 tci = 0;
	__be16 tpid = 0;

	if (a[OVS_KEY_ATTR_VLAN])
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

	if (a[OVS_KEY_ATTR_ETHERTYPE])
		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

	if (likely(!inner)) {
		SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
		SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
	} else {
		SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
		SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
	}
	return 0;
}

/* Checks whether the key attributes describe a well-formed VLAN header.
 *
 * Returns 0 when the attributes are not a VLAN (Ethernet and EtherType
 * attributes present with a VLAN EtherType is the precondition), 1 when a
 * valid VLAN is present, and -EINVAL when VLAN/ENCAP attributes are missing,
 * a non-zero TCI lacks the VLAN_CFI_MASK bit, or a zero TCI comes with a
 * non-empty ENCAP attribute.  @match is not used.
 */
static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
				      u64 key_attrs, bool inner,
				      const struct nlattr **a, bool log)
{
	__be16 tci = 0;

	if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
	      (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
	       eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
		/* Not a VLAN. */
		return 0;
	}

	if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
	      (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
		OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
		return -EINVAL;
	}

	if (a[OVS_KEY_ATTR_VLAN])
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

	if (!(tci & htons(VLAN_CFI_MASK))) {
		if (tci) {
			OVS_NLERR(log, "%s TCI does not have VLAN_CFI_MASK bit set.",
				  (inner) ? "C-VLAN" : "VLAN");
			return -EINVAL;
		} else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
			/* Corner case for truncated VLAN header. */
			OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
				  (inner) ? "C-VLAN" : "VLAN");
			return -EINVAL;
		}
	}

	return 1;
}

/* Mask counterpart of validate_vlan_from_nlattrs().
 *
 * Returns 0 when the mask carries no ENCAP attribute, 1 when the VLAN mask
 * is acceptable, and -EINVAL when ENCAP is present but the corresponding
 * key TCI (vlan or cvlan, per @inner) lacks VLAN_CFI_MASK, when the TPID
 * mask is not 0xffff, or when the TCI mask lacks the VLAN_CFI_MASK bit.
 */
static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
					   u64 key_attrs, bool inner,
					   const struct nlattr **a, bool log)
{
	__be16 tci = 0;
	__be16 tpid = 0;
	bool encap_valid = !!(match->key->eth.vlan.tci &
			      htons(VLAN_CFI_MASK));
	bool i_encap_valid = !!(match->key->eth.cvlan.tci &
				htons(VLAN_CFI_MASK));

	if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
		/* Not a VLAN. */
		return 0;
	}

	if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
		OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
			  (inner) ? "C-VLAN" : "VLAN");
		return -EINVAL;
	}

	if (a[OVS_KEY_ATTR_VLAN])
		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);

	if (a[OVS_KEY_ATTR_ETHERTYPE])
		tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);

	if (tpid != htons(0xffff)) {
		OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
			  (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
		return -EINVAL;
	}
	if (!(tci & htons(VLAN_CFI_MASK))) {
		OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_CFI_MASK bit.",
			  (inner) ? "C-VLAN" : "VLAN");
		return -EINVAL;
	}

	return 1;
}

/* Parses one VLAN level (outer when @inner is false, inner otherwise).
 *
 * Validates the level with the key or mask validator; a result <= 0 (not a
 * VLAN, or an error) is returned unchanged.  Otherwise the TCI/TPID are
 * encoded into @match, the ENCAP, VLAN and ETHERTYPE bits are cleared from
 * *@key_attrs, and the attributes nested inside the ENCAP attribute are
 * parsed into the same @a array and bitmap.
 */
static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
				     u64 *key_attrs, bool inner,
				     const struct nlattr **a, bool is_mask,
				     bool log)
{
	int err;
	const struct nlattr *encap;

	if (!is_mask)
		err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
						 a, log);
	else
		err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
						      a, log);
	if (err <= 0)
		return err;

	err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
	if (err)
		return err;

	*key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
	*key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
	*key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);

	/* Re-parse the encapsulated attributes over the same array. */
	encap = a[OVS_KEY_ATTR_ENCAP];

	if (!is_mask)
		err = parse_flow_nlattrs(encap, a, key_attrs, log);
	else
		err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);

	return err;
}

/* Parses the outer VLAN and, if the key's outer TCI has VLAN_CFI_MASK set,
 * the inner (C-VLAN) level as well.  Returns 0 on success or the first
 * non-zero value returned by __parse_vlan_from_nlattrs().
 */
static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
				   u64 *key_attrs, const struct nlattr **a,
				   bool is_mask, bool log)
{
	int err;
	bool encap_valid = false;

	err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
					is_mask, log);
	if (err)
		return err;

	encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_CFI_MASK));
	if (encap_valid) {
		err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
						is_mask, log);
		if (err)
			return err;
	}

	return 0;
}

/* Stores the EtherType from @a[OVS_KEY_ATTR_ETHERTYPE] into eth.type of the
 * key or mask and clears the ETHERTYPE bit in *@attrs.  For a mask the
 * stored value is forced to 0xffff; for a key a value rejected by
 * eth_proto_is_802_3() yields -EINVAL.  Returns 0 on success.
 */
static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
				       u64 *attrs, const struct nlattr **a,
				       bool is_mask, bool log)
{
	__be16 eth_type;

	eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
	if (is_mask) {
		/* Always exact match EtherType.
*/ eth_type = htons(0xffff); } else if (!eth_proto_is_802_3(eth_type)) { OVS_NLERR(log, "EtherType %x is less than min %x", ntohs(eth_type), ETH_P_802_3_MIN); return -EINVAL; } SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); return 0; } static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) { u8 mac_proto = MAC_PROTO_ETHERNET; if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) { u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]); SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH); } if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) { u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]); SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID); } if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) { SW_FLOW_KEY_PUT(match, phy.priority, nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY); } if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); if (is_mask) { in_port = 0xffffffff; /* Always exact match in_port. 
*/ } else if (in_port >= DP_MAX_PORTS) { OVS_NLERR(log, "Port %d exceeds max allowable %d", in_port, DP_MAX_PORTS); return -EINVAL; } SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); } else if (!is_mask) { SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask); } if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) { uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]); SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK); } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) { u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]); if (ct_state & ~CT_SUPPORTED_MASK) { OVS_NLERR(log, "ct_state flags %08x unsupported", ct_state); return -EINVAL; } SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) { u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]); SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE); } if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) { u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]); SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK); } if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) && ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) { const struct ovs_key_ct_labels *cl; cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]); SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels, sizeof(*cl), is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS); } if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) { const struct ovs_key_ct_tuple_ipv4 *ct; ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]); SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, 
ct->ipv4_src, is_mask); SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4); } if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) { const struct ovs_key_ct_tuple_ipv6 *ct; ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]); SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src, sizeof(match->key->ipv6.ct_orig.src), is_mask); SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst, sizeof(match->key->ipv6.ct_orig.dst), is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask); SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask); SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask); *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6); } /* For layer 3 packets the Ethernet type is provided * and treated as metadata but no MAC addresses are provided. */ if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) && (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE))) mac_proto = MAC_PROTO_NONE; /* Always exact match mac_proto */ SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 
0xff : mac_proto, is_mask); if (mac_proto == MAC_PROTO_NONE) return parse_eth_type_from_nlattrs(match, attrs, a, is_mask, log); return 0; } int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, size_t size) { struct nlattr *a; int rem; u8 flags = 0; u8 ttl = 0; int mdlen = 0; /* validate_nsh has check this, so we needn't do duplicate check here */ if (size < NSH_BASE_HDR_LEN) return -ENOBUFS; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); switch (type) { case OVS_NSH_KEY_ATTR_BASE: { const struct ovs_nsh_key_base *base = nla_data(a); flags = base->flags; ttl = base->ttl; nh->np = base->np; nh->mdtype = base->mdtype; nh->path_hdr = base->path_hdr; break; } case OVS_NSH_KEY_ATTR_MD1: mdlen = nla_len(a); if (mdlen > size - NSH_BASE_HDR_LEN) return -ENOBUFS; memcpy(&nh->md1, nla_data(a), mdlen); break; case OVS_NSH_KEY_ATTR_MD2: mdlen = nla_len(a); if (mdlen > size - NSH_BASE_HDR_LEN) return -ENOBUFS; memcpy(&nh->md2, nla_data(a), mdlen); break; default: return -EINVAL; } } /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ nh->ver_flags_ttl_len = 0; nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); return 0; } int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) { struct nlattr *a; int rem; /* validate_nsh has check this, so we needn't do duplicate check here */ nla_for_each_nested(a, attr, rem) { int type = nla_type(a); switch (type) { case OVS_NSH_KEY_ATTR_BASE: { const struct ovs_nsh_key_base *base = nla_data(a); const struct ovs_nsh_key_base *base_mask = base + 1; nsh->base = *base; nsh_mask->base = *base_mask; break; } case OVS_NSH_KEY_ATTR_MD1: { const struct ovs_nsh_key_md1 *md1 = nla_data(a); const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; memcpy(nsh->context, md1->context, sizeof(*md1)); memcpy(nsh_mask->context, md1_mask->context, sizeof(*md1_mask)); break; } case OVS_NSH_KEY_ATTR_MD2: /* Not supported yet */ return -ENOTSUPP; default: return -EINVAL; } } 
return 0; } static int nsh_key_put_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool is_push_nsh, bool log) { struct nlattr *a; int rem; bool has_base = false; bool has_md1 = false; bool has_md2 = false; u8 mdtype = 0; int mdlen = 0; if (WARN_ON(is_push_nsh && is_mask)) return -EINVAL; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); int i; if (type > OVS_NSH_KEY_ATTR_MAX) { OVS_NLERR(log, "nsh attr %d is out of range max %d", type, OVS_NSH_KEY_ATTR_MAX); return -EINVAL; } if (!check_attr_len(nla_len(a), ovs_nsh_key_attr_lens[type].len)) { OVS_NLERR( log, "nsh attr %d has unexpected len %d expected %d", type, nla_len(a), ovs_nsh_key_attr_lens[type].len ); return -EINVAL; } switch (type) { case OVS_NSH_KEY_ATTR_BASE: { const struct ovs_nsh_key_base *base = nla_data(a); has_base = true; mdtype = base->mdtype; SW_FLOW_KEY_PUT(match, nsh.base.flags, base->flags, is_mask); SW_FLOW_KEY_PUT(match, nsh.base.ttl, base->ttl, is_mask); SW_FLOW_KEY_PUT(match, nsh.base.mdtype, base->mdtype, is_mask); SW_FLOW_KEY_PUT(match, nsh.base.np, base->np, is_mask); SW_FLOW_KEY_PUT(match, nsh.base.path_hdr, base->path_hdr, is_mask); break; } case OVS_NSH_KEY_ATTR_MD1: { const struct ovs_nsh_key_md1 *md1 = nla_data(a); has_md1 = true; for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) SW_FLOW_KEY_PUT(match, nsh.context[i], md1->context[i], is_mask); break; } case OVS_NSH_KEY_ATTR_MD2: if (!is_push_nsh) /* Not supported MD type 2 yet */ return -ENOTSUPP; has_md2 = true; mdlen = nla_len(a); if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) { OVS_NLERR( log, "Invalid MD length %d for MD type %d", mdlen, mdtype ); return -EINVAL; } break; default: OVS_NLERR(log, "Unknown nsh attribute %d", type); return -EINVAL; } } if (rem > 0) { OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem); return -EINVAL; } if (has_md1 && has_md2) { OVS_NLERR( 1, "invalid nsh attribute: md1 and md2 are exclusive." 
); return -EINVAL; } if (!is_mask) { if ((has_md1 && mdtype != NSH_M_TYPE1) || (has_md2 && mdtype != NSH_M_TYPE2)) { OVS_NLERR(1, "nsh attribute has unmatched MD type %d.", mdtype); return -EINVAL; } if (is_push_nsh && (!has_base || (!has_md1 && !has_md2))) { OVS_NLERR( 1, "push_nsh: missing base or metadata attributes" ); return -EINVAL; } } return 0; } static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 attrs, const struct nlattr **a, bool is_mask, bool log) { int err; err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log); if (err) return err; if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) { const struct ovs_key_ethernet *eth_key; eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]); SW_FLOW_KEY_MEMCPY(match, eth.src, eth_key->eth_src, ETH_ALEN, is_mask); SW_FLOW_KEY_MEMCPY(match, eth.dst, eth_key->eth_dst, ETH_ALEN, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET); if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { /* VLAN attribute is always parsed before getting here since it * may occur multiple times. 
*/ OVS_NLERR(log, "VLAN attribute unexpected."); return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask, log); if (err) return err; } else if (!is_mask) { SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask); } } else if (!match->key->eth.type) { OVS_NLERR(log, "Either Ethernet header or EtherType is required."); return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_IPV4)) { const struct ovs_key_ipv4 *ipv4_key; ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]); if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) { OVS_NLERR(log, "IPv4 frag type %d is out of range max %d", ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } SW_FLOW_KEY_PUT(match, ip.proto, ipv4_key->ipv4_proto, is_mask); SW_FLOW_KEY_PUT(match, ip.tos, ipv4_key->ipv4_tos, is_mask); SW_FLOW_KEY_PUT(match, ip.ttl, ipv4_key->ipv4_ttl, is_mask); SW_FLOW_KEY_PUT(match, ip.frag, ipv4_key->ipv4_frag, is_mask); SW_FLOW_KEY_PUT(match, ipv4.addr.src, ipv4_key->ipv4_src, is_mask); SW_FLOW_KEY_PUT(match, ipv4.addr.dst, ipv4_key->ipv4_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV4); } if (attrs & (1 << OVS_KEY_ATTR_IPV6)) { const struct ovs_key_ipv6 *ipv6_key; ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]); if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) { OVS_NLERR(log, "IPv6 frag type %d is out of range max %d", ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX); return -EINVAL; } if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)", ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); return -EINVAL; } SW_FLOW_KEY_PUT(match, ipv6.label, ipv6_key->ipv6_label, is_mask); SW_FLOW_KEY_PUT(match, ip.proto, ipv6_key->ipv6_proto, is_mask); SW_FLOW_KEY_PUT(match, ip.tos, ipv6_key->ipv6_tclass, is_mask); SW_FLOW_KEY_PUT(match, ip.ttl, ipv6_key->ipv6_hlimit, is_mask); SW_FLOW_KEY_PUT(match, ip.frag, ipv6_key->ipv6_frag, is_mask); SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src, ipv6_key->ipv6_src, 
sizeof(match->key->ipv6.addr.src), is_mask); SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst, ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_IPV6); } if (attrs & (1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS)) { const struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key; ipv6_exthdrs_key = nla_data(a[OVS_KEY_ATTR_IPV6_EXTHDRS]); SW_FLOW_KEY_PUT(match, ipv6.exthdrs, ipv6_exthdrs_key->hdrs, is_mask); attrs &= ~(1ULL << OVS_KEY_ATTR_IPV6_EXTHDRS); } if (attrs & (1 << OVS_KEY_ATTR_ARP)) { const struct ovs_key_arp *arp_key; arp_key = nla_data(a[OVS_KEY_ATTR_ARP]); if (!is_mask && (arp_key->arp_op & htons(0xff00))) { OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).", arp_key->arp_op); return -EINVAL; } SW_FLOW_KEY_PUT(match, ipv4.addr.src, arp_key->arp_sip, is_mask); SW_FLOW_KEY_PUT(match, ipv4.addr.dst, arp_key->arp_tip, is_mask); SW_FLOW_KEY_PUT(match, ip.proto, ntohs(arp_key->arp_op), is_mask); SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN, is_mask); SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ARP); } if (attrs & (1 << OVS_KEY_ATTR_NSH)) { if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match, is_mask, false, log) < 0) return -EINVAL; attrs &= ~(1 << OVS_KEY_ATTR_NSH); } if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; u32 hdr_len; u32 label_count, label_count_mask, i; mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); hdr_len = nla_len(a[OVS_KEY_ATTR_MPLS]); label_count = hdr_len / sizeof(struct ovs_key_mpls); if (label_count == 0 || label_count > MPLS_LABEL_DEPTH || hdr_len % sizeof(struct ovs_key_mpls)) return -EINVAL; label_count_mask = GENMASK(label_count - 1, 0); for (i = 0 ; i < label_count; i++) SW_FLOW_KEY_PUT(match, mpls.lse[i], mpls_key[i].mpls_lse, is_mask); SW_FLOW_KEY_PUT(match, mpls.num_labels_mask, label_count_mask, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_MPLS); } if (attrs & (1 << OVS_KEY_ATTR_TCP)) { const struct ovs_key_tcp 
*tcp_key; tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]); SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask); SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP); } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { SW_FLOW_KEY_PUT(match, tp.flags, nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); } if (attrs & (1 << OVS_KEY_ATTR_UDP)) { const struct ovs_key_udp *udp_key; udp_key = nla_data(a[OVS_KEY_ATTR_UDP]); SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask); SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_UDP); } if (attrs & (1 << OVS_KEY_ATTR_SCTP)) { const struct ovs_key_sctp *sctp_key; sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]); SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask); SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_SCTP); } if (attrs & (1 << OVS_KEY_ATTR_ICMP)) { const struct ovs_key_icmp *icmp_key; icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]); SW_FLOW_KEY_PUT(match, tp.src, htons(icmp_key->icmp_type), is_mask); SW_FLOW_KEY_PUT(match, tp.dst, htons(icmp_key->icmp_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMP); } if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) { const struct ovs_key_icmpv6 *icmpv6_key; icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]); SW_FLOW_KEY_PUT(match, tp.src, htons(icmpv6_key->icmpv6_type), is_mask); SW_FLOW_KEY_PUT(match, tp.dst, htons(icmpv6_key->icmpv6_code), is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6); } if (attrs & (1 << OVS_KEY_ATTR_ND)) { const struct ovs_key_nd *nd_key; nd_key = nla_data(a[OVS_KEY_ATTR_ND]); SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target, nd_key->nd_target, sizeof(match->key->ipv6.nd.target), is_mask); SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN, is_mask); SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_ND); } if (attrs != 0) { OVS_NLERR(log, "Unknown key attributes 
%llx", (unsigned long long)attrs); return -EINVAL; } return 0; } static void nlattr_set(struct nlattr *attr, u8 val, const struct ovs_len_tbl *tbl) { struct nlattr *nla; int rem; /* The nlattr stream should already have been validated */ nla_for_each_nested(nla, attr, rem) { if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) nlattr_set(nla, val, tbl[nla_type(nla)].next ? : tbl); else memset(nla_data(nla), val, nla_len(nla)); if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; } } static void mask_set_nlattr(struct nlattr *attr, u8 val) { nlattr_set(attr, val, ovs_key_lens); } /** * ovs_nla_get_match - parses Netlink attributes into a flow key and * mask. In case the 'mask' is NULL, the flow is treated as exact match * flow. Otherwise, it is treated as a wildcarded flow, except the mask * does not include any don't care bit. * @net: Used to determine per-namespace field support. * @match: receives the extracted flow match information. * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute * sequence. The fields should of the packet that triggered the creation * of this flow. * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_* * Netlink attribute specifies the mask field of the wildcarded flow. * @log: Boolean to allow kernel error logging. Normally true, but when * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. 
*/ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, const struct nlattr *nla_key, const struct nlattr *nla_mask, bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; struct nlattr *newmask = NULL; u64 key_attrs = 0; u64 mask_attrs = 0; int err; err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); if (err) return err; err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); if (err) return err; if (match->mask) { if (!nla_mask) { /* Create an exact match mask. We need to set to 0xff * all the 'match->mask' fields that have been touched * in 'match->key'. We cannot simply memset * 'match->mask', because padding bytes and fields not * specified in 'match->key' should be left to 0. * Instead, we use a stream of netlink attributes, * copied from 'key' and set to 0xff. * ovs_key_from_nlattrs() will take care of filling * 'match->mask' appropriately. */ newmask = kmemdup(nla_key, nla_total_size(nla_len(nla_key)), GFP_KERNEL); if (!newmask) return -ENOMEM; mask_set_nlattr(newmask, 0xff); /* The userspace does not send tunnel attributes that * are 0, but we should not wildcard them nonetheless. */ if (match->key->tun_proto) SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); nla_mask = newmask; } err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log); if (err) goto free_newmask; /* Always match on tci. 
*/ SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); if (err) goto free_newmask; err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, log); if (err) goto free_newmask; } if (!match_validate(match, key_attrs, mask_attrs, log)) err = -EINVAL; free_newmask: kfree(newmask); return err; } static size_t get_ufid_len(const struct nlattr *attr, bool log) { size_t len; if (!attr) return 0; len = nla_len(attr); if (len < 1 || len > MAX_UFID_LENGTH) { OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", nla_len(attr), MAX_UFID_LENGTH); return 0; } return len; } /* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, * or false otherwise. */ bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, bool log) { sfid->ufid_len = get_ufid_len(attr, log); if (sfid->ufid_len) memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); return sfid->ufid_len; } int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, const struct sw_flow_key *key, bool log) { struct sw_flow_key *new_key; if (ovs_nla_get_ufid(sfid, ufid, log)) return 0; /* If UFID was not provided, use unmasked key. */ new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); if (!new_key) return -ENOMEM; memcpy(new_key, key, sizeof(*key)); sfid->unmasked_key = new_key; return 0; } u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) { return nla_get_u32_default(attr, 0); } /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @net: Network namespace. * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack * metadata. * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink * attributes. * @attrs: Bit mask for the netlink attributes included in @a. * @log: Boolean to allow kernel error logging. 
Normally true, but when * probing for feature compatibility this should be passed in as false to * suppress unnecessary error logging. * * This parses a series of Netlink attributes that form a flow key, which must * take the same form accepted by flow_from_nlattrs(), but only enough of it to * get the metadata, that is, the parts of the flow key that cannot be * extracted from the packet itself. * * This must be called before the packet key fields are filled in 'key'. */ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *a[OVS_KEY_ATTR_MAX + 1], u64 attrs, struct sw_flow_key *key, bool log) { struct sw_flow_match match; memset(&match, 0, sizeof(match)); match.key = key; key->ct_state = 0; key->ct_zone = 0; key->ct_orig_proto = 0; memset(&key->ct, 0, sizeof(key->ct)); memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig)); memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig)); key->phy.in_port = DP_MAX_PORTS; return metadata_from_nlattrs(net, &match, &attrs, a, false, log); } static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, bool is_mask) { __be16 eth_type = !is_mask ? 
vh->tpid : htons(0xffff); if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) return -EMSGSIZE; return 0; } static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, struct sk_buff *skb) { struct nlattr *start; start = nla_nest_start_noflag(skb, OVS_KEY_ATTR_NSH); if (!start) return -EMSGSIZE; if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base)) goto nla_put_failure; if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) { if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1, sizeof(nsh->context), nsh->context)) goto nla_put_failure; } /* Don't support MD type 2 yet */ nla_nest_end(skb, start); return 0; nla_put_failure: return -EMSGSIZE; } static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; struct nlattr *nla; struct nlattr *encap = NULL; struct nlattr *in_encap = NULL; if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash)) goto nla_put_failure; if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority)) goto nla_put_failure; if ((swkey->tun_proto || is_mask)) { const void *opts = NULL; if (ip_tunnel_is_options_present(output->tun_key.tun_flags)) opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len); if (ip_tun_to_nlattr(skb, &output->tun_key, opts, swkey->tun_opts_len, swkey->tun_proto, 0)) goto nla_put_failure; } if (swkey->phy.in_port == DP_MAX_PORTS) { if (is_mask && (output->phy.in_port == 0xffff)) if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff)) goto nla_put_failure; } else { u16 upper_u16; upper_u16 = !is_mask ? 
0 : 0xffff; if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, (upper_u16 << 16) | output->phy.in_port)) goto nla_put_failure; } if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; if (ovs_ct_put_key(swkey, output, skb)) goto nla_put_failure; if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) { nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; eth_key = nla_data(nla); ether_addr_copy(eth_key->eth_src, output->eth.src); ether_addr_copy(eth_key->eth_dst, output->eth.dst); if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP); if (!swkey->eth.vlan.tci) goto unencap; if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) goto nla_put_failure; in_encap = nla_nest_start_noflag(skb, OVS_KEY_ATTR_ENCAP); if (!swkey->eth.cvlan.tci) goto unencap; } } if (swkey->eth.type == htons(ETH_P_802_2)) { /* * Ethertype 802.2 is represented in the netlink with omitted * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and * 0xffff in the mask attribute. Ethertype can also * be wildcarded. */ if (is_mask && output->eth.type) if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; goto unencap; } } if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; if (eth_type_vlan(swkey->eth.type)) { /* There are 3 VLAN tags, we don't know anything about the rest * of the packet, so truncate here. 
*/ WARN_ON_ONCE(!(encap && in_encap)); goto unencap; } if (swkey->eth.type == htons(ETH_P_IP)) { struct ovs_key_ipv4 *ipv4_key; nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key)); if (!nla) goto nla_put_failure; ipv4_key = nla_data(nla); ipv4_key->ipv4_src = output->ipv4.addr.src; ipv4_key->ipv4_dst = output->ipv4.addr.dst; ipv4_key->ipv4_proto = output->ip.proto; ipv4_key->ipv4_tos = output->ip.tos; ipv4_key->ipv4_ttl = output->ip.ttl; ipv4_key->ipv4_frag = output->ip.frag; } else if (swkey->eth.type == htons(ETH_P_IPV6)) { struct ovs_key_ipv6 *ipv6_key; struct ovs_key_ipv6_exthdrs *ipv6_exthdrs_key; nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key)); if (!nla) goto nla_put_failure; ipv6_key = nla_data(nla); memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src, sizeof(ipv6_key->ipv6_src)); memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst, sizeof(ipv6_key->ipv6_dst)); ipv6_key->ipv6_label = output->ipv6.label; ipv6_key->ipv6_proto = output->ip.proto; ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6_EXTHDRS, sizeof(*ipv6_exthdrs_key)); if (!nla) goto nla_put_failure; ipv6_exthdrs_key = nla_data(nla); ipv6_exthdrs_key->hdrs = output->ipv6.exthdrs; } else if (swkey->eth.type == htons(ETH_P_NSH)) { if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) goto nla_put_failure; } else if (swkey->eth.type == htons(ETH_P_ARP) || swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key)); if (!nla) goto nla_put_failure; arp_key = nla_data(nla); memset(arp_key, 0, sizeof(struct ovs_key_arp)); arp_key->arp_sip = output->ipv4.addr.src; arp_key->arp_tip = output->ipv4.addr.dst; arp_key->arp_op = htons(output->ip.proto); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); } else if (eth_p_mpls(swkey->eth.type)) { u8 i, 
num_labels; struct ovs_key_mpls *mpls_key; num_labels = hweight_long(output->mpls.num_labels_mask); nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, num_labels * sizeof(*mpls_key)); if (!nla) goto nla_put_failure; mpls_key = nla_data(nla); for (i = 0; i < num_labels; i++) mpls_key[i].mpls_lse = output->mpls.lse[i]; } if ((swkey->eth.type == htons(ETH_P_IP) || swkey->eth.type == htons(ETH_P_IPV6)) && swkey->ip.frag != OVS_FRAG_TYPE_LATER) { if (swkey->ip.proto == IPPROTO_TCP) { struct ovs_key_tcp *tcp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key)); if (!nla) goto nla_put_failure; tcp_key = nla_data(nla); tcp_key->tcp_src = output->tp.src; tcp_key->tcp_dst = output->tp.dst; if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS, output->tp.flags)) goto nla_put_failure; } else if (swkey->ip.proto == IPPROTO_UDP) { struct ovs_key_udp *udp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key)); if (!nla) goto nla_put_failure; udp_key = nla_data(nla); udp_key->udp_src = output->tp.src; udp_key->udp_dst = output->tp.dst; } else if (swkey->ip.proto == IPPROTO_SCTP) { struct ovs_key_sctp *sctp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key)); if (!nla) goto nla_put_failure; sctp_key = nla_data(nla); sctp_key->sctp_src = output->tp.src; sctp_key->sctp_dst = output->tp.dst; } else if (swkey->eth.type == htons(ETH_P_IP) && swkey->ip.proto == IPPROTO_ICMP) { struct ovs_key_icmp *icmp_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key)); if (!nla) goto nla_put_failure; icmp_key = nla_data(nla); icmp_key->icmp_type = ntohs(output->tp.src); icmp_key->icmp_code = ntohs(output->tp.dst); } else if (swkey->eth.type == htons(ETH_P_IPV6) && swkey->ip.proto == IPPROTO_ICMPV6) { struct ovs_key_icmpv6 *icmpv6_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6, sizeof(*icmpv6_key)); if (!nla) goto nla_put_failure; icmpv6_key = nla_data(nla); icmpv6_key->icmpv6_type = ntohs(output->tp.src); icmpv6_key->icmpv6_code = ntohs(output->tp.dst); if (swkey->tp.src 
== htons(NDISC_NEIGHBOUR_SOLICITATION) || swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) { struct ovs_key_nd *nd_key; nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key)); if (!nla) goto nla_put_failure; nd_key = nla_data(nla); memcpy(nd_key->nd_target, &output->ipv6.nd.target, sizeof(nd_key->nd_target)); ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll); ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll); } } } unencap: if (in_encap) nla_nest_end(skb, in_encap); if (encap) nla_nest_end(skb, encap); return 0; nla_put_failure: return -EMSGSIZE; } int ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, int attr, bool is_mask, struct sk_buff *skb) { int err; struct nlattr *nla; nla = nla_nest_start_noflag(skb, attr); if (!nla) return -EMSGSIZE; err = __ovs_nla_put_key(swkey, output, is_mask, skb); if (err) return err; nla_nest_end(skb, nla); return 0; } /* Called with ovs_mutex or RCU read lock. */ int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) { if (ovs_identifier_is_ufid(&flow->id)) return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, flow->id.ufid); return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, OVS_FLOW_ATTR_KEY, false, skb); } /* Called with ovs_mutex or RCU read lock. */ int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) { return ovs_nla_put_key(&flow->key, &flow->key, OVS_FLOW_ATTR_KEY, false, skb); } /* Called with ovs_mutex or RCU read lock. 
*/ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb) { return ovs_nla_put_key(&flow->key, &flow->mask->key, OVS_FLOW_ATTR_MASK, true, skb); } #define MAX_ACTIONS_BUFSIZE (32 * 1024) static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE); sfa = kmalloc(kmalloc_size_roundup(sizeof(*sfa) + size), GFP_KERNEL); if (!sfa) return ERR_PTR(-ENOMEM); sfa->actions_len = 0; return sfa; } static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len); static void ovs_nla_free_check_pkt_len_action(const struct nlattr *action) { const struct nlattr *a; int rem; nla_for_each_nested(a, action, rem) { switch (nla_type(a)) { case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL: case OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER: ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); break; } } } static void ovs_nla_free_clone_action(const struct nlattr *action) { const struct nlattr *a = nla_data(action); int rem = nla_len(action); switch (nla_type(a)) { case OVS_CLONE_ATTR_EXEC: /* The real list of actions follows this attribute. */ a = nla_next(a, &rem); ovs_nla_free_nested_actions(a, rem); break; } } static void ovs_nla_free_dec_ttl_action(const struct nlattr *action) { const struct nlattr *a = nla_data(action); switch (nla_type(a)) { case OVS_DEC_TTL_ATTR_ACTION: ovs_nla_free_nested_actions(nla_data(a), nla_len(a)); break; } } static void ovs_nla_free_sample_action(const struct nlattr *action) { const struct nlattr *a = nla_data(action); int rem = nla_len(action); switch (nla_type(a)) { case OVS_SAMPLE_ATTR_ARG: /* The real list of actions follows this attribute. 
*/ a = nla_next(a, &rem); ovs_nla_free_nested_actions(a, rem); break; } } static void ovs_nla_free_set_action(const struct nlattr *a) { const struct nlattr *ovs_key = nla_data(a); struct ovs_tunnel_info *ovs_tun; switch (nla_type(ovs_key)) { case OVS_KEY_ATTR_TUNNEL_INFO: ovs_tun = nla_data(ovs_key); dst_release((struct dst_entry *)ovs_tun->tun_dst); break; } } static void ovs_nla_free_nested_actions(const struct nlattr *actions, int len) { const struct nlattr *a; int rem; /* Whenever new actions are added, the need to update this * function should be considered. */ BUILD_BUG_ON(OVS_ACTION_ATTR_MAX != 25); if (!actions) return; nla_for_each_attr(a, actions, len, rem) { switch (nla_type(a)) { case OVS_ACTION_ATTR_CHECK_PKT_LEN: ovs_nla_free_check_pkt_len_action(a); break; case OVS_ACTION_ATTR_CLONE: ovs_nla_free_clone_action(a); break; case OVS_ACTION_ATTR_CT: ovs_ct_free_action(a); break; case OVS_ACTION_ATTR_DEC_TTL: ovs_nla_free_dec_ttl_action(a); break; case OVS_ACTION_ATTR_SAMPLE: ovs_nla_free_sample_action(a); break; case OVS_ACTION_ATTR_SET: ovs_nla_free_set_action(a); break; } } } void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts) { if (!sf_acts) return; ovs_nla_free_nested_actions(sf_acts->actions, sf_acts->actions_len); kfree(sf_acts); } static void __ovs_nla_free_flow_actions(struct rcu_head *head) { ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu)); } /* Schedules 'sf_acts' to be freed after the next RCU grace period. * The caller must hold rcu_read_lock for this to be sensible. 
*/
void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
{
	call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
}

/* Reserve NLA_ALIGN(attr_len) bytes at the end of the action list '*sfa'.
 *
 * When the current allocation (as reported by ksize()) is too small, a
 * larger list is allocated, the existing actions are copied over, the old
 * list is freed and '*sfa' is updated, so callers must not keep pointers
 * into the old list across this call.
 *
 * Returns a pointer to the reserved space, ERR_PTR(-EMSGSIZE) if the
 * request would exceed MAX_ACTIONS_BUFSIZE, or the allocation error.
 */
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
				       int attr_len, bool log)
{

	struct sw_flow_actions *acts;
	int new_acts_size;
	size_t req_size = NLA_ALIGN(attr_len);
	int next_offset = offsetof(struct sw_flow_actions, actions) +
					(*sfa)->actions_len;

	/* Fast path: the request fits in the existing allocation. */
	if (req_size <= (ksize(*sfa) - next_offset))
		goto out;

	/* Grow to whichever is larger: what is needed, or double. */
	new_acts_size = max(next_offset + req_size, ksize(*sfa) * 2);

	if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
		/* Doubling overshot the cap; only fail when the request
		 * itself does not fit below it.
		 */
		if ((next_offset + req_size) > MAX_ACTIONS_BUFSIZE) {
			OVS_NLERR(log, "Flow action size exceeds max %u",
				  MAX_ACTIONS_BUFSIZE);
			return ERR_PTR(-EMSGSIZE);
		}
		new_acts_size = MAX_ACTIONS_BUFSIZE;
	}

	acts = nla_alloc_flow_actions(new_acts_size);
	if (IS_ERR(acts))
		return ERR_CAST(acts);

	memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
	acts->actions_len = (*sfa)->actions_len;
	acts->orig_len = (*sfa)->orig_len;
	kfree(*sfa);
	*sfa = acts;

out:
	(*sfa)->actions_len += req_size;
	return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
}

/* Append one attribute of type 'attrtype' with a 'len'-byte payload to
 * '*sfa'.  The payload is copied from 'data' when it is non-NULL and left
 * for the caller to fill otherwise; the alignment padding is zeroed.
 * Returns the new attribute or an ERR_PTR() from reserve_sfa_size().
 */
static struct nlattr *__add_action(struct sw_flow_actions **sfa,
				   int attrtype, void *data, int len, bool log)
{
	struct nlattr *a;

	a = reserve_sfa_size(sfa, nla_attr_size(len), log);
	if (IS_ERR(a))
		return a;

	a->nla_type = attrtype;
	a->nla_len = nla_attr_size(len);

	if (data)
		memcpy(nla_data(a), data, len);
	memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));

	return a;
}

/* Same as __add_action(), but returns 0 or a negative errno. */
int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
		       int len, bool log)
{
	struct nlattr *a;

	a = __add_action(sfa, attrtype, data, len, log);

	return PTR_ERR_OR_ZERO(a);
}

/* Open a nested action: append an empty attribute of type 'attrtype' and
 * return its offset within the action data (the value of 'actions_len'
 * before the append), or a negative errno.  The offset is later passed to
 * add_nested_action_end().
 */
static inline int add_nested_action_start(struct sw_flow_actions **sfa,
					  int attrtype, bool log)
{
	int used = (*sfa)->actions_len;
	int err;

	err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
	if (err)
		return err;

	return used;
}

/* Close a nested action opened at offset 'st_offset': set its length to
 * cover everything appended since.
 */
static inline void add_nested_action_end(struct sw_flow_actions *sfa,
					 int st_offset)
{
	struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
							       st_offset);

	a->nla_len = sfa->actions_len - st_offset;
}

static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci,
				  u32 mpls_label_count, bool log,
				  u32 depth);

/* Validate a sample action and append it to '*sfa' as a nested
 * OVS_ACTION_ATTR_SAMPLE holding an OVS_SAMPLE_ATTR_ARG followed by the
 * copied actions.  'last' tells whether this is the final action of the
 * enclosing list.  Returns 0 or a negative errno.
 */
static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
				    const struct sw_flow_key *key,
				    struct sw_flow_actions **sfa,
				    __be16 eth_type, __be16 vlan_tci,
				    u32 mpls_label_count, bool log, bool last,
				    u32 depth)
{
	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
	const struct nlattr *probability, *actions;
	const struct nlattr *a;
	int rem, start, err;
	struct sample_arg arg;

	/* Index the nested attributes, rejecting type 0, unknown types and
	 * duplicates.
	 */
	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);
		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
			return -EINVAL;
		attrs[type] = a;
	}
	/* Trailing bytes that do not form an attribute. */
	if (rem)
		return -EINVAL;

	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
	if (!probability || nla_len(probability) != sizeof(u32))
		return -EINVAL;

	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
		return -EINVAL;

	/* validation done, copy sample action. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
	if (start < 0)
		return start;

	/* When both skb and flow may be changed, put the sample
	 * into a deferred fifo. On the other hand, if only skb
	 * may be modified, the actions can be executed in place.
	 *
	 * Do this analysis at the flow installation time.
	 * Set 'arg.exec' to true if the actions can be
	 * executed without being deferred.
	 *
	 * If the sample is the last action, it can always be executed
	 * rather than deferred.
	 */
	arg.exec = last || !actions_may_change_flow(actions);
	arg.probability = nla_get_u32(probability);

	err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
				 log);
	if (err)
		return err;

	err = __ovs_nla_copy_actions(net, actions, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);

	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}

/* Validate a dec_ttl action and append it to '*sfa' as a nested
 * OVS_ACTION_ATTR_DEC_TTL containing a nested OVS_DEC_TTL_ATTR_ACTION with
 * the copied actions.  Returns 0 or a negative errno.
 */
static int validate_and_copy_dec_ttl(struct net *net,
				     const struct nlattr *attr,
				     const struct sw_flow_key *key,
				     struct sw_flow_actions **sfa,
				     __be16 eth_type, __be16 vlan_tci,
				     u32 mpls_label_count, bool log,
				     u32 depth)
{
	const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1];
	int start, action_start, err, rem;
	const struct nlattr *a, *actions;

	memset(attrs, 0, sizeof(attrs));
	nla_for_each_nested(a, attr, rem) {
		int type = nla_type(a);

		/* Ignore unknown attributes to be future proof. */
		if (type > OVS_DEC_TTL_ATTR_MAX)
			continue;

		if (!type || attrs[type]) {
			OVS_NLERR(log, "Duplicate or invalid key (type %d).",
				  type);
			return -EINVAL;
		}

		attrs[type] = a;
	}

	if (rem) {
		OVS_NLERR(log, "Message has %d unknown bytes.", rem);
		return -EINVAL;
	}

	actions = attrs[OVS_DEC_TTL_ATTR_ACTION];
	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) {
		OVS_NLERR(log, "Missing valid actions attribute.");
		return -EINVAL;
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log);
	if (start < 0)
		return start;

	action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log);
	if (action_start < 0)
		return action_start;

	err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type,
				     vlan_tci, mpls_label_count, log,
				     depth + 1);
	if (err)
		return err;

	/* Close the inner nest before the outer one. */
	add_nested_action_end(*sfa, action_start);
	add_nested_action_end(*sfa, start);
	return 0;
}

/* Validate a clone action and append it to '*sfa' as a nested
 * OVS_ACTION_ATTR_CLONE holding an OVS_CLONE_ATTR_EXEC flag followed by the
 * copied actions.  Returns 0 or a negative errno.
 */
static int validate_and_copy_clone(struct net *net,
				   const struct nlattr *attr,
				   const struct sw_flow_key *key,
				   struct sw_flow_actions **sfa,
				   __be16 eth_type, __be16 vlan_tci,
				   u32 mpls_label_count, bool log, bool last,
				   u32 depth)
{
	int start, err;
	u32 exec;

	if (nla_len(attr) && nla_len(attr) < NLA_HDRLEN)
		return -EINVAL;

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CLONE, log);
	if (start < 0)
		return start;

	/* Non-zero when this is the last action or the nested actions
	 * cannot change the flow, as reported by actions_may_change_flow().
	 */
	exec = last || !actions_may_change_flow(attr);

	err = ovs_nla_add_action(sfa, OVS_CLONE_ATTR_EXEC, &exec,
				 sizeof(exec), log);
	if (err)
		return err;

	err = __ovs_nla_copy_actions(net, attr, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);
	if (err)
		return err;

	add_nested_action_end(*sfa, start);

	return 0;
}

/* Initialise 'match' to point at 'key' and 'mask'.  The key is zeroed only
 * when 'reset_key' is set; a non-NULL mask always has its key and range
 * cleared.
 */
void ovs_match_init(struct sw_flow_match *match,
		    struct sw_flow_key *key,
		    bool reset_key,
		    struct sw_flow_mask *mask)
{
	memset(match, 0, sizeof(*match));
	match->key = key;
	match->mask = mask;

	if (reset_key)
		memset(key, 0, sizeof(*key));

	if (mask) {
		memset(&mask->key, 0, sizeof(mask->key));
		mask->range.start = mask->range.end = 0;
	}
}

/* Walk the Geneve options stored in 'key' and check that each option lies
 * entirely within 'tun_opts_len'.  Sets IP_TUNNEL_CRIT_OPT_BIT in the
 * tunnel flags if any option has GENEVE_CRIT_OPT_TYPE set.
 * Returns 0, or -EINVAL on a truncated option.
 */
static int validate_geneve_opts(struct sw_flow_key *key)
{
	struct geneve_opt *option;
	int opts_len = key->tun_opts_len;
	bool crit_opt = false;

	option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
	while (opts_len > 0) {
		int len;

		/* Not even a full option header left. */
		if (opts_len < sizeof(*option))
			return -EINVAL;

		/* 'length' is multiplied by 4 to obtain bytes. */
		len = sizeof(*option) + option->length * 4;
		if (len > opts_len)
			return -EINVAL;

		crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);

		option = (struct geneve_opt *)((u8 *)option + len);
		opts_len -= len;
	}

	if (crit_opt)
		__set_bit(IP_TUNNEL_CRIT_OPT_BIT, key->tun_key.tun_flags);

	return 0;
}

/* Parse a set(tunnel) action and append it to '*sfa' as a nested
 * OVS_ACTION_ATTR_SET holding an OVS_KEY_ATTR_TUNNEL_INFO attribute that
 * points at a newly allocated metadata dst.  On success that dst reference
 * is stored in the action.  Returns 0 or a negative errno.
 */
static int validate_and_copy_set_tun(const struct nlattr *attr,
				     struct sw_flow_actions **sfa, bool log)
{
	IP_TUNNEL_DECLARE_FLAGS(dst_opt_type) = { };
	struct sw_flow_match match;
	struct sw_flow_key key;
	struct metadata_dst *tun_dst;
	struct ip_tunnel_info *tun_info;
	struct ovs_tunnel_info *ovs_tun;
	struct nlattr *a;
	int err = 0, start, opts_type;

	/* Parse the tunnel attributes into a scratch key on the stack. */
	ovs_match_init(&match, &key, true, NULL);
	opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
	if (opts_type < 0)
		return opts_type;

	if (key.tun_opts_len) {
		switch (opts_type) {
		case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
			err = validate_geneve_opts(&key);
			if (err < 0)
				return err;

			__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, dst_opt_type);
			break;
		case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
			__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, dst_opt_type);
			break;
		case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
			__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, dst_opt_type);
			break;
		}
	}

	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
	if (start < 0)
		return start;

	tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
				     GFP_KERNEL);

	if (!tun_dst)
		return -ENOMEM;

	err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
	if (err) {
		dst_release((struct dst_entry *)tun_dst);
		return err;
	}

	a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
			 sizeof(*ovs_tun), log);
	if (IS_ERR(a)) {
		/* The dst was never stored in the action; drop it here. */
		dst_release((struct dst_entry *)tun_dst);
		return PTR_ERR(a);
	}

	ovs_tun = nla_data(a);
	ovs_tun->tun_dst = tun_dst;

	tun_info = &tun_dst->u.tun_info;
	tun_info->mode = IP_TUNNEL_INFO_TX;
	if (key.tun_proto == AF_INET6)
		tun_info->mode |= IP_TUNNEL_INFO_IPV6;
	else if (key.tun_proto == AF_INET && key.tun_key.u.ipv4.dst == 0)
		tun_info->mode |= IP_TUNNEL_INFO_BRIDGE;
	tun_info->key = key.tun_key;

	/* We need to store the options in the action itself since
	 * everything else will go away after flow setup. We can append
	 * it to tun_info and then point there.
	 */
	ip_tunnel_info_opts_set(tun_info,
				TUN_METADATA_OPTS(&key, key.tun_opts_len),
				key.tun_opts_len, dst_opt_type);
	add_nested_action_end(*sfa, start);

	return err;
}

/* Check an NSH attribute by parsing it into a scratch key with
 * nsh_key_put_from_nlattr().  Returns true when that parse returns 0.
 */
static bool validate_nsh(const struct nlattr *attr, bool is_mask,
			 bool is_push_nsh, bool log)
{
	struct sw_flow_match match;
	struct sw_flow_key key;
	int ret = 0;

	ovs_match_init(&match, &key, true, NULL);
	ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
				      is_push_nsh, log);
	return !ret;
}

/* Return false if there are any non-masked bits set.
 * Mask follows data immediately, before any netlink padding.
*/
static bool validate_masked(u8 *data, int len)
{
	/* The mask starts 'len' bytes after the start of the data. */
	u8 *mask = data + len;

	while (len--)
		if (*data++ & ~*mask++)
			return false;

	return true;
}

/* Validate a set or set_masked action 'a' against the flow it applies to.
 *
 * Tunnel sets are copied into '*sfa' by validate_and_copy_set_tun(), and
 * non-masked sets are rewritten into '*sfa' as OVS_ACTION_ATTR_SET_TO_MASKED
 * with an all-ones mask; in both cases '*skip_copy' is set to true.
 * '*skip_copy' is left untouched for masked sets.
 *
 * Returns 0 or a negative errno.
 */
static int validate_set(const struct nlattr *a,
			const struct sw_flow_key *flow_key,
			struct sw_flow_actions **sfa, bool *skip_copy,
			u8 mac_proto, __be16 eth_type, bool masked, bool log)
{
	const struct nlattr *ovs_key = nla_data(a);
	int key_type = nla_type(ovs_key);
	size_t key_len;

	/* There can be only one key in an action */
	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
		return -EINVAL;

	/* A masked set carries key and mask back to back, so the key is
	 * half of the payload.
	 */
	key_len = nla_len(ovs_key);
	if (masked)
		key_len /= 2;

	if (key_type > OVS_KEY_ATTR_MAX ||
	    !check_attr_len(key_len, ovs_key_lens[key_type].len))
		return -EINVAL;

	if (masked && !validate_masked(nla_data(ovs_key), key_len))
		return -EINVAL;

	switch (key_type) {
	case OVS_KEY_ATTR_PRIORITY:
	case OVS_KEY_ATTR_SKB_MARK:
	case OVS_KEY_ATTR_CT_MARK:
	case OVS_KEY_ATTR_CT_LABELS:
		break;

	case OVS_KEY_ATTR_ETHERNET:
		if (mac_proto != MAC_PROTO_ETHERNET)
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_TUNNEL: {
		int err;

		if (masked)
			return -EINVAL; /* Masked tunnel set not supported. */

		*skip_copy = true;
		err = validate_and_copy_set_tun(a, sfa, log);
		if (err)
			return err;
		break;
	}
	case OVS_KEY_ATTR_IPV4: {
		const struct ovs_key_ipv4 *ipv4_key;

		if (eth_type != htons(ETH_P_IP))
			return -EINVAL;

		ipv4_key = nla_data(ovs_key);

		if (masked) {
			/* The mask struct directly follows the key struct. */
			const struct ovs_key_ipv4 *mask = ipv4_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv4_proto || mask->ipv4_frag)
				return -EINVAL;
		} else {
			if (ipv4_key->ipv4_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv4_key->ipv4_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		break;
	}
	case OVS_KEY_ATTR_IPV6: {
		const struct ovs_key_ipv6 *ipv6_key;

		if (eth_type != htons(ETH_P_IPV6))
			return -EINVAL;

		ipv6_key = nla_data(ovs_key);

		if (masked) {
			/* The mask struct directly follows the key struct. */
			const struct ovs_key_ipv6 *mask = ipv6_key + 1;

			/* Non-writeable fields. */
			if (mask->ipv6_proto || mask->ipv6_frag)
				return -EINVAL;

			/* Invalid bits in the flow label mask? */
			if (ntohl(mask->ipv6_label) & 0xFFF00000)
				return -EINVAL;
		} else {
			if (ipv6_key->ipv6_proto != flow_key->ip.proto)
				return -EINVAL;

			if (ipv6_key->ipv6_frag != flow_key->ip.frag)
				return -EINVAL;
		}
		/* Only the low 20 bits of the label may be set. */
		if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
			return -EINVAL;

		break;
	}
	case OVS_KEY_ATTR_TCP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_TCP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_UDP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_UDP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_MPLS:
		if (!eth_p_mpls(eth_type))
			return -EINVAL;
		break;

	case OVS_KEY_ATTR_SCTP:
		if ((eth_type != htons(ETH_P_IP) &&
		     eth_type != htons(ETH_P_IPV6)) ||
		    flow_key->ip.proto != IPPROTO_SCTP)
			return -EINVAL;

		break;

	case OVS_KEY_ATTR_NSH:
		if (eth_type != htons(ETH_P_NSH))
			return -EINVAL;
		if (!validate_nsh(nla_data(a), masked, false, log))
			return -EINVAL;
		break;

	default:
		return -EINVAL;
	}

	/* Convert non-masked non-tunnel set actions to masked set actions. */
	if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
		int start, len = key_len * 2;
		struct nlattr *at;

		*skip_copy = true;

		start = add_nested_action_start(sfa,
						OVS_ACTION_ATTR_SET_TO_MASKED,
						log);
		if (start < 0)
			return start;

		at = __add_action(sfa, key_type, NULL, len, log);
		if (IS_ERR(at))
			return PTR_ERR(at);

		memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
		memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
		/* Clear non-writeable bits from otherwise writeable fields. */
		if (key_type == OVS_KEY_ATTR_IPV6) {
			struct ovs_key_ipv6 *mask = nla_data(at) + key_len;

			mask->ipv6_label &= htonl(0x000FFFFF);
		}
		add_nested_action_end(*sfa, start);
	}

	return 0;
}

/* Validate a userspace action: the nested attributes must parse against
 * the policy below and a non-zero OVS_USERSPACE_ATTR_PID must be present.
 * Returns 0 or a negative errno.
 */
static int validate_userspace(const struct nlattr *attr)
{
	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
		[OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
	};
	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
	int error;

	error = nla_parse_nested_deprecated(a, OVS_USERSPACE_ATTR_MAX, attr,
					    userspace_policy, NULL);
	if (error)
		return error;

	if (!a[OVS_USERSPACE_ATTR_PID] ||
	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
		return -EINVAL;

	return 0;
}

/* Netlink policy for the attributes nested in a check_pkt_len action. */
static const struct nla_policy cpl_policy[OVS_CHECK_PKT_LEN_ATTR_MAX + 1] = {
	[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] = {.type = NLA_U16 },
	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER] = {.type = NLA_NESTED },
	[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL] = {.type = NLA_NESTED },
};

/* Validate a check_pkt_len action and append it to '*sfa' as a nested
 * OVS_ACTION_ATTR_CHECK_PKT_LEN holding an OVS_CHECK_PKT_LEN_ATTR_ARG, then
 * the IF_LESS_EQUAL actions, then the IF_GREATER actions.  A non-zero
 * packet length and both action lists are required.
 * Returns 0 or a negative errno.
 */
static int validate_and_copy_check_pkt_len(struct net *net,
					   const struct nlattr *attr,
					   const struct sw_flow_key *key,
					   struct sw_flow_actions **sfa,
					   __be16 eth_type, __be16 vlan_tci,
					   u32 mpls_label_count,
					   bool log, bool last, u32 depth)
{
	const struct nlattr *acts_if_greater, *acts_if_lesser_eq;
	struct nlattr *a[OVS_CHECK_PKT_LEN_ATTR_MAX + 1];
	struct check_pkt_len_arg arg;
	int nested_acts_start;
	int start, err;

	err = nla_parse_deprecated_strict(a, OVS_CHECK_PKT_LEN_ATTR_MAX,
					  nla_data(attr), nla_len(attr),
					  cpl_policy, NULL);
	if (err)
		return err;

	if (!a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN] ||
	    !nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]))
		return -EINVAL;

	acts_if_lesser_eq = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL];
	acts_if_greater = a[OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER];

	/* Both the nested action should be present. */
	if (!acts_if_greater || !acts_if_lesser_eq)
		return -EINVAL;

	/* validation done, copy the nested actions. */
	start = add_nested_action_start(sfa, OVS_ACTION_ATTR_CHECK_PKT_LEN,
					log);
	if (start < 0)
		return start;

	arg.pkt_len = nla_get_u16(a[OVS_CHECK_PKT_LEN_ATTR_PKT_LEN]);
	arg.exec_for_lesser_equal =
		last || !actions_may_change_flow(acts_if_lesser_eq);
	arg.exec_for_greater =
		last || !actions_may_change_flow(acts_if_greater);

	err = ovs_nla_add_action(sfa, OVS_CHECK_PKT_LEN_ATTR_ARG, &arg,
				 sizeof(arg), log);
	if (err)
		return err;

	nested_acts_start = add_nested_action_start(sfa,
		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL, log);
	if (nested_acts_start < 0)
		return nested_acts_start;

	err = __ovs_nla_copy_actions(net, acts_if_lesser_eq, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);

	if (err)
		return err;

	add_nested_action_end(*sfa, nested_acts_start);

	nested_acts_start = add_nested_action_start(sfa,
		OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER, log);
	if (nested_acts_start < 0)
		return nested_acts_start;

	err = __ovs_nla_copy_actions(net, acts_if_greater, key, sfa,
				     eth_type, vlan_tci, mpls_label_count, log,
				     depth + 1);

	if (err)
		return err;

	add_nested_action_end(*sfa, nested_acts_start);
	add_nested_action_end(*sfa, start);
	return 0;
}

/* Validate a psample action.  Returns -EOPNOTSUPP when CONFIG_PSAMPLE is
 * not enabled, the parse error if the attributes violate the policy,
 * -EINVAL if OVS_PSAMPLE_ATTR_GROUP is missing, and 0 otherwise.
 */
static int validate_psample(const struct nlattr *attr)
{
	static const struct nla_policy policy[OVS_PSAMPLE_ATTR_MAX + 1] = {
		[OVS_PSAMPLE_ATTR_GROUP] = { .type = NLA_U32 },
		[OVS_PSAMPLE_ATTR_COOKIE] = {
			.type = NLA_BINARY,
			.len = OVS_PSAMPLE_COOKIE_MAX_SIZE,
		},
	};
	struct nlattr *a[OVS_PSAMPLE_ATTR_MAX + 1];
	int err;

	if (!IS_ENABLED(CONFIG_PSAMPLE))
		return -EOPNOTSUPP;

	err = nla_parse_nested(a, OVS_PSAMPLE_ATTR_MAX, attr, policy, NULL);
	if (err)
		return err;

	return a[OVS_PSAMPLE_ATTR_GROUP] ? 0 : -EINVAL;
}

/* Append a verbatim copy of the attribute 'from', including its alignment
 * padding, to '*sfa'.  Returns 0 or a negative errno.
 */
static int copy_action(const struct nlattr *from,
		       struct sw_flow_actions **sfa, bool log)
{
	int totlen = NLA_ALIGN(from->nla_len);
	struct nlattr *to;

	to = reserve_sfa_size(sfa, from->nla_len, log);
	if (IS_ERR(to))
		return PTR_ERR(to);

	memcpy(to, from, totlen);
	return 0;
}

/* Validate the actions nested in 'attr' one by one.  'eth_type', 'vlan_tci',
 * 'mpls_label_count' and the MAC protocol taken from 'key' are tracked and
 * updated as header-changing actions are seen.  Nesting deeper than
 * OVS_COPY_ACTIONS_MAX_DEPTH is rejected with -EOVERFLOW.
 */
static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
				  const struct sw_flow_key *key,
				  struct sw_flow_actions **sfa,
				  __be16 eth_type, __be16 vlan_tci,
				  u32 mpls_label_count, bool log,
				  u32 depth)
{
	u8 mac_proto = ovs_key_mac_proto(key);
	const struct nlattr *a;
	int rem, err;

	if (depth > OVS_COPY_ACTIONS_MAX_DEPTH)
		return -EOVERFLOW;

	nla_for_each_nested(a, attr, rem) {
		/* Expected argument lengths, (u32)-1 for variable length. */
		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
			[OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
			[OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
			[OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
			[OVS_ACTION_ATTR_POP_VLAN] = 0,
			[OVS_ACTION_ATTR_SET] = (u32)-1,
			[OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
			[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
			[OVS_ACTION_ATTR_CT] = (u32)-1,
			[OVS_ACTION_ATTR_CT_CLEAR] = 0,
			[OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
			[OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
			[OVS_ACTION_ATTR_POP_ETH] = 0,
			[OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
			[OVS_ACTION_ATTR_POP_NSH] = 0,
			[OVS_ACTION_ATTR_METER] = sizeof(u32),
			[OVS_ACTION_ATTR_CLONE] = (u32)-1,
			[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
			[OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
			[OVS_ACTION_ATTR_DEC_TTL] = (u32)-1,
			[OVS_ACTION_ATTR_DROP] = sizeof(u32),
			[OVS_ACTION_ATTR_PSAMPLE] = (u32)-1,
		};
		const struct ovs_action_push_vlan *vlan;
		int type = nla_type(a);
		bool skip_copy;

		if (type > OVS_ACTION_ATTR_MAX ||
		    (action_lens[type] != nla_len(a) &&
		     action_lens[type] != (u32)-1))
			return -EINVAL;

		skip_copy = false;
		switch (type) {
		case OVS_ACTION_ATTR_UNSPEC:
			return -EINVAL;

		case OVS_ACTION_ATTR_USERSPACE:
			err = validate_userspace(a);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_OUTPUT:
			if (nla_get_u32(a) >= DP_MAX_PORTS)
				return -EINVAL;
			break;

		case OVS_ACTION_ATTR_TRUNC: {
			const struct ovs_action_trunc *trunc = nla_data(a);

			if (trunc->max_len < ETH_HLEN)
				return -EINVAL;
			break;
		}

		case OVS_ACTION_ATTR_HASH: {
			const struct ovs_action_hash *act_hash = nla_data(a);

			switch (act_hash->hash_alg) {
			case OVS_HASH_ALG_L4:
				fallthrough;
			case OVS_HASH_ALG_SYM_L4:
				break;
			default:
				return  -EINVAL;
			}

			break;
		}

		case OVS_ACTION_ATTR_POP_VLAN:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			vlan_tci = htons(0);
			break;

		case OVS_ACTION_ATTR_PUSH_VLAN:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			vlan = nla_data(a);
			if (!eth_type_vlan(vlan->vlan_tpid))
				return -EINVAL;
			if (!(vlan->vlan_tci & htons(VLAN_CFI_MASK)))
				return -EINVAL;
			vlan_tci = vlan->vlan_tci;
			break;

		case OVS_ACTION_ATTR_RECIRC:
			break;

		case OVS_ACTION_ATTR_ADD_MPLS: {
			const struct ovs_action_add_mpls *mpls = nla_data(a);

			if (!eth_p_mpls(mpls->mpls_ethertype))
				return -EINVAL;

			if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
				if (vlan_tci & htons(VLAN_CFI_MASK) ||
				    (eth_type != htons(ETH_P_IP) &&
				     eth_type != htons(ETH_P_IPV6) &&
				     eth_type != htons(ETH_P_ARP) &&
				     eth_type != htons(ETH_P_RARP) &&
				     !eth_p_mpls(eth_type)))
					return -EINVAL;
				mpls_label_count++;
			} else {
				if (mac_proto == MAC_PROTO_ETHERNET) {
					mpls_label_count = 1;
					mac_proto = MAC_PROTO_NONE;
				} else {
					mpls_label_count++;
				}
			}
			eth_type = mpls->mpls_ethertype;
			break;
		}

		case OVS_ACTION_ATTR_PUSH_MPLS: {
			const struct ovs_action_push_mpls *mpls = nla_data(a);

			if (!eth_p_mpls(mpls->mpls_ethertype))
				return -EINVAL;
			/* Prohibit push MPLS other than to a white list
			 * for packets that have a known tag order.
			 */
			if (vlan_tci & htons(VLAN_CFI_MASK) ||
			    (eth_type != htons(ETH_P_IP) &&
			     eth_type != htons(ETH_P_IPV6) &&
			     eth_type != htons(ETH_P_ARP) &&
			     eth_type != htons(ETH_P_RARP) &&
			     !eth_p_mpls(eth_type)))
				return -EINVAL;
			eth_type = mpls->mpls_ethertype;
			mpls_label_count++;
			break;
		}

		case OVS_ACTION_ATTR_POP_MPLS: {
			__be16  proto;
			if (vlan_tci & htons(VLAN_CFI_MASK) ||
			    !eth_p_mpls(eth_type))
				return -EINVAL;

			/* Disallow subsequent L2.5+ set actions and mpls_pop
			 * actions once the last MPLS label in the packet is
			 * popped as there is no check here to ensure that
			 * the new eth type is valid and thus set actions could
			 * write off the end of the packet or otherwise corrupt
			 * it.
			 *
			 * Support for these actions is planned using packet
			 * recirculation.
			 */
			proto = nla_get_be16(a);

			if (proto == htons(ETH_P_TEB) &&
			    mac_proto != MAC_PROTO_NONE)
				return -EINVAL;

			mpls_label_count--;

			if (!eth_p_mpls(proto) || !mpls_label_count)
				eth_type = htons(0);
			else
				eth_type =  proto;

			break;
		}

		case OVS_ACTION_ATTR_SET:
			err = validate_set(a, key, sfa,
					   &skip_copy, mac_proto, eth_type,
					   false, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SET_MASKED:
			err = validate_set(a, key, sfa,
					   &skip_copy, mac_proto, eth_type,
					   true, log);
			if (err)
				return err;
			break;

		case OVS_ACTION_ATTR_SAMPLE: {
			bool last = nla_is_last(a, rem);

			err = validate_and_copy_sample(net, a, key, sfa,
						       eth_type, vlan_tci,
						       mpls_label_count,
						       log, last, depth);
			if (err)
				return err;
			skip_copy = true;
			break;
		}

		case OVS_ACTION_ATTR_CT:
			err = ovs_ct_copy_action(net, a, key, sfa, log);
			if (err)
				return err;
			skip_copy = true;
			break;

		case OVS_ACTION_ATTR_CT_CLEAR:
			break;

		case OVS_ACTION_ATTR_PUSH_ETH:
			/* Disallow pushing an Ethernet header if one
			 * is already present */
			if (mac_proto != MAC_PROTO_NONE)
				return -EINVAL;
			mac_proto = MAC_PROTO_ETHERNET;
			break;

		case OVS_ACTION_ATTR_POP_ETH:
			if (mac_proto != MAC_PROTO_ETHERNET)
				return -EINVAL;
			if (vlan_tci & htons(VLAN_CFI_MASK))
				return -EINVAL;
			mac_proto = MAC_PROTO_NONE;
			break;

		case
OVS_ACTION_ATTR_PUSH_NSH: if (mac_proto != MAC_PROTO_ETHERNET) { u8 next_proto; next_proto = tun_p_from_eth_p(eth_type); if (!next_proto) return -EINVAL; } mac_proto = MAC_PROTO_NONE; if (!validate_nsh(nla_data(a), false, true, true)) return -EINVAL; break; case OVS_ACTION_ATTR_POP_NSH: { __be16 inner_proto; if (eth_type != htons(ETH_P_NSH)) return -EINVAL; inner_proto = tun_p_to_eth_p(key->nsh.base.np); if (!inner_proto) return -EINVAL; if (key->nsh.base.np == TUN_P_ETHERNET) mac_proto = MAC_PROTO_ETHERNET; else mac_proto = MAC_PROTO_NONE; break; } case OVS_ACTION_ATTR_METER: /* Non-existent meters are simply ignored. */ break; case OVS_ACTION_ATTR_CLONE: { bool last = nla_is_last(a, rem); err = validate_and_copy_clone(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, log, last, depth); if (err) return err; skip_copy = true; break; } case OVS_ACTION_ATTR_CHECK_PKT_LEN: { bool last = nla_is_last(a, rem); err = validate_and_copy_check_pkt_len(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, log, last, depth); if (err) return err; skip_copy = true; break; } case OVS_ACTION_ATTR_DEC_TTL: err = validate_and_copy_dec_ttl(net, a, key, sfa, eth_type, vlan_tci, mpls_label_count, log, depth); if (err) return err; skip_copy = true; break; case OVS_ACTION_ATTR_DROP: if (!nla_is_last(a, rem)) return -EINVAL; break; case OVS_ACTION_ATTR_PSAMPLE: err = validate_psample(a); if (err) return err; break; default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; } if (!skip_copy) { err = copy_action(a, sfa, log); if (err) return err; } } if (rem > 0) return -EINVAL; return 0; } /* 'key' must be the masked key. 
*/ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) { int err; u32 mpls_label_count = 0; *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE)); if (IS_ERR(*sfa)) return PTR_ERR(*sfa); if (eth_p_mpls(key->eth.type)) mpls_label_count = hweight_long(key->mpls.num_labels_mask); (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type, key->eth.vlan.tci, mpls_label_count, log, 0); if (err) ovs_nla_free_flow_actions(*sfa); return err; } static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { struct nlattr *start, *ac_start = NULL, *sample_arg; int err = 0, rem = nla_len(attr); const struct sample_arg *arg; struct nlattr *actions; start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SAMPLE); if (!start) return -EMSGSIZE; sample_arg = nla_data(attr); arg = nla_data(sample_arg); actions = nla_next(sample_arg, &rem); if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) { err = -EMSGSIZE; goto out; } ac_start = nla_nest_start_noflag(skb, OVS_SAMPLE_ATTR_ACTIONS); if (!ac_start) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_actions(actions, rem, skb); out: if (err) { nla_nest_cancel(skb, ac_start); nla_nest_cancel(skb, start); } else { nla_nest_end(skb, ac_start); nla_nest_end(skb, start); } return err; } static int clone_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { struct nlattr *start; int err = 0, rem = nla_len(attr); start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CLONE); if (!start) return -EMSGSIZE; /* Skipping the OVS_CLONE_ATTR_EXEC that is always the first attribute. 
*/ attr = nla_next(nla_data(attr), &rem); err = ovs_nla_put_actions(attr, rem, skb); if (err) nla_nest_cancel(skb, start); else nla_nest_end(skb, start); return err; } static int check_pkt_len_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { struct nlattr *start, *ac_start = NULL; const struct check_pkt_len_arg *arg; const struct nlattr *a, *cpl_arg; int err = 0, rem = nla_len(attr); start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_CHECK_PKT_LEN); if (!start) return -EMSGSIZE; /* The first nested attribute in 'attr' is always * 'OVS_CHECK_PKT_LEN_ATTR_ARG'. */ cpl_arg = nla_data(attr); arg = nla_data(cpl_arg); if (nla_put_u16(skb, OVS_CHECK_PKT_LEN_ATTR_PKT_LEN, arg->pkt_len)) { err = -EMSGSIZE; goto out; } /* Second nested attribute in 'attr' is always * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'. */ a = nla_next(cpl_arg, &rem); ac_start = nla_nest_start_noflag(skb, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL); if (!ac_start) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); if (err) { nla_nest_cancel(skb, ac_start); goto out; } else { nla_nest_end(skb, ac_start); } /* Third nested attribute in 'attr' is always * OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER. 
*/ a = nla_next(a, &rem); ac_start = nla_nest_start_noflag(skb, OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_GREATER); if (!ac_start) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); if (err) { nla_nest_cancel(skb, ac_start); goto out; } else { nla_nest_end(skb, ac_start); } nla_nest_end(skb, start); return 0; out: nla_nest_cancel(skb, start); return err; } static int dec_ttl_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { struct nlattr *start, *action_start; const struct nlattr *a; int err = 0, rem; start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); if (!start) return -EMSGSIZE; nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { switch (nla_type(a)) { case OVS_DEC_TTL_ATTR_ACTION: action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION); if (!action_start) { err = -EMSGSIZE; goto out; } err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); if (err) goto out; nla_nest_end(skb, action_start); break; default: /* Ignore all other option to be future compatible */ break; } } nla_nest_end(skb, start); return 0; out: nla_nest_cancel(skb, start); return err; } static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); struct nlattr *start; int err; switch (key_type) { case OVS_KEY_ATTR_TUNNEL_INFO: { struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key); struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info; start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET); if (!start) return -EMSGSIZE; err = ip_tun_to_nlattr(skb, &tun_info->key, ip_tunnel_info_opts(tun_info), tun_info->options_len, ip_tunnel_info_af(tun_info), tun_info->mode); if (err) return err; nla_nest_end(skb, start); break; } default: if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key)) return -EMSGSIZE; break; } return 0; } static int masked_set_action_to_set_action_attr(const struct nlattr *a, struct sk_buff *skb) { const 
struct nlattr *ovs_key = nla_data(a); struct nlattr *nla; size_t key_len = nla_len(ovs_key) / 2; /* Revert the conversion we did from a non-masked set action to * masked set action. */ nla = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_SET); if (!nla) return -EMSGSIZE; if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key))) return -EMSGSIZE; nla_nest_end(skb, nla); return 0; } int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; int rem, err; nla_for_each_attr(a, attr, len, rem) { int type = nla_type(a); switch (type) { case OVS_ACTION_ATTR_SET: err = set_action_to_attr(a, skb); if (err) return err; break; case OVS_ACTION_ATTR_SET_TO_MASKED: err = masked_set_action_to_set_action_attr(a, skb); if (err) return err; break; case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) return err; break; case OVS_ACTION_ATTR_CT: err = ovs_ct_action_to_attr(nla_data(a), skb); if (err) return err; break; case OVS_ACTION_ATTR_CLONE: err = clone_action_to_attr(a, skb); if (err) return err; break; case OVS_ACTION_ATTR_CHECK_PKT_LEN: err = check_pkt_len_action_to_attr(a, skb); if (err) return err; break; case OVS_ACTION_ATTR_DEC_TTL: err = dec_ttl_action_to_attr(a, skb); if (err) return err; break; default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; break; } } return 0; }
75 75 75 75 75 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27