Total coverage: 221155 (12%)of 1889828
35 1 34 35 35 35 1 4 1 14 14 8 1 5 14 14 14 14 2 14 14 14 14 9 9 9 14 14 26 33 1 1 41 32 32 14 18 32 32 32 32 35 35 27 3 1 1 1 1 1 35 9 12 9 5 14 9 1 1 2 1 8 21 1 5 1 1 3 1 1 1 43 1 1 42 41 42 1 35 35 1 35 14 21 35 35 25 10 31 4 35 9 26 16 9 1 26 17 9 22 4 26 32 32 32 32 31 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 
959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 
1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 
1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 
2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 
2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 
2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 
3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 
3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 
4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 // SPDX-License-Identifier: GPL-2.0+ // // em28xx-cards.c - driver for Empia EM2800/EM2820/2840 USB // video capture devices // // Copyright (C) 2005 Ludovico Cavedon <cavedon@sssup.it> // Markus Rechberger <mrechberger@gmail.com> // Mauro Carvalho Chehab <mchehab@kernel.org> // Sascha Sommer <saschasommer@freenet.de> // Copyright (C) 2012 Frank Schäfer <fschaefer.oss@googlemail.com> #include "em28xx.h" #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/delay.h> #include <linux/i2c.h> #include <linux/usb.h> #include <media/tuner.h> #include <media/drv-intf/msp3400.h> #include <media/i2c/saa7115.h> #include <dt-bindings/media/tvp5150.h> #include <media/i2c/tvaudio.h> #include <media/tveeprom.h> #include <media/v4l2-common.h> #include <sound/ac97_codec.h> #define DRIVER_NAME "em28xx" static int tuner = -1; module_param(tuner, int, 0444); MODULE_PARM_DESC(tuner, "tuner type"); static unsigned int disable_ir; module_param(disable_ir, int, 0444); MODULE_PARM_DESC(disable_ir, "disable infrared remote support"); static unsigned int disable_usb_speed_check; module_param(disable_usb_speed_check, int, 0444); MODULE_PARM_DESC(disable_usb_speed_check, "override min bandwidth requirement of 480M bps"); static unsigned int card[] = {[0 ... 
(EM28XX_MAXBOARDS - 1)] = -1U }; module_param_array(card, int, NULL, 0444); MODULE_PARM_DESC(card, "card type"); static int usb_xfer_mode = -1; module_param(usb_xfer_mode, int, 0444); MODULE_PARM_DESC(usb_xfer_mode, "USB transfer mode for frame data (-1 = auto, 0 = prefer isoc, 1 = prefer bulk)"); /* Bitmask marking allocated devices from 0 to EM28XX_MAXBOARDS - 1 */ static DECLARE_BITMAP(em28xx_devused, EM28XX_MAXBOARDS); struct em28xx_hash_table { unsigned long hash; unsigned int model; unsigned int tuner; }; static void em28xx_pre_card_setup(struct em28xx *dev); /* * Reset sequences for analog/digital modes */ /* Reset for the most [analog] boards */ static const struct em28xx_reg_seq default_analog[] = { {EM2820_R08_GPIO_CTRL, 0x6d, ~EM_GPIO_4, 10}, { -1, -1, -1, -1}, }; /* Reset for the most [digital] boards */ static const struct em28xx_reg_seq default_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6e, ~EM_GPIO_4, 10}, { -1, -1, -1, -1}, }; /* Board :Zolid Hybrid Tv Stick */ static struct em28xx_reg_seq zolid_tuner[] = { {EM2820_R08_GPIO_CTRL, 0xfd, 0xff, 100}, {EM2820_R08_GPIO_CTRL, 0xfe, 0xff, 100}, { -1, -1, -1, -1}, }; static struct em28xx_reg_seq zolid_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6a, 0xff, 100}, {EM2820_R08_GPIO_CTRL, 0x7a, 0xff, 100}, {EM2880_R04_GPO, 0x04, 0xff, 100}, {EM2880_R04_GPO, 0x0c, 0xff, 100}, { -1, -1, -1, -1}, }; /* Board Hauppauge WinTV HVR 900 analog */ static const struct em28xx_reg_seq hauppauge_wintv_hvr_900_analog[] = { {EM2820_R08_GPIO_CTRL, 0x2d, ~EM_GPIO_4, 10}, { 0x05, 0xff, 0x10, 10}, { -1, -1, -1, -1}, }; /* Board Hauppauge WinTV HVR 900 digital */ static const struct em28xx_reg_seq hauppauge_wintv_hvr_900_digital[] = { {EM2820_R08_GPIO_CTRL, 0x2e, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x04, 0x0f, 10}, {EM2880_R04_GPO, 0x0c, 0x0f, 10}, { -1, -1, -1, -1}, }; /* Board Hauppauge WinTV HVR 900 (R2) digital */ static const struct em28xx_reg_seq hauppauge_wintv_hvr_900R2_digital[] = { {EM2820_R08_GPIO_CTRL, 0x2e, ~EM_GPIO_4, 10}, 
{EM2880_R04_GPO, 0x0c, 0x0f, 10}, { -1, -1, -1, -1}, }; /* Boards - EM2880 MSI DIGIVOX AD and EM2880_BOARD_MSI_DIGIVOX_AD_II */ static const struct em28xx_reg_seq em2880_msi_digivox_ad_analog[] = { {EM2820_R08_GPIO_CTRL, 0x69, ~EM_GPIO_4, 10}, { -1, -1, -1, -1}, }; /* Board - EM2882 Kworld 315U digital */ static const struct em28xx_reg_seq em2882_kworld_315u_digital[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0xfe, 0xff, 10}, {EM2880_R04_GPO, 0x04, 0xff, 10}, {EM2880_R04_GPO, 0x0c, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0x7e, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq em2882_kworld_315u_tuner_gpio[] = { {EM2880_R04_GPO, 0x08, 0xff, 10}, {EM2880_R04_GPO, 0x0c, 0xff, 10}, {EM2880_R04_GPO, 0x08, 0xff, 10}, {EM2880_R04_GPO, 0x0c, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq kworld_330u_analog[] = { {EM2820_R08_GPIO_CTRL, 0x6d, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x00, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq kworld_330u_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6e, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x08, 0xff, 10}, { -1, -1, -1, -1}, }; /* * Evga inDtube * GPIO0 - Enable digital power (s5h1409) - low to enable * GPIO1 - Enable analog power (tvp5150/emp202) - low to enable * GPIO4 - xc3028 reset * GOP3 - s5h1409 reset */ static const struct em28xx_reg_seq evga_indtube_analog[] = { {EM2820_R08_GPIO_CTRL, 0x79, 0xff, 60}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq evga_indtube_digital[] = { {EM2820_R08_GPIO_CTRL, 0x7a, 0xff, 1}, {EM2880_R04_GPO, 0x04, 0xff, 10}, {EM2880_R04_GPO, 0x0c, 0xff, 1}, { -1, -1, -1, -1}, }; /* * KWorld PlusTV 340U, UB435-Q and UB435-Q V2 (ATSC) GPIOs map: * EM_GPIO_0 - currently unknown * EM_GPIO_1 - LED disable/enable (1 = off, 0 = on) * EM_GPIO_2 - currently unknown * EM_GPIO_3 - currently unknown * EM_GPIO_4 - TDA18271HD/C1 tuner (1 = active, 0 = in reset) * EM_GPIO_5 - LGDT3304 ATSC/QAM demod (1 = active, 0 = in reset) * EM_GPIO_6 - 
currently unknown * EM_GPIO_7 - currently unknown */ static const struct em28xx_reg_seq kworld_a340_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6d, ~EM_GPIO_4, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq kworld_ub435q_v3_digital[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xfe, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xbe, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xfe, 0xff, 100}, { -1, -1, -1, -1}, }; /* Pinnacle Hybrid Pro eb1a:2881 */ static const struct em28xx_reg_seq pinnacle_hybrid_pro_analog[] = { {EM2820_R08_GPIO_CTRL, 0xfd, ~EM_GPIO_4, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq pinnacle_hybrid_pro_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6e, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x04, 0xff, 100},/* zl10353 reset */ {EM2880_R04_GPO, 0x0c, 0xff, 1}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_cinergy_USB_XS_FR_analog[] = { {EM2820_R08_GPIO_CTRL, 0x6d, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x00, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_cinergy_USB_XS_FR_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6e, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x08, 0xff, 10}, { -1, -1, -1, -1}, }; /* * PCTV HD Mini (80e) GPIOs * 0-5: not used * 6: demod reset, active low * 7: LED on, active high */ static const struct em28xx_reg_seq em2874_pctv_80e_digital[] = { {EM28XX_R06_I2C_CLK, 0x45, 0xff, 10}, /*400 KHz*/ {EM2874_R80_GPIO_P0_CTRL, 0x00, 0xff, 100},/*Demod reset*/ {EM2874_R80_GPIO_P0_CTRL, 0x40, 0xff, 10}, { -1, -1, -1, -1}, }; /* * eb1a:2868 Reddo DVB-C USB TV Box * GPIO4 - CU1216L NIM * Other GPIOs seems to be don't care. 
*/ static const struct em28xx_reg_seq reddo_dvb_c_usb_box[] = { {EM2820_R08_GPIO_CTRL, 0xfe, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0xde, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0xfe, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0x7f, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0x6f, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, { -1, -1, -1, -1}, }; /* Callback for the most boards */ static const struct em28xx_reg_seq default_tuner_gpio[] = { {EM2820_R08_GPIO_CTRL, EM_GPIO_4, EM_GPIO_4, 10}, {EM2820_R08_GPIO_CTRL, 0, EM_GPIO_4, 10}, {EM2820_R08_GPIO_CTRL, EM_GPIO_4, EM_GPIO_4, 10}, { -1, -1, -1, -1}, }; /* Mute/unmute */ static const struct em28xx_reg_seq compro_unmute_tv_gpio[] = { {EM2820_R08_GPIO_CTRL, 5, 7, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq compro_unmute_svid_gpio[] = { {EM2820_R08_GPIO_CTRL, 4, 7, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq compro_mute_gpio[] = { {EM2820_R08_GPIO_CTRL, 6, 7, 10}, { -1, -1, -1, -1}, }; /* Terratec AV350 */ static const struct em28xx_reg_seq terratec_av350_mute_gpio[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0x7f, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_av350_unmute_gpio[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq silvercrest_reg_seq[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2820_R08_GPIO_CTRL, 0x01, 0xf7, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq vc211a_enable[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0x07, 10}, {EM2820_R08_GPIO_CTRL, 0xff, 0x0f, 10}, {EM2820_R08_GPIO_CTRL, 0xff, 0x0b, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq dikom_dk300_digital[] = { {EM2820_R08_GPIO_CTRL, 0x6e, ~EM_GPIO_4, 10}, {EM2880_R04_GPO, 0x08, 0xff, 10}, { -1, -1, -1, -1}, }; /* Reset for the most [digital] boards */ static const struct em28xx_reg_seq leadership_digital[] = { {EM2874_R80_GPIO_P0_CTRL, 0x70, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct 
em28xx_reg_seq leadership_reset[] = { {EM2874_R80_GPIO_P0_CTRL, 0xf0, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xb0, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xf0, 0xff, 10}, { -1, -1, -1, -1}, }; /* * 2013:024f PCTV nanoStick T2 290e * GPIO_6 - demod reset * GPIO_7 - LED */ static const struct em28xx_reg_seq pctv_290e[] = { {EM2874_R80_GPIO_P0_CTRL, 0x00, 0xff, 80}, {EM2874_R80_GPIO_P0_CTRL, 0x40, 0xff, 80}, /* GPIO_6 = 1 */ {EM2874_R80_GPIO_P0_CTRL, 0xc0, 0xff, 80}, /* GPIO_7 = 1 */ { -1, -1, -1, -1}, }; #if 0 static const struct em28xx_reg_seq terratec_h5_gpio[] = { {EM2820_R08_GPIO_CTRL, 0xff, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xf2, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 50}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_h5_digital[] = { {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xe6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xa6, 0xff, 10}, { -1, -1, -1, -1}, }; #endif /* * 2013:024f PCTV DVB-S2 Stick 460e * GPIO_0 - POWER_ON * GPIO_1 - BOOST * GPIO_2 - VUV_LNB (red LED) * GPIO_3 - EXT_12V * GPIO_4 - INT_DEM (DEMOD GPIO_0) * GPIO_5 - INT_LNB * GPIO_6 - RESET_DEM * GPIO_7 - LED (green LED) */ static const struct em28xx_reg_seq pctv_460e[] = { {EM2874_R80_GPIO_P0_CTRL, 0x01, 0xff, 50}, { 0x0d, 0xff, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0x41, 0xff, 50}, /* GPIO_6=1 */ { 0x0d, 0x42, 0xff, 50}, {EM2874_R80_GPIO_P0_CTRL, 0x61, 0xff, 50}, /* GPIO_5=1 */ { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq c3tech_digital_duo_digital[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xfd, 0xff, 10}, /* xc5000 reset */ {EM2874_R80_GPIO_P0_CTRL, 0xf9, 0xff, 35}, {EM2874_R80_GPIO_P0_CTRL, 0xfd, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xfe, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xbe, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xfe, 0xff, 20}, { -1, -1, -1, -1}, }; /* * 2013:0258 PCTV DVB-S2 Stick (461e) * GPIO 0 = 
POWER_ON * GPIO 1 = BOOST * GPIO 2 = VUV_LNB (red LED) * GPIO 3 = #EXT_12V * GPIO 4 = INT_DEM * GPIO 5 = INT_LNB * GPIO 6 = #RESET_DEM * GPIO 7 = P07_LED (green LED) */ static const struct em28xx_reg_seq pctv_461e[] = { {EM2874_R80_GPIO_P0_CTRL, 0x7f, 0xff, 0}, {0x0d, 0xff, 0xff, 0}, {EM2874_R80_GPIO_P0_CTRL, 0x3f, 0xff, 100}, /* reset demod */ {EM2874_R80_GPIO_P0_CTRL, 0x7f, 0xff, 200}, /* reset demod */ {0x0d, 0x42, 0xff, 0}, {EM2874_R80_GPIO_P0_CTRL, 0xeb, 0xff, 0}, {EM2874_R5F_TS_ENABLE, 0x84, 0x84, 0}, /* parallel? | null discard */ { -1, -1, -1, -1}, }; #if 0 static const struct em28xx_reg_seq hauppauge_930c_gpio[] = { {EM2874_R80_GPIO_P0_CTRL, 0x6f, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0x4f, 0xff, 10}, /* xc5000 reset */ {EM2874_R80_GPIO_P0_CTRL, 0x6f, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0x4f, 0xff, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq hauppauge_930c_digital[] = { {EM2874_R80_GPIO_P0_CTRL, 0xf6, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xe6, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xa6, 0xff, 10}, { -1, -1, -1, -1}, }; #endif /* * 1b80:e425 MaxMedia UB425-TC * 1b80:e1cc Delock 61959 * GPIO_6 - demod reset, 0=active * GPIO_7 - LED, 0=active */ static const struct em28xx_reg_seq maxmedia_ub425_tc[] = { {EM2874_R80_GPIO_P0_CTRL, 0x83, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xc3, 0xff, 100}, /* GPIO_6 = 1 */ {EM2874_R80_GPIO_P0_CTRL, 0x43, 0xff, 000}, /* GPIO_7 = 0 */ { -1, -1, -1, -1}, }; /* * 2304:0242 PCTV QuatroStick (510e) * GPIO_2: decoder reset, 0=active * GPIO_4: decoder suspend, 0=active * GPIO_6: demod reset, 0=active * GPIO_7: LED, 1=active */ static const struct em28xx_reg_seq pctv_510e[] = { {EM2874_R80_GPIO_P0_CTRL, 0x10, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0x14, 0xff, 100}, /* GPIO_2 = 1 */ {EM2874_R80_GPIO_P0_CTRL, 0x54, 0xff, 050}, /* GPIO_6 = 1 */ { -1, -1, -1, -1}, }; /* * 2013:0251 PCTV QuatroStick nano (520e) * GPIO_2: decoder reset, 0=active * GPIO_4: decoder suspend, 0=active * GPIO_6: demod reset, 0=active * GPIO_7: 
LED, 1=active */ static const struct em28xx_reg_seq pctv_520e[] = { {EM2874_R80_GPIO_P0_CTRL, 0x10, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0x14, 0xff, 100}, /* GPIO_2 = 1 */ {EM2874_R80_GPIO_P0_CTRL, 0x54, 0xff, 050}, /* GPIO_6 = 1 */ {EM2874_R80_GPIO_P0_CTRL, 0xd4, 0xff, 000}, /* GPIO_7 = 1 */ { -1, -1, -1, -1}, }; /* * 1ae7:9003/9004 SpeedLink Vicious And Devine Laplace webcam * reg 0x80/0x84: * GPIO_0: capturing LED, 0=on, 1=off * GPIO_2: AV mute button, 0=pressed, 1=unpressed * GPIO 3: illumination button, 0=pressed, 1=unpressed * GPIO_6: illumination/flash LED, 0=on, 1=off * reg 0x81/0x85: * GPIO_7: snapshot button, 0=pressed, 1=unpressed */ static const struct em28xx_reg_seq speedlink_vad_laplace_reg_seq[] = { {EM2820_R08_GPIO_CTRL, 0xf7, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xb2, 10}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq pctv_292e[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0}, {0x0d, 0xff, 0xff, 950}, {EM2874_R80_GPIO_P0_CTRL, 0xbd, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xfd, 0xff, 410}, {EM2874_R80_GPIO_P0_CTRL, 0x7d, 0xff, 300}, {EM2874_R80_GPIO_P0_CTRL, 0x7c, 0xff, 60}, {0x0d, 0x42, 0xff, 50}, {EM2874_R5F_TS_ENABLE, 0x85, 0xff, 0}, {-1, -1, -1, -1}, }; static const struct em28xx_reg_seq terratec_t2_stick_hd[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0}, {0x0d, 0xff, 0xff, 600}, {EM2874_R80_GPIO_P0_CTRL, 0xfc, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xbc, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xfc, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0x00, 0xff, 300}, {EM2874_R80_GPIO_P0_CTRL, 0xf8, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xfc, 0xff, 300}, {0x0d, 0x42, 0xff, 1000}, {EM2874_R5F_TS_ENABLE, 0x85, 0xff, 0}, {-1, -1, -1, -1}, }; static const struct em28xx_reg_seq plex_px_bcud[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0}, {0x0d, 0xff, 0xff, 0}, {EM2874_R50_IR_CONFIG, 0x01, 0xff, 0}, {EM28XX_R06_I2C_CLK, 0x40, 0xff, 0}, {EM2874_R80_GPIO_P0_CTRL, 0xfd, 0xff, 100}, {EM28XX_R12_VINENABLE, 0x20, 0x20, 0}, {0x0d, 0x42, 0xff, 1000}, 
{EM2874_R80_GPIO_P0_CTRL, 0xfc, 0xff, 10}, {EM2874_R80_GPIO_P0_CTRL, 0xfd, 0xff, 10}, {0x73, 0xfd, 0xff, 100}, {-1, -1, -1, -1}, }; /* * 2040:0265 Hauppauge WinTV-dualHD DVB Isoc * 2040:8265 Hauppauge WinTV-dualHD DVB Bulk * 2040:026d Hauppauge WinTV-dualHD ATSC/QAM Isoc * 2040:826d Hauppauge WinTV-dualHD ATSC/QAM Bulk * reg 0x80/0x84: * GPIO_0: Yellow LED tuner 1, 0=on, 1=off * GPIO_1: Green LED tuner 1, 0=on, 1=off * GPIO_2: Yellow LED tuner 2, 0=on, 1=off * GPIO_3: Green LED tuner 2, 0=on, 1=off * GPIO_5: Reset #2, 0=active * GPIO_6: Reset #1, 0=active */ static const struct em28xx_reg_seq hauppauge_dualhd_dvb[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0}, {0x0d, 0xff, 0xff, 200}, {0x50, 0x04, 0xff, 300}, {EM2874_R80_GPIO_P0_CTRL, 0xbf, 0xff, 100}, /* demod 1 reset */ {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xdf, 0xff, 100}, /* demod 2 reset */ {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 100}, {EM2874_R5F_TS_ENABLE, 0x00, 0xff, 50}, /* disable TS filters */ {EM2874_R5D_TS1_PKT_SIZE, 0x05, 0xff, 50}, {EM2874_R5E_TS2_PKT_SIZE, 0x05, 0xff, 50}, {-1, -1, -1, -1}, }; /* Hauppauge USB QuadHD */ static struct em28xx_reg_seq hauppauge_usb_quadhd_atsc_reg_seq[] = { {EM2874_R80_GPIO_P0_CTRL, 0xff, 0xff, 0}, {0x0d, 0xff, 0xff, 200}, {0x50, 0x04, 0xff, 300}, {EM2874_R80_GPIO_P0_CTRL, 0xb0, 0xf0, 100}, /* demod 1 reset */ {EM2874_R80_GPIO_P0_CTRL, 0xf0, 0xf0, 100}, {EM2874_R80_GPIO_P0_CTRL, 0xd0, 0xf0, 100}, /* demod 2 reset */ {EM2874_R80_GPIO_P0_CTRL, 0xf0, 0xf0, 100}, {EM2874_R5F_TS_ENABLE, 0x44, 0xff, 50}, {EM2874_R5D_TS1_PKT_SIZE, 0x05, 0xff, 50}, {EM2874_R5E_TS2_PKT_SIZE, 0x05, 0xff, 50}, {-1, -1, -1, -1}, }; /* * MyGica USB TV Box * GPIO_1,0: 00=Composite audio * 01=Tuner audio * 10=Mute audio * 11=FM radio? (if equipped) * GPIO_2-6: Unused * GPIO_7: ?? 
*/ static const struct em28xx_reg_seq mygica_utv3_composite_audio_gpio[] = { {EM2820_R08_GPIO_CTRL, 0xfc, 0xff, 0}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq mygica_utv3_tuner_audio_gpio[] = { {EM2820_R08_GPIO_CTRL, 0xfd, 0xff, 0}, { -1, -1, -1, -1}, }; static const struct em28xx_reg_seq mygica_utv3_suspend_gpio[] = { {EM2820_R08_GPIO_CTRL, 0xfe, 0xff, 0}, { -1, -1, -1, -1}, }; /* * Button definitions */ static const struct em28xx_button std_snapshot_button[] = { { .role = EM28XX_BUTTON_SNAPSHOT, .reg_r = EM28XX_R0C_USBSUSP, .reg_clearing = EM28XX_R0C_USBSUSP, .mask = EM28XX_R0C_USBSUSP_SNAPSHOT, .inverted = 0, }, {-1, 0, 0, 0, 0}, }; static const struct em28xx_button speedlink_vad_laplace_buttons[] = { { .role = EM28XX_BUTTON_SNAPSHOT, .reg_r = EM2874_R85_GPIO_P1_STATE, .mask = 0x80, .inverted = 1, }, { .role = EM28XX_BUTTON_ILLUMINATION, .reg_r = EM2874_R84_GPIO_P0_STATE, .mask = 0x08, .inverted = 1, }, {-1, 0, 0, 0, 0}, }; /* * LED definitions */ static struct em28xx_led speedlink_vad_laplace_leds[] = { { .role = EM28XX_LED_ANALOG_CAPTURING, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = 0x01, .inverted = 1, }, { .role = EM28XX_LED_ILLUMINATION, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = 0x40, .inverted = 1, }, {-1, 0, 0, 0}, }; static struct em28xx_led kworld_ub435q_v3_leds[] = { { .role = EM28XX_LED_DIGITAL_CAPTURING, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = 0x80, .inverted = 1, }, {-1, 0, 0, 0}, }; static struct em28xx_led pctv_80e_leds[] = { { .role = EM28XX_LED_DIGITAL_CAPTURING, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = 0x80, .inverted = 0, }, {-1, 0, 0, 0}, }; static struct em28xx_led terratec_grabby_leds[] = { { .role = EM28XX_LED_ANALOG_CAPTURING, .gpio_reg = EM2820_R08_GPIO_CTRL, .gpio_mask = EM_GPIO_3, .inverted = 1, }, {-1, 0, 0, 0}, }; static struct em28xx_led hauppauge_dualhd_leds[] = { { .role = EM28XX_LED_DIGITAL_CAPTURING, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = EM_GPIO_1, .inverted = 1, }, { 
.role = EM28XX_LED_DIGITAL_CAPTURING_TS2, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = EM_GPIO_3, .inverted = 1, }, {-1, 0, 0, 0}, }; static struct em28xx_led hauppauge_usb_quadhd_leds[] = { { .role = EM28XX_LED_DIGITAL_CAPTURING, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = EM_GPIO_2, .inverted = 1, }, { .role = EM28XX_LED_DIGITAL_CAPTURING_TS2, .gpio_reg = EM2874_R80_GPIO_P0_CTRL, .gpio_mask = EM_GPIO_0, .inverted = 1, }, {-1, 0, 0, 0}, }; /* * Board definitions */ const struct em28xx_board em28xx_boards[] = { [EM2750_BOARD_UNKNOWN] = { .name = "EM2710/EM2750/EM2751 webcam grabber", .xclk = EM28XX_XCLK_FREQUENCY_20MHZ, .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = 0, .amux = EM28XX_AMUX_VIDEO, .gpio = silvercrest_reg_seq, } }, }, [EM2800_BOARD_UNKNOWN] = { .name = "Unknown EM2800 video grabber", .is_em2800 = 1, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .tuner_type = TUNER_ABSENT, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_UNKNOWN] = { .name = "Unknown EM2750/28xx video grabber", .tuner_type = TUNER_ABSENT, .is_webcam = 1, /* To enable sensor probe */ }, [EM2882_BOARD_ZOLID_HYBRID_TV_STICK] = { .name = ":ZOLID HYBRID TV STICK", .tuner_type = TUNER_XC2028, .tuner_gpio = zolid_tuner, .decoder = EM28XX_TVP5150, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = zolid_digital, }, [EM2750_BOARD_DLCW_130] = { /* Beijing Huaqi Information Digital Technology Co., Ltd */ .name = "Huaqi DLCW-130", .valid = EM28XX_BOARD_NOT_VALIDATED, .xclk = EM28XX_XCLK_FREQUENCY_48MHZ, .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = 0, .amux = EM28XX_AMUX_VIDEO, } }, }, [EM2820_BOARD_KWORLD_PVRTV2800RF] = { .name = "Kworld PVR TV 2800 RF", .tuner_type = TUNER_TEMIC_PAL, 
.tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_GADMEI_TVR200] = { .name = "Gadmei TVR200", .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_TERRATEC_CINERGY_250] = { .name = "Terratec Cinergy 250 USB", .tuner_type = TUNER_LG_PAL_NEW_TAPC, .has_ir_i2c = 1, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_PINNACLE_USB_2] = { .name = "Pinnacle PCTV USB 2", .tuner_type = TUNER_LG_PAL_NEW_TAPC, .has_ir_i2c = 1, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_HAUPPAUGE_WINTV_USB_2] = { .name = "Hauppauge WinTV USB 2", .tuner_type = TUNER_PHILIPS_FM1236_MK3, .tda9887_conf = TDA9887_PRESENT | TDA9887_PORT1_ACTIVE | TDA9887_PORT2_ACTIVE, .decoder = EM28XX_TVP5150, .has_msp34xx = 1, .has_ir_i2c = 1, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = 
MSP_INPUT_DEFAULT, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = MSP_INPUT(MSP_IN_SCART1, MSP_IN_TUNER1, MSP_DSP_IN_SCART, MSP_DSP_IN_SCART), } }, }, [EM2820_BOARD_DLINK_USB_TV] = { .name = "D-Link DUB-T210 TV Tuner", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_HERCULES_SMART_TV_USB2] = { .name = "Hercules Smart TV USB 2.0", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_PINNACLE_USB_2_FM1216ME] = { .name = "Pinnacle PCTV USB 2 (Philips FM1216ME)", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_PHILIPS_FM1216ME_MK3, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_GADMEI_UTV310] = { .name = "Gadmei UTV310", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_TNF_5335MF, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = 
EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_LEADTEK_WINFAST_USBII_DELUXE] = { .name = "Leadtek Winfast USB II Deluxe", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_PHILIPS_FM1216ME_MK3, .has_ir_i2c = 1, .tvaudio_addr = 0x58, .tda9887_conf = TDA9887_PRESENT | TDA9887_PORT2_ACTIVE | TDA9887_QSS, .decoder = EM28XX_SAA711X, .adecoder = EM28XX_TVAUDIO, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE4, .amux = EM28XX_AMUX_AUX, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE5, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, .radio = { .type = EM28XX_RADIO, .amux = EM28XX_AMUX_AUX, } }, [EM2820_BOARD_VIDEOLOGY_20K14XUSB] = { .name = "Videology 20K14XUSB USB2.0", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = 0, .amux = EM28XX_AMUX_VIDEO, } }, }, [EM2820_BOARD_SILVERCREST_WEBCAM] = { .name = "Silvercrest Webcam 1.3mpix", .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = 0, .amux = EM28XX_AMUX_VIDEO, .gpio = silvercrest_reg_seq, } }, }, [EM2821_BOARD_SUPERCOMP_USB_2] = { .name = "Supercomp USB 2.0 TV", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_PHILIPS_FM1236_MK3, .tda9887_conf = TDA9887_PRESENT | TDA9887_PORT1_ACTIVE | TDA9887_PORT2_ACTIVE, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2821_BOARD_USBGEAR_VD204] = { .name = "Usbgear VD204v9", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = 
TUNER_ABSENT, /* Capture only device */ .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2860_BOARD_NETGMBH_CAM] = { /* Beijing Huaqi Information Digital Technology Co., Ltd */ .name = "NetGMBH Cam", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = 0, .amux = EM28XX_AMUX_VIDEO, } }, }, [EM2860_BOARD_TYPHOON_DVD_MAKER] = { .name = "Typhoon DVD Maker", .decoder = EM28XX_SAA711X, .tuner_type = TUNER_ABSENT, /* Capture only device */ .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2860_BOARD_GADMEI_UTV330] = { .name = "Gadmei UTV330", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_TNF_5335MF, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2861_BOARD_GADMEI_UTV330PLUS] = { .name = "Gadmei UTV330+", .tuner_type = TUNER_TNF_5335MF, .tda9887_conf = TDA9887_PRESENT, .ir_codes = RC_MAP_GADMEI_RM008Z, .decoder = EM28XX_SAA711X, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2860_BOARD_TERRATEC_HYBRID_XS] = { .name = "Terratec Cinergy A Hybrid XS", .valid = EM28XX_BOARD_NOT_VALIDATED, 
.tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2861_BOARD_KWORLD_PVRTV_300U] = { .name = "KWorld PVRTV 300U", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2861_BOARD_YAKUMO_MOVIE_MIXER] = { .name = "Yakumo MovieMixer", .tuner_type = TUNER_ABSENT, /* Capture only device */ .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2860_BOARD_TVP5150_REFERENCE_DESIGN] = { .name = "EM2860/TVP5150 Reference Design", .tuner_type = TUNER_ABSENT, /* Capture only device */ .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2861_BOARD_PLEXTOR_PX_TV100U] = { .name = "Plextor ConvertX PX-TV100U", .tuner_type = TUNER_TNF_5335MF, .xclk = EM28XX_XCLK_I2S_MSB_TIMING | EM28XX_XCLK_FREQUENCY_12MHZ, .tda9887_conf = TDA9887_PRESENT, .decoder = 
EM28XX_TVP5150, .has_msp34xx = 1, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, .gpio = pinnacle_hybrid_pro_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = pinnacle_hybrid_pro_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = pinnacle_hybrid_pro_analog, } }, }, /* Those boards with em2870 are DVB Only*/ [EM2870_BOARD_TERRATEC_XS] = { .name = "Terratec Cinergy T XS", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, }, [EM2870_BOARD_TERRATEC_XS_MT2060] = { .name = "Terratec Cinergy T XS (MT2060)", .xclk = EM28XX_XCLK_IR_RC5_MODE | EM28XX_XCLK_FREQUENCY_12MHZ, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE, .tuner_type = TUNER_ABSENT, /* MT2060 */ .has_dvb = 1, .tuner_gpio = default_tuner_gpio, }, [EM2870_BOARD_KWORLD_350U] = { .name = "Kworld 350 U DVB-T", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, }, [EM2870_BOARD_KWORLD_355U] = { .name = "Kworld 355 U DVB-T", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_ABSENT, .tuner_gpio = default_tuner_gpio, .has_dvb = 1, .dvb_gpio = default_digital, }, [EM2870_BOARD_PINNACLE_PCTV_DVB] = { .name = "Pinnacle PCTV DVB-T", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_ABSENT, /* MT2060 */ /* djh - I have serious doubts this is right... 
*/ .xclk = EM28XX_XCLK_IR_RC5_MODE | EM28XX_XCLK_FREQUENCY_10MHZ, }, [EM2870_BOARD_COMPRO_VIDEOMATE] = { .name = "Compro, VideoMate U3", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_ABSENT, /* MT2060 */ }, [EM2880_BOARD_TERRATEC_HYBRID_XS_FR] = { .name = "Terratec Hybrid XS Secam", .has_msp34xx = 1, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .has_dvb = 1, .dvb_gpio = terratec_cinergy_USB_XS_FR_digital, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = terratec_cinergy_USB_XS_FR_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_cinergy_USB_XS_FR_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_cinergy_USB_XS_FR_analog, } }, }, [EM2884_BOARD_TERRATEC_H5] = { .name = "Terratec Cinergy H5", .has_dvb = 1, #if 0 .tuner_type = TUNER_PHILIPS_TDA8290, .tuner_addr = 0x41, .dvb_gpio = terratec_h5_digital, /* FIXME: probably wrong */ .tuner_gpio = terratec_h5_gpio, #else .tuner_type = TUNER_ABSENT, #endif .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, [EM2884_BOARD_TERRATEC_H6] = { .name = "Terratec Cinergy H6 rev. 
2", .has_dvb = 1, .ir_codes = RC_MAP_NEC_TERRATEC_CINERGY_XS, #if 0 .tuner_type = TUNER_PHILIPS_TDA8290, .tuner_addr = 0x41, .dvb_gpio = terratec_h5_digital, /* FIXME: probably wrong */ .tuner_gpio = terratec_h5_gpio, #else .tuner_type = TUNER_ABSENT, #endif .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, [EM2884_BOARD_HAUPPAUGE_WINTV_HVR_930C] = { .name = "Hauppauge WinTV HVR 930C", .has_dvb = 1, #if 0 /* FIXME: Add analog support */ .tuner_type = TUNER_XC5000, .tuner_addr = 0x41, .dvb_gpio = hauppauge_930c_digital, .tuner_gpio = hauppauge_930c_gpio, #else .tuner_type = TUNER_ABSENT, #endif .ir_codes = RC_MAP_HAUPPAUGE, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, [EM2884_BOARD_C3TECH_DIGITAL_DUO] = { .name = "C3 Tech Digital Duo HDTV/SDTV USB", .has_dvb = 1, /* FIXME: Add analog support - need a saa7136 driver */ .tuner_type = TUNER_ABSENT, /* Digital-only TDA18271HD */ .ir_codes = RC_MAP_EMPTY, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE, .dvb_gpio = c3tech_digital_duo_digital, }, [EM2884_BOARD_CINERGY_HTC_STICK] = { .name = "Terratec Cinergy HTC Stick", .has_dvb = 1, .ir_codes = RC_MAP_NEC_TERRATEC_CINERGY_XS, .tuner_type = TUNER_ABSENT, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, [EM2884_BOARD_ELGATO_EYETV_HYBRID_2008] = { .name = "Elgato EyeTV Hybrid 2008 INT", .has_dvb = 1, .ir_codes = RC_MAP_NEC_TERRATEC_CINERGY_XS, .tuner_type = TUNER_ABSENT, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, [EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900] = { .name = "Hauppauge WinTV HVR 900", .tda9887_conf = TDA9887_PRESENT, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900_digital, .ir_codes = RC_MAP_HAUPPAUGE, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = 
EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900_R2] = { .name = "Hauppauge WinTV HVR 900 (R2)", .tda9887_conf = TDA9887_PRESENT, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900R2_digital, .ir_codes = RC_MAP_HAUPPAUGE, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850] = { .name = "Hauppauge WinTV HVR 850", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900_digital, .ir_codes = RC_MAP_HAUPPAUGE, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950] = { .name = "Hauppauge WinTV HVR 950", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900_digital, .ir_codes = RC_MAP_HAUPPAUGE, .decoder = 
EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2880_BOARD_PINNACLE_PCTV_HD_PRO] = { .name = "Pinnacle PCTV HD Pro Stick", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900_digital, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600] = { .name = "AMD ATI TV Wonder HD 600", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900_digital, .ir_codes = RC_MAP_ATI_TV_WONDER_HD_600, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2880_BOARD_TERRATEC_HYBRID_XS] = { .name = "Terratec Hybrid XS", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .has_dvb = 1, .dvb_gpio = 
default_digital, .ir_codes = RC_MAP_TERRATEC_CINERGY_XS, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, /* NEC IR */ .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = default_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, } }, }, /* * maybe there's a reason behind it why Terratec sells the Hybrid XS * as Prodigy XS with a different PID, let's keep it separated for now * maybe we'll need it later on */ [EM2880_BOARD_TERRATEC_PRODIGY_XS] = { .name = "Terratec Prodigy XS", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2820_BOARD_MSI_VOX_USB_2] = { .name = "MSI VOX USB 2.0", .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tda9887_conf = TDA9887_PRESENT | TDA9887_PORT1_ACTIVE | TDA9887_PORT2_ACTIVE, .max_range_640_480 = 1, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE4, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2800_BOARD_TERRATEC_CINERGY_200] = { .name = "Terratec Cinergy 200 USB", .is_em2800 = 1, .has_ir_i2c = 1, .tuner_type = TUNER_LG_TALN, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux 
= EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2800_BOARD_GRABBEEX_USB2800] = { .name = "eMPIA Technology, Inc. GrabBeeX+ Video Encoder", .is_em2800 = 1, .decoder = EM28XX_SAA711X, .tuner_type = TUNER_ABSENT, /* capture only board */ .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2800_BOARD_VC211A] = { .name = "Actionmaster/LinXcel/Digitus VC211A", .is_em2800 = 1, .tuner_type = TUNER_ABSENT, /* Capture-only board */ .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, .gpio = vc211a_enable, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, .gpio = vc211a_enable, } }, }, [EM2800_BOARD_LEADTEK_WINFAST_USBII] = { .name = "Leadtek Winfast USB II", .is_em2800 = 1, .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2800_BOARD_KWORLD_USB2800] = { .name = "Kworld USB2800", .is_em2800 = 1, .tuner_type = TUNER_PHILIPS_FCV1236D, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_PINNACLE_DVC_90] = { .name = "Pinnacle 
Dazzle DVC 90/100/101/107 / Kaiser Baas Video to DVD maker / Kworld DVD Maker 2 / Plextor ConvertX PX-AV100U", .tuner_type = TUNER_ABSENT, /* capture only board */ .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2800_BOARD_VGEAR_POCKETTV] = { .name = "V-Gear PocketTV", .is_em2800 = 1, .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_PROLINK_PLAYTV_BOX4_USB2] = { .name = "Pixelview PlayTV Box 4 USB 2.0", .tda9887_conf = TDA9887_PRESENT, .tuner_type = TUNER_YMEC_TVF_5533MF, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, .aout = EM28XX_AOUT_MONO | /* I2S */ EM28XX_AOUT_MASTER, /* Line out pin */ }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_PROLINK_PLAYTV_USB2] = { .name = "SIIG AVTuner-PVR / Pixelview Prolink PlayTV USB 2.0", .buttons = std_snapshot_button, .tda9887_conf = TDA9887_PRESENT, .tuner_type = TUNER_YMEC_TVF_5533MF, .tuner_addr = 0x60, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, .aout = EM28XX_AOUT_MONO | /* I2S */ EM28XX_AOUT_MASTER, /* Line out pin */ }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = 
EM28XX_AMUX_LINE_IN, } }, }, [EM2860_BOARD_SAA711X_REFERENCE_DESIGN] = { .name = "EM2860/SAA711X Reference Design", .buttons = std_snapshot_button, .tuner_type = TUNER_ABSENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, } }, }, [EM2874_BOARD_LEADERSHIP_ISDBT] = { .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_100_KHZ, .xclk = EM28XX_XCLK_FREQUENCY_10MHZ, .name = "EM2874 Leadership ISDBT", .tuner_type = TUNER_ABSENT, .tuner_gpio = leadership_reset, .dvb_gpio = leadership_digital, .has_dvb = 1, }, [EM2880_BOARD_MSI_DIGIVOX_AD] = { .name = "MSI DigiVox A/D", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = em2880_msi_digivox_ad_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = em2880_msi_digivox_ad_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = em2880_msi_digivox_ad_analog, } }, }, [EM2880_BOARD_MSI_DIGIVOX_AD_II] = { .name = "MSI DigiVox A/D II", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = em2880_msi_digivox_ad_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = em2880_msi_digivox_ad_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = em2880_msi_digivox_ad_analog, } }, }, [EM2880_BOARD_KWORLD_DVB_305U] = { .name = "KWorld DVB-T 305U", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = 
EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2880_BOARD_KWORLD_DVB_310U] = { .name = "KWorld DVB-T 310U", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .has_dvb = 1, .dvb_gpio = default_digital, .mts_firmware = 1, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = default_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, }, { /* S-video has not been tested yet */ .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, } }, }, [EM2882_BOARD_KWORLD_ATSC_315U] = { .name = "KWorld ATSC 315U HDTV TV Box", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_THOMSON_DTT761X, .tuner_gpio = em2882_kworld_315u_tuner_gpio, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_SAA711X, .has_dvb = 1, .dvb_gpio = em2882_kworld_315u_digital, .ir_codes = RC_MAP_KWORLD_315U, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE, #if 0 /* FIXME: Analog mode - still not ready */ .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, .gpio = em2882_kworld_315u_analog, .aout = EM28XX_AOUT_PCM_IN | EM28XX_AOUT_PCM_STEREO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, .gpio = em2882_kworld_315u_analog1, .aout = EM28XX_AOUT_PCM_IN | EM28XX_AOUT_PCM_STEREO, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, .gpio = em2882_kworld_315u_analog1, .aout = EM28XX_AOUT_PCM_IN | EM28XX_AOUT_PCM_STEREO, } }, #endif }, [EM2880_BOARD_EMPIRE_DUAL_TV] = { .name = "Empire dual TV", .tuner_type = TUNER_XC2028, 
.tuner_gpio = default_tuner_gpio, .has_dvb = 1, .dvb_gpio = default_digital, .mts_firmware = 1, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = default_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, } }, }, [EM2881_BOARD_DNT_DA2_HYBRID] = { .name = "DNT DA2 Hybrid", .valid = EM28XX_BOARD_NOT_VALIDATED, .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = default_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = default_analog, } }, }, [EM2881_BOARD_PINNACLE_HYBRID_PRO] = { .name = "Pinnacle Hybrid Pro", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .has_dvb = 1, .dvb_gpio = pinnacle_hybrid_pro_digital, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = pinnacle_hybrid_pro_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = pinnacle_hybrid_pro_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = pinnacle_hybrid_pro_analog, } }, }, [EM2882_BOARD_PINNACLE_HYBRID_PRO_330E] = { .name = "Pinnacle Hybrid Pro (330e)", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900R2_digital, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, 
.amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2882_BOARD_KWORLD_VS_DVBT] = { .name = "Kworld VS-DVB-T 323UR", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = kworld_330u_digital, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, /* NEC IR */ .ir_codes = RC_MAP_KWORLD_315U, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2882_BOARD_TERRATEC_HYBRID_XS] = { .name = "Terratec Cinergy Hybrid T USB XS (em2882)", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .mts_firmware = 1, .decoder = EM28XX_TVP5150, .has_dvb = 1, .dvb_gpio = hauppauge_wintv_hvr_900_digital, .ir_codes = RC_MAP_TERRATEC_CINERGY_XS, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = hauppauge_wintv_hvr_900_analog, } }, }, [EM2882_BOARD_DIKOM_DK300] = { .name = "Dikom DK300", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = dikom_dk300_digital, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = default_analog, 
} }, }, [EM2883_BOARD_KWORLD_HYBRID_330U] = { .name = "Kworld PlusTV HD Hybrid 330", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = kworld_330u_digital, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_EEPROM_ON_BOARD | EM28XX_I2C_EEPROM_KEY_VALID, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = kworld_330u_analog, .aout = EM28XX_AOUT_PCM_IN | EM28XX_AOUT_PCM_STEREO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = kworld_330u_analog, .aout = EM28XX_AOUT_PCM_IN | EM28XX_AOUT_PCM_STEREO, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = kworld_330u_analog, } }, }, [EM2820_BOARD_COMPRO_VIDEOMATE_FORYOU] = { .name = "Compro VideoMate ForYou/Stereo", .tuner_type = TUNER_LG_PAL_NEW_TAPC, .tvaudio_addr = 0xb0, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_TVP5150, .adecoder = EM28XX_TVAUDIO, .mute_gpio = compro_mute_gpio, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = compro_unmute_tv_gpio, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = compro_unmute_svid_gpio, } }, }, [EM2860_BOARD_KAIOMY_TVNPC_U2] = { .name = "Kaiomy TVnPC U2", .vchannels = 3, .tuner_type = TUNER_XC2028, .tuner_addr = 0x61, .mts_firmware = 1, .decoder = EM28XX_TVP5150, .tuner_gpio = default_tuner_gpio, .ir_codes = RC_MAP_KAIOMY, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, .radio = { .type = EM28XX_RADIO, .amux = EM28XX_AMUX_LINE_IN, } }, [EM2860_BOARD_EASYCAP] = { .name = "Easy 
Cap Capture DC-60", .vchannels = 2, .tuner_type = TUNER_ABSENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2820_BOARD_IODATA_GVMVP_SZ] = { .name = "IO-DATA GV-MVP/SZ", .tuner_type = TUNER_PHILIPS_FM1236_MK3, .tuner_gpio = default_tuner_gpio, .tda9887_conf = TDA9887_PRESENT, .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, }, { /* Composite has not been tested yet */ .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_VIDEO, }, { /* S-video has not been tested yet */ .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_VIDEO, } }, }, [EM2860_BOARD_TERRATEC_GRABBY] = { .name = "Terratec Grabby", .vchannels = 2, .tuner_type = TUNER_ABSENT, .decoder = EM28XX_SAA711X, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, .buttons = std_snapshot_button, .leds = terratec_grabby_leds, }, [EM2860_BOARD_TERRATEC_AV350] = { .name = "Terratec AV350", .vchannels = 2, .tuner_type = TUNER_ABSENT, .decoder = EM28XX_TVP5150, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, .mute_gpio = terratec_av350_mute_gpio, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = terratec_av350_unmute_gpio, } }, }, [EM2860_BOARD_ELGATO_VIDEO_CAPTURE] = { .name = "Elgato Video Capture", .decoder = EM28XX_SAA711X, .tuner_type = TUNER_ABSENT, /* Capture only device */ .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = 
EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, [EM2882_BOARD_EVGA_INDTUBE] = { .name = "Evga inDtube", .tuner_type = TUNER_XC2028, .tuner_gpio = default_tuner_gpio, .decoder = EM28XX_TVP5150, .xclk = EM28XX_XCLK_FREQUENCY_12MHZ, /* NEC IR */ .mts_firmware = 1, .has_dvb = 1, .dvb_gpio = evga_indtube_digital, .ir_codes = RC_MAP_EVGA_INDTUBE, .input = { { .type = EM28XX_VMUX_TELEVISION, .vmux = TVP5150_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = evga_indtube_analog, }, { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, .gpio = evga_indtube_analog, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, .gpio = evga_indtube_analog, } }, }, /* * eb1a:2868 Empia EM2870 + Philips CU1216L NIM * (Philips TDA10023 + Infineon TUA6034) */ [EM2870_BOARD_REDDO_DVB_C_USB_BOX] = { .name = "Reddo DVB-C USB TV Box", .tuner_type = TUNER_ABSENT, .tuner_gpio = reddo_dvb_c_usb_box, .has_dvb = 1, }, /* * 1b80:a340 - Empia EM2870, NXP TDA18271HD and LG DT3304, sold * initially as the KWorld PlusTV 340U, then as the UB435-Q. * Early variants have a TDA18271HD/C1, later ones a TDA18271HD/C2 */ [EM2870_BOARD_KWORLD_A340] = { .name = "KWorld PlusTV 340U or UB435-Q (ATSC)", .tuner_type = TUNER_ABSENT, /* Digital-only TDA18271HD */ .has_dvb = 1, .dvb_gpio = kworld_a340_digital, .tuner_gpio = default_tuner_gpio, }, /* * 2013:024f PCTV nanoStick T2 290e. 
* Empia EM28174, Sony CXD2820R and NXP TDA18271HD/C2 */ [EM28174_BOARD_PCTV_290E] = { .name = "PCTV nanoStick T2 290e", .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_100_KHZ, .tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_290e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, }, /* * 2013:024f PCTV DVB-S2 Stick 460e * Empia EM28174, NXP TDA10071, Conexant CX24118A and Allegro A8293 */ [EM28174_BOARD_PCTV_460E] = { .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .name = "PCTV DVB-S2 Stick (460e)", .tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_460e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, }, /* * eb1a:5006 Honestech VIDBOX NW03 * Empia EM2860, Philips SAA7113, Empia EMP202, No Tuner */ [EM2860_BOARD_HT_VIDBOX_NW03] = { .name = "Honestech Vidbox NW03", .tuner_type = TUNER_ABSENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, /* S-VIDEO needs check */ .amux = EM28XX_AMUX_LINE_IN, } }, }, /* * 1b80:e425 MaxMedia UB425-TC * Empia EM2874B + Micronas DRX 3913KA2 + NXP TDA18271HDC2 */ [EM2874_BOARD_MAXMEDIA_UB425_TC] = { .name = "MaxMedia UB425-TC", .tuner_type = TUNER_ABSENT, .tuner_gpio = maxmedia_ub425_tc, .has_dvb = 1, .ir_codes = RC_MAP_REDDO, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, /* * 2304:0242 PCTV QuatroStick (510e) * Empia EM2884 + Micronas DRX 3926K + NXP TDA18271HDC2 */ [EM2884_BOARD_PCTV_510E] = { .name = "PCTV QuatroStick (510e)", .tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_510e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, /* * 2013:0251 PCTV QuatroStick nano (520e) * Empia EM2884 + Micronas DRX 3926K + NXP TDA18271HDC2 */ [EM2884_BOARD_PCTV_520E] = { .name = "PCTV QuatroStick nano (520e)", 
.tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_520e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, [EM2884_BOARD_TERRATEC_HTC_USB_XS] = { .name = "Terratec Cinergy HTC USB XS", .has_dvb = 1, .ir_codes = RC_MAP_NEC_TERRATEC_CINERGY_XS, .tuner_type = TUNER_ABSENT, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, /* * 1b80:e1cc Delock 61959 * Empia EM2874B + Micronas DRX 3913KA2 + NXP TDA18271HDC2 * mostly the same as MaxMedia UB-425-TC but different remote */ [EM2874_BOARD_DELOCK_61959] = { .name = "Delock 61959", .tuner_type = TUNER_ABSENT, .tuner_gpio = maxmedia_ub425_tc, .has_dvb = 1, .ir_codes = RC_MAP_DELOCK_61959, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, }, /* * 1b80:e346 KWorld USB ATSC TV Stick UB435-Q V2 * Empia EM2874B + LG DT3305 + NXP TDA18271HDC2 */ [EM2874_BOARD_KWORLD_UB435Q_V2] = { .name = "KWorld USB ATSC TV Stick UB435-Q V2", .tuner_type = TUNER_ABSENT, .has_dvb = 1, .dvb_gpio = kworld_a340_digital, .tuner_gpio = default_tuner_gpio, .def_i2c_bus = 1, }, /* * 1b80:e34c KWorld USB ATSC TV Stick UB435-Q V3 * Empia EM2874B + LG DT3305 + NXP TDA18271HDC2 */ [EM2874_BOARD_KWORLD_UB435Q_V3] = { .name = "KWorld USB ATSC TV Stick UB435-Q V3", .tuner_type = TUNER_ABSENT, .has_dvb = 1, .tuner_gpio = kworld_ub435q_v3_digital, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_100_KHZ, .leds = kworld_ub435q_v3_leds, }, [EM2874_BOARD_PCTV_HD_MINI_80E] = { .name = "Pinnacle PCTV HD Mini", .tuner_type = TUNER_ABSENT, .has_dvb = 1, .dvb_gpio = em2874_pctv_80e_digital, .decoder = EM28XX_NODECODER, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, .leds = pctv_80e_leds, }, /* * 1ae7:9003/9004 SpeedLink Vicious And Devine Laplace webcam * Empia EM2765 + OmniVision OV2640 */ [EM2765_BOARD_SPEEDLINK_VAD_LAPLACE] = { .name = "SpeedLink Vicious And Devine Laplace webcam", .xclk = 
EM28XX_XCLK_FREQUENCY_24MHZ, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_100_KHZ, .def_i2c_bus = 1, .tuner_type = TUNER_ABSENT, .is_webcam = 1, .input = { { .type = EM28XX_VMUX_COMPOSITE, .amux = EM28XX_AMUX_VIDEO, .gpio = speedlink_vad_laplace_reg_seq, } }, .buttons = speedlink_vad_laplace_buttons, .leds = speedlink_vad_laplace_leds, }, /* * 2013:0258 PCTV DVB-S2 Stick (461e) * Empia EM28178, Montage M88DS3103, Montage M88TS2022, Allegro A8293 */ [EM28178_BOARD_PCTV_461E] = { .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .name = "PCTV DVB-S2 Stick (461e)", .tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_461e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, }, /* * 2013:0259 PCTV DVB-S2 Stick (461e_v2) * Empia EM28178, Montage M88DS3103b, Montage M88TS2022, Allegro A8293 */ [EM28178_BOARD_PCTV_461E_V2] = { .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .name = "PCTV DVB-S2 Stick (461e v2)", .tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_461e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, }, /* * 2013:025f PCTV tripleStick (292e). * Empia EM28178, Silicon Labs Si2168, Silicon Labs Si2157 */ [EM28178_BOARD_PCTV_292E] = { .name = "PCTV tripleStick (292e)", .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .tuner_type = TUNER_ABSENT, .tuner_gpio = pctv_292e, .has_dvb = 1, .ir_codes = RC_MAP_PINNACLE_PCTV_HD, }, [EM2861_BOARD_LEADTEK_VC100] = { .name = "Leadtek VC100", .tuner_type = TUNER_ABSENT, /* Capture only device */ .decoder = EM28XX_TVP5150, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = TVP5150_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, /* * eb1a:8179 Terratec Cinergy T2 Stick HD. 
* Empia EM28178, Silicon Labs Si2168, Silicon Labs Si2146 */ [EM28178_BOARD_TERRATEC_T2_STICK_HD] = { .name = "Terratec Cinergy T2 Stick HD", .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .tuner_type = TUNER_ABSENT, .tuner_gpio = terratec_t2_stick_hd, .has_dvb = 1, .ir_codes = RC_MAP_TERRATEC_SLIM_2, }, /* * 3275:0085 PLEX PX-BCUD. * Empia EM28178, TOSHIBA TC90532XBG, Sharp QM1D1C0042 */ [EM28178_BOARD_PLEX_PX_BCUD] = { .name = "PLEX PX-BCUD", .xclk = EM28XX_XCLK_FREQUENCY_4_3MHZ, .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE, .tuner_type = TUNER_ABSENT, .tuner_gpio = plex_px_bcud, .has_dvb = 1, }, /* * 2040:0265 Hauppauge WinTV-dualHD (DVB version) Isoc. * 2040:8265 Hauppauge WinTV-dualHD (DVB version) Bulk. * Empia EM28274, 2x Silicon Labs Si2168, 2x Silicon Labs Si2157 */ [EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_DVB] = { .name = "Hauppauge WinTV-dualHD DVB", .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .tuner_type = TUNER_SI2157, .tuner_gpio = hauppauge_dualhd_dvb, .has_dvb = 1, .has_dual_ts = 1, .ir_codes = RC_MAP_HAUPPAUGE, .leds = hauppauge_dualhd_leds, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = TVP5150_COMPOSITE1, .amux = EM28XX_AMUX_LINE_IN, } }, }, /* * 2040:026d Hauppauge WinTV-dualHD (model 01595 - ATSC/QAM) Isoc. * 2040:826d Hauppauge WinTV-dualHD (model 01595 - ATSC/QAM) Bulk. 
* Empia EM28274, 2x LG LGDT3306A, 2x Silicon Labs Si2157 */ [EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_01595] = { .name = "Hauppauge WinTV-dualHD 01595 ATSC/QAM", .def_i2c_bus = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_400_KHZ, .tuner_type = TUNER_ABSENT, .tuner_gpio = hauppauge_dualhd_dvb, .has_dvb = 1, .has_dual_ts = 1, .ir_codes = RC_MAP_HAUPPAUGE, .leds = hauppauge_dualhd_leds, }, /* * 1b80:e349 Magix USB Videowandler-2 * (same chips as Honestech VIDBOX NW03) * Empia EM2860, Philips SAA7113, Empia EMP202, No Tuner */ [EM2861_BOARD_MAGIX_VIDEOWANDLER2] = { .name = "Magix USB Videowandler-2", .tuner_type = TUNER_ABSENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .amux = EM28XX_AMUX_LINE_IN, } }, }, /* * 1f4d:1abe MyGica iGrabber * (same as several other EM2860 devices) * Empia EM2860, Philips SAA7113, Empia EMP202, No Tuner */ [EM2860_BOARD_MYGICA_IGRABBER] = { .name = "MyGica iGrabber", .vchannels = 2, .tuner_type = TUNER_ABSENT, .decoder = EM28XX_SAA711X, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_LINE_IN, }, { .type = EM28XX_VMUX_SVIDEO, .vmux = SAA7115_SVIDEO3, .amux = EM28XX_AMUX_LINE_IN, } }, }, /* 2040:826d Hauppauge USB QuadHD * Empia 28274, Max Linear 692 ATSC combo demod/tuner */ [EM2874_BOARD_HAUPPAUGE_USB_QUADHD] = { .name = "Hauppauge USB QuadHD ATSC", .def_i2c_bus = 1, .has_dual_ts = 1, .has_dvb = 1, .i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_100_KHZ, .tuner_type = TUNER_ABSENT, .tuner_gpio = hauppauge_usb_quadhd_atsc_reg_seq, .leds = hauppauge_usb_quadhd_leds, }, /* * eb1a:2860 MyGica UTV3 Analog USB2.0 TV Box * Empia EM2860, Philips SAA7113, NXP TDA9801T demod, * Tena TNF931D-DFDR1 tuner (contains NXP TDA6509A), * ST HCF4052 demux (switches audio to line out), * no audio over USB */ [EM2860_BOARD_MYGICA_UTV3] = { .name = "MyGica UTV3 Analog USB2.0 TV 
Box", .xclk = EM28XX_XCLK_IR_RC5_MODE | EM28XX_XCLK_FREQUENCY_12MHZ, .tuner_type = TUNER_TENA_TNF_931D_DFDR1, .ir_codes = RC_MAP_MYGICA_UTV3, .decoder = EM28XX_SAA711X, .suspend_gpio = mygica_utv3_suspend_gpio, .input = { { .type = EM28XX_VMUX_COMPOSITE, .vmux = SAA7115_COMPOSITE0, .amux = EM28XX_AMUX_VIDEO, .gpio = mygica_utv3_composite_audio_gpio, }, { .type = EM28XX_VMUX_TELEVISION, .vmux = SAA7115_COMPOSITE2, .amux = EM28XX_AMUX_VIDEO, .gpio = mygica_utv3_tuner_audio_gpio, } }, }, }; EXPORT_SYMBOL_GPL(em28xx_boards); static const unsigned int em28xx_bcount = ARRAY_SIZE(em28xx_boards); /* table of devices that work with this driver */ struct usb_device_id em28xx_id_table[] = { { USB_DEVICE(0xeb1a, 0x2750), .driver_info = EM2750_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2751), .driver_info = EM2750_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2800), .driver_info = EM2800_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2710), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2820), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2821), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2860), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2861), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2862), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2863), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2870), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2881), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2883), /* used by :Zolid Hybrid Tv Stick */ .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2868), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2875), .driver_info = EM2820_BOARD_UNKNOWN }, { USB_DEVICE(0xeb1a, 0x2885), /* MSI Digivox Trio */ .driver_info = EM2884_BOARD_TERRATEC_H5 }, { USB_DEVICE(0xeb1a, 0xe300), .driver_info = EM2861_BOARD_KWORLD_PVRTV_300U }, { USB_DEVICE(0xeb1a, 0xe303), .driver_info =
			EM2860_BOARD_KAIOMY_TVNPC_U2 },
	/*
	 * Layout of em28xx_id_table: every entry pairs one USB vendor/product
	 * ID, given through USB_DEVICE(vendor, product), with a board constant
	 * stored in .driver_info. The same board constants are used as the
	 * designated-initializer indices of em28xx_boards[].
	 *
	 * Several IDs may share one board constant, and the entries at the top
	 * of the table use the *_BOARD_UNKNOWN constants.
	 * NOTE(review): presumably those generic IDs are resolved to a real
	 * board later through the EEPROM and I2C hash tables - confirm against
	 * the probing code, which is not visible here.
	 */
	{ USB_DEVICE(0xeb1a, 0xe305),
			.driver_info = EM2880_BOARD_KWORLD_DVB_305U },
	{ USB_DEVICE(0xeb1a, 0xe310),
			.driver_info = EM2880_BOARD_MSI_DIGIVOX_AD },
	{ USB_DEVICE(0xeb1a, 0xa313),
			.driver_info = EM2882_BOARD_KWORLD_ATSC_315U },
	{ USB_DEVICE(0xeb1a, 0xa316),
			.driver_info = EM2883_BOARD_KWORLD_HYBRID_330U },
	{ USB_DEVICE(0xeb1a, 0xe320),
			.driver_info = EM2880_BOARD_MSI_DIGIVOX_AD_II },
	{ USB_DEVICE(0xeb1a, 0xe323),
			.driver_info = EM2882_BOARD_KWORLD_VS_DVBT },
	{ USB_DEVICE(0xeb1a, 0xe350),
			.driver_info = EM2870_BOARD_KWORLD_350U },
	{ USB_DEVICE(0xeb1a, 0xe355),
			.driver_info = EM2870_BOARD_KWORLD_355U },
	{ USB_DEVICE(0xeb1a, 0x2801),
			.driver_info = EM2800_BOARD_GRABBEEX_USB2800 },
	{ USB_DEVICE(0xeb1a, 0xe357),
			.driver_info = EM2870_BOARD_KWORLD_355U },
	{ USB_DEVICE(0xeb1a, 0xe359),
			.driver_info = EM2870_BOARD_KWORLD_355U },
	{ USB_DEVICE(0x1b80, 0xe302), /* Kaiser Baas Video to DVD maker */
			.driver_info = EM2820_BOARD_PINNACLE_DVC_90 },
	{ USB_DEVICE(0x1b80, 0xe304), /* Kworld DVD Maker 2 */
			.driver_info = EM2820_BOARD_PINNACLE_DVC_90 },
	{ USB_DEVICE(0x0ccd, 0x0036),
			.driver_info = EM2820_BOARD_TERRATEC_CINERGY_250 },
	{ USB_DEVICE(0x0ccd, 0x004c),
			.driver_info = EM2880_BOARD_TERRATEC_HYBRID_XS_FR },
	{ USB_DEVICE(0x0ccd, 0x004f),
			.driver_info = EM2860_BOARD_TERRATEC_HYBRID_XS },
	{ USB_DEVICE(0x0ccd, 0x005e),
			.driver_info = EM2882_BOARD_TERRATEC_HYBRID_XS },
	{ USB_DEVICE(0x0ccd, 0x0042),
			.driver_info = EM2882_BOARD_TERRATEC_HYBRID_XS },
	{ USB_DEVICE(0x0ccd, 0x0043),
			.driver_info = EM2870_BOARD_TERRATEC_XS_MT2060 },
	{ USB_DEVICE(0x0ccd, 0x008e), /* Cinergy HTC USB XS Rev. 1 */
			.driver_info = EM2884_BOARD_TERRATEC_HTC_USB_XS },
	{ USB_DEVICE(0x0ccd, 0x00ac), /* Cinergy HTC USB XS Rev. 2 */
			.driver_info = EM2884_BOARD_TERRATEC_HTC_USB_XS },
	{ USB_DEVICE(0x0ccd, 0x10a2), /* H5 Rev. 1 */
			.driver_info = EM2884_BOARD_TERRATEC_H5 },
	{ USB_DEVICE(0x0ccd, 0x10ad), /* H5 Rev. 2 */
			.driver_info = EM2884_BOARD_TERRATEC_H5 },
	{ USB_DEVICE(0x0ccd, 0x10b6), /* H5 Rev. 3 */
			.driver_info = EM2884_BOARD_TERRATEC_H5 },
	{ USB_DEVICE(0x0ccd, 0x10b2), /* H6 */
			.driver_info = EM2884_BOARD_TERRATEC_H6 },
	{ USB_DEVICE(0x0ccd, 0x0084),
			.driver_info = EM2860_BOARD_TERRATEC_AV350 },
	{ USB_DEVICE(0x0ccd, 0x0096),
			.driver_info = EM2860_BOARD_TERRATEC_GRABBY },
	{ USB_DEVICE(0x0ccd, 0x10AF),
			.driver_info = EM2860_BOARD_TERRATEC_GRABBY },
	{ USB_DEVICE(0x0ccd, 0x00b2),
			.driver_info = EM2884_BOARD_CINERGY_HTC_STICK },
	{ USB_DEVICE(0x0fd9, 0x0018),
			.driver_info = EM2884_BOARD_ELGATO_EYETV_HYBRID_2008 },
	{ USB_DEVICE(0x0fd9, 0x0033),
			.driver_info = EM2860_BOARD_ELGATO_VIDEO_CAPTURE },
	{ USB_DEVICE(0x185b, 0x2870),
			.driver_info = EM2870_BOARD_COMPRO_VIDEOMATE },
	{ USB_DEVICE(0x185b, 0x2041),
			.driver_info = EM2820_BOARD_COMPRO_VIDEOMATE_FORYOU },
	{ USB_DEVICE(0x2040, 0x4200),
			.driver_info = EM2820_BOARD_HAUPPAUGE_WINTV_USB_2 },
	{ USB_DEVICE(0x2040, 0x4201),
			.driver_info = EM2820_BOARD_HAUPPAUGE_WINTV_USB_2 },
	{ USB_DEVICE(0x2040, 0x6500),
			.driver_info = EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900 },
	{ USB_DEVICE(0x2040, 0x6502),
			.driver_info = EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900_R2 },
	{ USB_DEVICE(0x2040, 0x6513), /* HCW HVR-980 */
			.driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950 },
	{ USB_DEVICE(0x2040, 0x6517), /* HP HVR-950 */
			.driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950 },
	{ USB_DEVICE(0x2040, 0x651b), /* RP HVR-950 */
			.driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950 },
	{ USB_DEVICE(0x2040, 0x651f),
			.driver_info = EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850 },
	{ USB_DEVICE(0x2040, 0x0265),
			.driver_info = EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_DVB },
	{ USB_DEVICE(0x2040, 0x8265),
			.driver_info = EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_DVB },
	{ USB_DEVICE(0x2040, 0x026d),
			.driver_info = EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_01595 },
	{ USB_DEVICE(0x2040, 0x826d),
			.driver_info = EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_01595 },
	/* 0x846d is the product ID bound to the USB QuadHD board entry */
	{ USB_DEVICE(0x2040, 0x846d),
			.driver_info = EM2874_BOARD_HAUPPAUGE_USB_QUADHD },
	{ USB_DEVICE(0x0438, 0xb002),
			.driver_info = EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600 },
	{ USB_DEVICE(0x2001, 0xf112),
			.driver_info = EM2820_BOARD_DLINK_USB_TV },
	{ USB_DEVICE(0x2304, 0x0207),
			.driver_info = EM2820_BOARD_PINNACLE_DVC_90 },
	{ USB_DEVICE(0x2304, 0x0208),
			.driver_info = EM2820_BOARD_PINNACLE_USB_2 },
	{ USB_DEVICE(0x2304, 0x021a),
			.driver_info = EM2820_BOARD_PINNACLE_DVC_90 },
	{ USB_DEVICE(0x2304, 0x0226),
			.driver_info = EM2882_BOARD_PINNACLE_HYBRID_PRO_330E },
	{ USB_DEVICE(0x2304, 0x0227),
			.driver_info = EM2880_BOARD_PINNACLE_PCTV_HD_PRO },
	{ USB_DEVICE(0x2304, 0x023f),
			.driver_info = EM2874_BOARD_PCTV_HD_MINI_80E },
	{ USB_DEVICE(0x0413, 0x6023),
			.driver_info = EM2800_BOARD_LEADTEK_WINFAST_USBII },
	{ USB_DEVICE(0x093b, 0xa003),
			.driver_info = EM2820_BOARD_PINNACLE_DVC_90 },
	{ USB_DEVICE(0x093b, 0xa005),
			.driver_info = EM2861_BOARD_PLEXTOR_PX_TV100U },
	{ USB_DEVICE(0x04bb, 0x0515),
			.driver_info = EM2820_BOARD_IODATA_GVMVP_SZ },
	{ USB_DEVICE(0xeb1a, 0x50a6),
			.driver_info = EM2860_BOARD_GADMEI_UTV330 },
	{ USB_DEVICE(0x1b80, 0xa340),
			.driver_info = EM2870_BOARD_KWORLD_A340 },
	{ USB_DEVICE(0x1b80, 0xe346),
			.driver_info = EM2874_BOARD_KWORLD_UB435Q_V2 },
	{ USB_DEVICE(0x1b80, 0xe34c),
			.driver_info = EM2874_BOARD_KWORLD_UB435Q_V3 },
	/* 0x024f selects the 290e board, 0x024c the 460e board */
	{ USB_DEVICE(0x2013, 0x024f),
			.driver_info = EM28174_BOARD_PCTV_290E },
	{ USB_DEVICE(0x2013, 0x024c),
			.driver_info = EM28174_BOARD_PCTV_460E },
	{ USB_DEVICE(0x2040, 0x1605),
			.driver_info = EM2884_BOARD_HAUPPAUGE_WINTV_HVR_930C },
	{ USB_DEVICE(0x1b80, 0xe755),
			.driver_info = EM2884_BOARD_C3TECH_DIGITAL_DUO },
	{ USB_DEVICE(0xeb1a, 0x5006),
			.driver_info = EM2860_BOARD_HT_VIDBOX_NW03 },
	{ USB_DEVICE(0x1b80, 0xe309), /* Sveon STV40 */
			.driver_info = EM2860_BOARD_EASYCAP },
	{ USB_DEVICE(0x1b80, 0xe425),
			.driver_info = EM2874_BOARD_MAXMEDIA_UB425_TC },
	{ USB_DEVICE(0x1f4d, 0x1abe),
			.driver_info = EM2860_BOARD_MYGICA_IGRABBER },
	{ USB_DEVICE(0x2304, 0x0242),
			.driver_info = EM2884_BOARD_PCTV_510E },
	{ USB_DEVICE(0x2013, 0x0251),
			.driver_info = EM2884_BOARD_PCTV_520E },
	{ USB_DEVICE(0x1b80, 0xe1cc),
			.driver_info = EM2874_BOARD_DELOCK_61959 },
	{ USB_DEVICE(0x1ae7, 0x9003),
			.driver_info = EM2765_BOARD_SPEEDLINK_VAD_LAPLACE },
	{ USB_DEVICE(0x1ae7, 0x9004),
			.driver_info = EM2765_BOARD_SPEEDLINK_VAD_LAPLACE },
	{ USB_DEVICE(0x2013, 0x0258),
			.driver_info = EM28178_BOARD_PCTV_461E },
	{ USB_DEVICE(0x2013, 0x8258), /* Bulk transport 461e */
			.driver_info = EM28178_BOARD_PCTV_461E },
	{ USB_DEVICE(0x2013, 0x0461),
			.driver_info = EM28178_BOARD_PCTV_461E_V2 },
	{ USB_DEVICE(0x2013, 0x8461), /* Bulk transport 461e v2 */
			.driver_info = EM28178_BOARD_PCTV_461E_V2 },
	{ USB_DEVICE(0x2013, 0x0259),
			.driver_info = EM28178_BOARD_PCTV_461E_V2 },
	{ USB_DEVICE(0x2013, 0x025f),
			.driver_info = EM28178_BOARD_PCTV_292E },
	{ USB_DEVICE(0x2013, 0x0264), /* Hauppauge WinTV-soloHD 292e SE */
			.driver_info = EM28178_BOARD_PCTV_292E },
	{ USB_DEVICE(0x2040, 0x0264), /* Hauppauge WinTV-soloHD Isoc */
			.driver_info = EM28178_BOARD_PCTV_292E },
	{ USB_DEVICE(0x2040, 0x8264), /* Hauppauge OEM Generic WinTV-soloHD Bulk */
			.driver_info = EM28178_BOARD_PCTV_292E },
	{ USB_DEVICE(0x2040, 0x8268), /* Hauppauge Retail WinTV-soloHD Bulk */
			.driver_info = EM28178_BOARD_PCTV_292E },
	{ USB_DEVICE(0x0413, 0x6f07),
			.driver_info = EM2861_BOARD_LEADTEK_VC100 },
	{ USB_DEVICE(0xeb1a, 0x8179),
			.driver_info = EM28178_BOARD_TERRATEC_T2_STICK_HD },
	{ USB_DEVICE(0x3275, 0x0085),
			.driver_info = EM28178_BOARD_PLEX_PX_BCUD },
	{ USB_DEVICE(0xeb1a, 0x5051), /* Ion Video 2 PC MKII / Startech svid2usb23 / Raygo R12-41373 */
			.driver_info = EM2860_BOARD_TVP5150_REFERENCE_DESIGN },
	{ USB_DEVICE(0x1b80, 0xe349), /* Magix USB Videowandler-2 */
			.driver_info = EM2861_BOARD_MAGIX_VIDEOWANDLER2 },
	/* all-zero entry: the customary end marker of a usb_device_id table */
	{ },
};
/* Hand the ID table to the usb device-table machinery of the module. */
MODULE_DEVICE_TABLE(usb, em28xx_id_table);

/*
 * EEPROM hash table for devices with generic USB IDs
 *
 * Each row is a positional initializer of struct em28xx_hash_table made of
 * a 32-bit hash value, a board constant and a tuner constant (the field
 * names are declared elsewhere and are not visible here).
 */
static const struct em28xx_hash_table em28xx_eeprom_hash[] = {
	/* P/N: SA 60002070465 Tuner: TVF7533-MF */
	{0x6ce05a8f,
EM2820_BOARD_PROLINK_PLAYTV_USB2, TUNER_YMEC_TVF_5533MF}, {0x72cc5a8b, EM2820_BOARD_PROLINK_PLAYTV_BOX4_USB2, TUNER_YMEC_TVF_5533MF}, {0x966a0441, EM2880_BOARD_KWORLD_DVB_310U, TUNER_XC2028}, {0x166a0441, EM2880_BOARD_EMPIRE_DUAL_TV, TUNER_XC2028}, {0xcee44a99, EM2882_BOARD_EVGA_INDTUBE, TUNER_XC2028}, {0xb8846b20, EM2881_BOARD_PINNACLE_HYBRID_PRO, TUNER_XC2028}, {0x63f653bd, EM2870_BOARD_REDDO_DVB_C_USB_BOX, TUNER_ABSENT}, {0x4e913442, EM2882_BOARD_DIKOM_DK300, TUNER_XC2028}, {0x85dd871e, EM2882_BOARD_ZOLID_HYBRID_TV_STICK, TUNER_XC2028}, {0x8f597549, EM2860_BOARD_MYGICA_UTV3, TUNER_TENA_TNF_931D_DFDR1}, }; /* I2C devicelist hash table for devices with generic USB IDs */ static const struct em28xx_hash_table em28xx_i2c_hash[] = { {0xb06a32c3, EM2800_BOARD_TERRATEC_CINERGY_200, TUNER_LG_PAL_NEW_TAPC}, {0xf51200e3, EM2800_BOARD_VGEAR_POCKETTV, TUNER_LG_PAL_NEW_TAPC}, {0x1ba50080, EM2860_BOARD_SAA711X_REFERENCE_DESIGN, TUNER_ABSENT}, {0x77800080, EM2860_BOARD_TVP5150_REFERENCE_DESIGN, TUNER_ABSENT}, {0xc51200e3, EM2820_BOARD_GADMEI_TVR200, TUNER_LG_PAL_NEW_TAPC}, {0x4ba50080, EM2861_BOARD_GADMEI_UTV330PLUS, TUNER_TNF_5335MF}, {0x6b800080, EM2874_BOARD_LEADERSHIP_ISDBT, TUNER_ABSENT}, {0x27e10080, EM2882_BOARD_ZOLID_HYBRID_TV_STICK, TUNER_XC2028}, {0x840d0484, EM2860_BOARD_MYGICA_UTV3, TUNER_TENA_TNF_931D_DFDR1}, }; /* NOTE: introduce a separate hash table for devices with 16 bit eeproms */ int em28xx_tuner_callback(void *ptr, int component, int command, int arg) { struct em28xx_i2c_bus *i2c_bus = ptr; struct em28xx *dev = i2c_bus->dev; int rc = 0; if (dev->tuner_type != TUNER_XC2028 && dev->tuner_type != TUNER_XC5000) return 0; if (command != XC2028_TUNER_RESET && command != XC5000_TUNER_RESET) return 0; rc = em28xx_gpio_set(dev, dev->board.tuner_gpio); return rc; } EXPORT_SYMBOL_GPL(em28xx_tuner_callback); static inline void em28xx_set_xclk_i2c_speed(struct em28xx *dev) { const struct em28xx_board *board = &em28xx_boards[dev->model]; u8 xclk = board->xclk, i2c_speed 
= board->i2c_speed; /* * Those are the default values for the majority of boards * Use those values if not specified otherwise at boards entry */ if (!xclk) xclk = EM28XX_XCLK_IR_RC5_MODE | EM28XX_XCLK_FREQUENCY_12MHZ; em28xx_write_reg(dev, EM28XX_R0F_XCLK, xclk); if (!i2c_speed) i2c_speed = EM28XX_I2C_CLK_WAIT_ENABLE | EM28XX_I2C_FREQ_100_KHZ; dev->i2c_speed = i2c_speed & 0x03; if (!dev->board.is_em2800) em28xx_write_reg(dev, EM28XX_R06_I2C_CLK, i2c_speed); msleep(50); } static inline void em28xx_set_model(struct em28xx *dev) { dev->board = em28xx_boards[dev->model]; dev->has_msp34xx = dev->board.has_msp34xx; dev->is_webcam = dev->board.is_webcam; em28xx_set_xclk_i2c_speed(dev); /* Should be initialized early, for I2C to work */ dev->def_i2c_bus = dev->board.def_i2c_bus; } /* * Wait until AC97_RESET reports the expected value reliably before proceeding. * We also check that two unrelated registers accesses don't return the same * value to avoid premature return. * This procedure helps ensuring AC97 register accesses are reliable. 
*/ static int em28xx_wait_until_ac97_features_equals(struct em28xx *dev, int expected_feat) { unsigned long timeout = jiffies + msecs_to_jiffies(2000); int feat, powerdown; while (time_is_after_jiffies(timeout)) { feat = em28xx_read_ac97(dev, AC97_RESET); if (feat < 0) return feat; powerdown = em28xx_read_ac97(dev, AC97_POWERDOWN); if (powerdown < 0) return powerdown; if (feat == expected_feat && feat != powerdown) return 0; msleep(50); } dev_warn(&dev->intf->dev, "AC97 registers access is not reliable !\n"); return -ETIMEDOUT; } /* * Since em28xx_pre_card_setup() requires a proper dev->model, * this won't work for boards with generic PCI IDs */ static void em28xx_pre_card_setup(struct em28xx *dev) { /* * Set the initial XCLK and I2C clock values based on the board * definition */ em28xx_set_xclk_i2c_speed(dev); /* request some modules */ switch (dev->model) { case EM2861_BOARD_PLEXTOR_PX_TV100U: /* Sets the msp34xx I2S speed */ dev->i2s_speed = 2048000; break; case EM2861_BOARD_KWORLD_PVRTV_300U: case EM2880_BOARD_KWORLD_DVB_305U: em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0x6d); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0x7d); usleep_range(10000, 11000); break; case EM2870_BOARD_COMPRO_VIDEOMATE: /* * TODO: someone can do some cleanup here... 
* not everything's needed */ em28xx_write_reg(dev, EM2880_R04_GPO, 0x00); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2880_R04_GPO, 0x01); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfd); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfc); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xdc); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfc); msleep(70); break; case EM2870_BOARD_TERRATEC_XS_MT2060: /* * this device needs some gpio writes to get the DVB-T * demod work */ em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfe); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xde); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfe); msleep(70); break; case EM2870_BOARD_PINNACLE_PCTV_DVB: /* * this device needs some gpio writes to get the * DVB-T demod work */ em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfe); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xde); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfe); msleep(70); break; case EM2820_BOARD_GADMEI_UTV310: case EM2820_BOARD_MSI_VOX_USB_2: /* enables audio for that devices */ em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfd); break; case EM2882_BOARD_KWORLD_ATSC_315U: em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xff); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfe); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2880_R04_GPO, 0x00); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2880_R04_GPO, 0x08); usleep_range(10000, 11000); break; case EM2860_BOARD_KAIOMY_TVNPC_U2: em28xx_write_regs(dev, EM28XX_R0F_XCLK, "\x07", 1); em28xx_write_regs(dev, EM28XX_R06_I2C_CLK, "\x40", 1); em28xx_write_regs(dev, 0x0d, "\x42", 1); em28xx_write_regs(dev, 0x08, "\xfd", 1); usleep_range(10000, 11000); em28xx_write_regs(dev, 0x08, "\xff", 1); usleep_range(10000, 11000); em28xx_write_regs(dev, 0x08, "\x7f", 1); usleep_range(10000, 11000); em28xx_write_regs(dev, 0x08, "\x6b", 1); break; case 
EM2860_BOARD_EASYCAP: em28xx_write_regs(dev, 0x08, "\xf8", 1); break; case EM2820_BOARD_IODATA_GVMVP_SZ: em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xff); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xf7); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfe); msleep(70); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfd); msleep(70); break; case EM2860_BOARD_TERRATEC_GRABBY: /* * HACK?: Ensure AC97 register reading is reliable before * proceeding. In practice, this will wait about 1.6 seconds. */ em28xx_wait_until_ac97_features_equals(dev, 0x6a90); break; } em28xx_gpio_set(dev, dev->board.tuner_gpio); em28xx_set_mode(dev, EM28XX_ANALOG_MODE); /* Unlock device */ em28xx_set_mode(dev, EM28XX_SUSPEND); } static int em28xx_hint_board(struct em28xx *dev) { int i; if (dev->is_webcam) { if (dev->em28xx_sensor == EM28XX_MT9V011) { dev->model = EM2820_BOARD_SILVERCREST_WEBCAM; } else if (dev->em28xx_sensor == EM28XX_MT9M001 || dev->em28xx_sensor == EM28XX_MT9M111) { dev->model = EM2750_BOARD_UNKNOWN; } /* FIXME: IMPROVE ! */ return 0; } /* * HINT method: EEPROM * * This method works only for boards with eeprom. * Uses a hash of all eeprom bytes. The hash should be * unique for a vendor/tuner pair. * There are a high chance that tuners for different * video standards produce different hashes. */ for (i = 0; i < ARRAY_SIZE(em28xx_eeprom_hash); i++) { if (dev->hash == em28xx_eeprom_hash[i].hash) { dev->model = em28xx_eeprom_hash[i].model; dev->tuner_type = em28xx_eeprom_hash[i].tuner; dev_err(&dev->intf->dev, "Your board has no unique USB ID.\n" "A hint were successfully done, based on eeprom hash.\n" "This method is not 100%% failproof.\n" "If the board were misdetected, please email this log to:\n" "\tV4L Mailing List <linux-media@vger.kernel.org>\n" "Board detected as %s\n", em28xx_boards[dev->model].name); return 0; } } /* * HINT method: I2C attached devices * * This method works for all boards. * Uses a hash of i2c scanned devices. 
* Devices with the same i2c attached chips will * be considered equal. * This method is less precise than the eeprom one. */ /* user did not request i2c scanning => do it now */ if (!dev->i2c_hash) em28xx_do_i2c_scan(dev, dev->def_i2c_bus); for (i = 0; i < ARRAY_SIZE(em28xx_i2c_hash); i++) { if (dev->i2c_hash == em28xx_i2c_hash[i].hash) { dev->model = em28xx_i2c_hash[i].model; dev->tuner_type = em28xx_i2c_hash[i].tuner; dev_err(&dev->intf->dev, "Your board has no unique USB ID.\n" "A hint were successfully done, based on i2c devicelist hash.\n" "This method is not 100%% failproof.\n" "If the board were misdetected, please email this log to:\n" "\tV4L Mailing List <linux-media@vger.kernel.org>\n" "Board detected as %s\n", em28xx_boards[dev->model].name); return 0; } } dev_err(&dev->intf->dev, "Your board has no unique USB ID and thus need a hint to be detected.\n" "You may try to use card=<n> insmod option to workaround that.\n" "Please send an email with this log to:\n" "\tV4L Mailing List <linux-media@vger.kernel.org>\n" "Board eeprom hash is 0x%08lx\n" "Board i2c devicelist hash is 0x%08lx\n", dev->hash, dev->i2c_hash); dev_err(&dev->intf->dev, "Here is a list of valid choices for the card=<n> insmod option:\n"); for (i = 0; i < em28xx_bcount; i++) { dev_err(&dev->intf->dev, " card=%d -> %s\n", i, em28xx_boards[i].name); } return -1; } static void em28xx_card_setup(struct em28xx *dev) { int i, j, idx; bool duplicate_entry; /* * If the device can be a webcam, seek for a sensor. * If sensor is not found, then it isn't a webcam. */ if (dev->is_webcam) { em28xx_detect_sensor(dev); if (dev->em28xx_sensor == EM28XX_NOSENSOR) /* NOTE: error/unknown sensor/no sensor */ dev->is_webcam = 0; } switch (dev->model) { case EM2750_BOARD_UNKNOWN: case EM2820_BOARD_UNKNOWN: case EM2800_BOARD_UNKNOWN: /* * The K-WORLD DVB-T 310U is detected as an MSI Digivox AD. * * This occurs because they share identical USB vendor and * product IDs. 
* * What we do here is look up the EEPROM hash of the K-WORLD * and if it is found then we decide that we do not have * a DIGIVOX and reset the device to the K-WORLD instead. * * This solution is only valid if they do not share eeprom * hash identities which has not been determined as yet. */ if (em28xx_hint_board(dev) < 0) { dev_err(&dev->intf->dev, "Board not discovered\n"); } else { em28xx_set_model(dev); em28xx_pre_card_setup(dev); } break; default: em28xx_set_model(dev); } dev_info(&dev->intf->dev, "Identified as %s (card=%d)\n", dev->board.name, dev->model); dev->tuner_type = em28xx_boards[dev->model].tuner_type; /* request some modules */ switch (dev->model) { case EM2820_BOARD_HAUPPAUGE_WINTV_USB_2: case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900: case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900_R2: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2884_BOARD_HAUPPAUGE_WINTV_HVR_930C: case EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_DVB: case EM28174_BOARD_HAUPPAUGE_WINTV_DUALHD_01595: { struct tveeprom tv; if (!dev->eedata) break; #if defined(CONFIG_MODULES) && defined(MODULE) request_module("tveeprom"); #endif /* Call first TVeeprom */ tveeprom_hauppauge_analog(&tv, dev->eedata); dev->tuner_type = tv.tuner_type; if (tv.audio_processor == TVEEPROM_AUDPROC_MSP) { dev->i2s_speed = 2048000; dev->has_msp34xx = 1; } break; } case EM2882_BOARD_KWORLD_ATSC_315U: em28xx_write_reg(dev, 0x0d, 0x42); usleep_range(10000, 11000); em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xfd); usleep_range(10000, 11000); break; case EM2820_BOARD_KWORLD_PVRTV2800RF: /* GPIO enables sound on KWORLD PVR TV 2800RF */ em28xx_write_reg(dev, EM2820_R08_GPIO_CTRL, 0xf9); break; case EM2820_BOARD_UNKNOWN: case EM2800_BOARD_UNKNOWN: /* * The K-WORLD DVB-T 310U is detected as an MSI Digivox AD. * * This occurs because they share identical USB vendor and * product IDs. 
* * What we do here is look up the EEPROM hash of the K-WORLD * and if it is found then we decide that we do not have * a DIGIVOX and reset the device to the K-WORLD instead. * * This solution is only valid if they do not share eeprom * hash identities which has not been determined as yet. */ case EM2880_BOARD_MSI_DIGIVOX_AD: if (!em28xx_hint_board(dev)) em28xx_set_model(dev); /* * In cases where we had to use a board hint, the call to * em28xx_set_mode() in em28xx_pre_card_setup() was a no-op, * so make the call now so the analog GPIOs are set properly * before probing the i2c bus. */ em28xx_gpio_set(dev, dev->board.tuner_gpio); em28xx_set_mode(dev, EM28XX_ANALOG_MODE); break; /* * The Dikom DK300 is detected as an Kworld VS-DVB-T 323UR. * * This occurs because they share identical USB vendor and * product IDs. * * What we do here is look up the EEPROM hash of the Dikom * and if it is found then we decide that we do not have * a Kworld and reset the device to the Dikom instead. * * This solution is only valid if they do not share eeprom * hash identities which has not been determined as yet. */ case EM2882_BOARD_KWORLD_VS_DVBT: if (!em28xx_hint_board(dev)) em28xx_set_model(dev); /* * In cases where we had to use a board hint, the call to * em28xx_set_mode() in em28xx_pre_card_setup() was a no-op, * so make the call now so the analog GPIOs are set properly * before probing the i2c bus. 
*/ em28xx_gpio_set(dev, dev->board.tuner_gpio); em28xx_set_mode(dev, EM28XX_ANALOG_MODE); break; } if (dev->board.valid == EM28XX_BOARD_NOT_VALIDATED) { dev_err(&dev->intf->dev, "\n\n" "The support for this board weren't valid yet.\n" "Please send a report of having this working\n" "not to V4L mailing list (and/or to other addresses)\n\n"); } /* Free eeprom data memory */ kfree(dev->eedata); dev->eedata = NULL; /* Allow override tuner type by a module parameter */ if (tuner >= 0) dev->tuner_type = tuner; /* * Dynamically generate a list of valid audio inputs for this * specific board, mapping them via enum em28xx_amux. */ idx = 0; for (i = 0; i < MAX_EM28XX_INPUT; i++) { if (!INPUT(i)->type) continue; /* Skip already mapped audio inputs */ duplicate_entry = false; for (j = 0; j < idx; j++) { if (INPUT(i)->amux == dev->amux_map[j]) { duplicate_entry = true; break; } } if (duplicate_entry) continue; dev->amux_map[idx++] = INPUT(i)->amux; } for (; idx < MAX_EM28XX_INPUT; idx++) dev->amux_map[idx] = EM28XX_AMUX_UNUSED; } void em28xx_setup_xc3028(struct em28xx *dev, struct xc2028_ctrl *ctl) { memset(ctl, 0, sizeof(*ctl)); ctl->fname = XC2028_DEFAULT_FIRMWARE; ctl->max_len = 64; ctl->mts = em28xx_boards[dev->model].mts_firmware; switch (dev->model) { case EM2880_BOARD_EMPIRE_DUAL_TV: case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900: case EM2882_BOARD_TERRATEC_HYBRID_XS: case EM2880_BOARD_TERRATEC_HYBRID_XS: case EM2880_BOARD_TERRATEC_HYBRID_XS_FR: case EM2881_BOARD_PINNACLE_HYBRID_PRO: case EM2882_BOARD_ZOLID_HYBRID_TV_STICK: ctl->demod = XC3028_FE_ZARLINK456; break; case EM2880_BOARD_HAUPPAUGE_WINTV_HVR_900_R2: case EM2882_BOARD_PINNACLE_HYBRID_PRO_330E: ctl->demod = XC3028_FE_DEFAULT; break; case EM2880_BOARD_AMD_ATI_TV_WONDER_HD_600: ctl->demod = XC3028_FE_DEFAULT; ctl->fname = XC3028L_DEFAULT_FIRMWARE; break; case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_850: case EM2883_BOARD_HAUPPAUGE_WINTV_HVR_950: case EM2880_BOARD_PINNACLE_PCTV_HD_PRO: /* FIXME: Better to specify the needed IF 
*/ ctl->demod = XC3028_FE_DEFAULT; break; case EM2883_BOARD_KWORLD_HYBRID_330U: case EM2882_BOARD_DIKOM_DK300: case EM2882_BOARD_KWORLD_VS_DVBT: ctl->demod = XC3028_FE_CHINA; ctl->fname = XC2028_DEFAULT_FIRMWARE; break; case EM2882_BOARD_EVGA_INDTUBE: ctl->demod = XC3028_FE_CHINA; ctl->fname = XC3028L_DEFAULT_FIRMWARE; break; default: ctl->demod = XC3028_FE_OREN538; } } EXPORT_SYMBOL_GPL(em28xx_setup_xc3028); static void request_module_async(struct work_struct *work) { struct em28xx *dev = container_of(work, struct em28xx, request_module_wk); /* * The em28xx extensions can be modules or builtin. If the * modules are already loaded or are built in, those extensions * can be initialised right now. Otherwise, the module init * code will do it. */ /* * Devices with an audio-only intf also have a V4L/DVB/RC * intf. Don't register extensions twice on those devices. */ if (dev->is_audio_only) { #if defined(CONFIG_MODULES) && defined(MODULE) request_module("em28xx-alsa"); #endif return; } em28xx_init_extension(dev); #if defined(CONFIG_MODULES) && defined(MODULE) if (dev->has_video) request_module("em28xx-v4l"); if (dev->usb_audio_type == EM28XX_USB_AUDIO_CLASS) request_module("snd-usb-audio"); else if (dev->usb_audio_type == EM28XX_USB_AUDIO_VENDOR) request_module("em28xx-alsa"); if (dev->board.has_dvb) request_module("em28xx-dvb"); if (dev->board.buttons || ((dev->board.ir_codes || dev->board.has_ir_i2c) && !disable_ir)) request_module("em28xx-rc"); #endif /* CONFIG_MODULES */ } static void request_modules(struct em28xx *dev) { INIT_WORK(&dev->request_module_wk, request_module_async); schedule_work(&dev->request_module_wk); } static void flush_request_modules(struct em28xx *dev) { flush_work(&dev->request_module_wk); } static int em28xx_media_device_init(struct em28xx *dev, struct usb_device *udev) { #ifdef CONFIG_MEDIA_CONTROLLER struct media_device *mdev; mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return -ENOMEM; if (udev->product) media_device_usb_init(mdev, 
udev, udev->product); else if (udev->manufacturer) media_device_usb_init(mdev, udev, udev->manufacturer); else media_device_usb_init(mdev, udev, dev_name(&dev->intf->dev)); dev->media_dev = mdev; #endif return 0; } static void em28xx_unregister_media_device(struct em28xx *dev) { #ifdef CONFIG_MEDIA_CONTROLLER if (dev->media_dev) { media_device_unregister(dev->media_dev); media_device_cleanup(dev->media_dev); kfree(dev->media_dev); dev->media_dev = NULL; } #endif } /* * em28xx_release_resources() * unregisters the v4l2,i2c and usb devices * called when the device gets disconnected or at module unload */ static void em28xx_release_resources(struct em28xx *dev) { struct usb_device *udev = interface_to_usbdev(dev->intf); /*FIXME: I2C IR should be disconnected */ mutex_lock(&dev->lock); em28xx_unregister_media_device(dev); if (dev->def_i2c_bus) em28xx_i2c_unregister(dev, 1); em28xx_i2c_unregister(dev, 0); if (dev->ts == PRIMARY_TS) usb_put_dev(udev); /* Mark device as unused */ clear_bit(dev->devno, em28xx_devused); mutex_unlock(&dev->lock); }; /** * em28xx_free_device() - Free em28xx device * * @ref: struct kref for em28xx device * * This is called when all extensions and em28xx core unregisters a device */ void em28xx_free_device(struct kref *ref) { struct em28xx *dev = kref_to_dev(ref); dev_info(&dev->intf->dev, "Freeing device\n"); if (!dev->disconnected) em28xx_release_resources(dev); if (dev->ts == PRIMARY_TS) kfree(dev->alt_max_pkt_size_isoc); kfree(dev); } EXPORT_SYMBOL_GPL(em28xx_free_device); /* * em28xx_init_dev() * allocates and inits the device structs, registers i2c bus and v4l device */ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, struct usb_interface *intf, int minor) { int retval; const char *chip_name = NULL; dev->intf = intf; mutex_init(&dev->ctrl_urb_lock); spin_lock_init(&dev->slock); dev->em28xx_write_regs = em28xx_write_regs; dev->em28xx_read_reg = em28xx_read_reg; dev->em28xx_read_reg_req_len = em28xx_read_reg_req_len; 
dev->em28xx_write_regs_req = em28xx_write_regs_req; dev->em28xx_read_reg_req = em28xx_read_reg_req; dev->board.is_em2800 = em28xx_boards[dev->model].is_em2800; em28xx_set_model(dev); dev->wait_after_write = 5; /* Based on the Chip ID, set the device configuration */ retval = em28xx_read_reg(dev, EM28XX_R0A_CHIPID); if (retval > 0) { dev->chip_id = retval; switch (dev->chip_id) { case CHIP_ID_EM2800: chip_name = "em2800"; break; case CHIP_ID_EM2710: chip_name = "em2710"; break; case CHIP_ID_EM2750: chip_name = "em2750"; break; case CHIP_ID_EM2765: chip_name = "em2765"; dev->wait_after_write = 0; dev->is_em25xx = 1; dev->eeprom_addrwidth_16bit = 1; break; case CHIP_ID_EM2820: chip_name = "em2710/2820"; if (le16_to_cpu(udev->descriptor.idVendor) == 0xeb1a) { __le16 idProd = udev->descriptor.idProduct; if (le16_to_cpu(idProd) == 0x2710) chip_name = "em2710"; else if (le16_to_cpu(idProd) == 0x2820) chip_name = "em2820"; } /* NOTE: the em2820 is used in webcams, too ! */ break; case CHIP_ID_EM2840: chip_name = "em2840"; break; case CHIP_ID_EM2860: chip_name = "em2860"; break; case CHIP_ID_EM2870: chip_name = "em2870"; dev->wait_after_write = 0; break; case CHIP_ID_EM2874: chip_name = "em2874"; dev->wait_after_write = 0; dev->eeprom_addrwidth_16bit = 1; break; case CHIP_ID_EM28174: chip_name = "em28174"; dev->wait_after_write = 0; dev->eeprom_addrwidth_16bit = 1; break; case CHIP_ID_EM28178: chip_name = "em28178"; dev->wait_after_write = 0; dev->eeprom_addrwidth_16bit = 1; break; case CHIP_ID_EM2883: chip_name = "em2882/3"; dev->wait_after_write = 0; break; case CHIP_ID_EM2884: chip_name = "em2884"; dev->wait_after_write = 0; dev->eeprom_addrwidth_16bit = 1; break; } } if (!chip_name) dev_info(&dev->intf->dev, "unknown em28xx chip ID (%d)\n", dev->chip_id); else dev_info(&dev->intf->dev, "chip ID is %s\n", chip_name); em28xx_media_device_init(dev, udev); if (dev->is_audio_only) { retval = em28xx_audio_setup(dev); if (retval) { retval = -ENODEV; goto err_deinit_media; } 
em28xx_init_extension(dev); return 0; } em28xx_pre_card_setup(dev); rt_mutex_init(&dev->i2c_bus_lock); /* register i2c bus 0 */ if (dev->board.is_em2800) retval = em28xx_i2c_register(dev, 0, EM28XX_I2C_ALGO_EM2800); else retval = em28xx_i2c_register(dev, 0, EM28XX_I2C_ALGO_EM28XX); if (retval < 0) { dev_err(&dev->intf->dev, "%s: em28xx_i2c_register bus 0 - error [%d]!\n", __func__, retval); goto err_deinit_media; } /* register i2c bus 1 */ if (dev->def_i2c_bus) { if (dev->is_em25xx) retval = em28xx_i2c_register(dev, 1, EM28XX_I2C_ALGO_EM25XX_BUS_B); else retval = em28xx_i2c_register(dev, 1, EM28XX_I2C_ALGO_EM28XX); if (retval < 0) { dev_err(&dev->intf->dev, "%s: em28xx_i2c_register bus 1 - error [%d]!\n", __func__, retval); goto err_unreg_i2c; } } /* Do board specific init and eeprom reading */ em28xx_card_setup(dev); return 0; err_unreg_i2c: em28xx_i2c_unregister(dev, 0); err_deinit_media: em28xx_unregister_media_device(dev); return retval; } static int em28xx_duplicate_dev(struct em28xx *dev) { int nr; struct em28xx *sec_dev = kmemdup(dev, sizeof(*sec_dev), GFP_KERNEL); if (!sec_dev) { dev->dev_next = NULL; return -ENOMEM; } /* Check to see next free device and mark as used */ do { nr = find_first_zero_bit(em28xx_devused, EM28XX_MAXBOARDS); if (nr >= EM28XX_MAXBOARDS) { /* No free device slots */ dev_warn(&dev->intf->dev, ": Supports only %i em28xx boards.\n", EM28XX_MAXBOARDS); kfree(sec_dev); dev->dev_next = NULL; return -ENOMEM; } } while (test_and_set_bit(nr, em28xx_devused)); sec_dev->devno = nr; snprintf(sec_dev->name, 28, "em28xx #%d", nr); sec_dev->dev_next = NULL; dev->dev_next = sec_dev; return 0; } /* high bandwidth multiplier, as encoded in highspeed endpoint descriptors */ #define hb_mult(wMaxPacketSize) (1 + (((wMaxPacketSize) >> 11) & 0x03)) static void em28xx_check_usb_descriptor(struct em28xx *dev, struct usb_device *udev, struct usb_interface *intf, int alt, int ep, bool *has_vendor_audio, bool *has_video, bool *has_dvb) { const struct 
usb_endpoint_descriptor *e; int sizedescr, size; /* * NOTE: * * Old logic with support for isoc transfers only was: * 0x82 isoc => analog * 0x83 isoc => audio * 0x84 isoc => digital * * New logic with support for bulk transfers * 0x82 isoc => analog * 0x82 bulk => analog * 0x83 isoc* => audio * 0x84 isoc => digital * 0x84 bulk => analog or digital** * 0x85 isoc => digital TS2 * 0x85 bulk => digital TS2 * (*: audio should always be isoc) * (**: analog, if ep 0x82 is isoc, otherwise digital) * * The new logic preserves backwards compatibility and * reflects the endpoint configurations we have seen * so far. But there might be devices for which this * logic is not sufficient... */ e = &intf->altsetting[alt].endpoint[ep].desc; if (!usb_endpoint_dir_in(e)) return; sizedescr = le16_to_cpu(e->wMaxPacketSize); size = sizedescr & 0x7ff; if (udev->speed == USB_SPEED_HIGH) size = size * hb_mult(sizedescr); /* Only inspect input endpoints */ switch (e->bEndpointAddress) { case 0x82: *has_video = true; if (usb_endpoint_xfer_isoc(e)) { dev->analog_ep_isoc = e->bEndpointAddress; dev->alt_max_pkt_size_isoc[alt] = size; } else if (usb_endpoint_xfer_bulk(e)) { dev->analog_ep_bulk = e->bEndpointAddress; } return; case 0x83: if (usb_endpoint_xfer_isoc(e)) *has_vendor_audio = true; else dev_err(&intf->dev, "error: skipping audio endpoint 0x83, because it uses bulk transfers !\n"); return; case 0x84: if (*has_video && (usb_endpoint_xfer_bulk(e))) { dev->analog_ep_bulk = e->bEndpointAddress; } else { if (usb_endpoint_xfer_isoc(e)) { if (size > dev->dvb_max_pkt_size_isoc) { /* * 2) some manufacturers (e.g. Terratec) * disable endpoints by setting * wMaxPacketSize to 0 bytes for all * alt settings. So far, we've seen * this for DVB isoc endpoints only. 
*/ *has_dvb = true; dev->dvb_ep_isoc = e->bEndpointAddress; dev->dvb_max_pkt_size_isoc = size; dev->dvb_alt_isoc = alt; } } else { *has_dvb = true; dev->dvb_ep_bulk = e->bEndpointAddress; } } return; case 0x85: if (usb_endpoint_xfer_isoc(e)) { if (size > dev->dvb_max_pkt_size_isoc_ts2) { dev->dvb_ep_isoc_ts2 = e->bEndpointAddress; dev->dvb_max_pkt_size_isoc_ts2 = size; dev->dvb_alt_isoc = alt; } } else { dev->dvb_ep_bulk_ts2 = e->bEndpointAddress; } return; } } /* * em28xx_usb_probe() * checks for supported devices */ static int em28xx_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev; struct em28xx *dev = NULL; int retval; bool has_vendor_audio = false, has_video = false, has_dvb = false; int i, nr, try_bulk; const int ifnum = intf->altsetting[0].desc.bInterfaceNumber; char *speed; udev = usb_get_dev(interface_to_usbdev(intf)); /* Check to see next free device and mark as used */ do { nr = find_first_zero_bit(em28xx_devused, EM28XX_MAXBOARDS); if (nr >= EM28XX_MAXBOARDS) { /* No free device slots */ dev_err(&intf->dev, "Driver supports up to %i em28xx boards.\n", EM28XX_MAXBOARDS); retval = -ENOMEM; goto err_no_slot; } } while (test_and_set_bit(nr, em28xx_devused)); /* Don't register audio interfaces */ if (intf->altsetting[0].desc.bInterfaceClass == USB_CLASS_AUDIO) { dev_info(&intf->dev, "audio device (%04x:%04x): interface %i, class %i\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), ifnum, intf->altsetting[0].desc.bInterfaceClass); retval = -ENODEV; goto err; } /* allocate memory for our device state and initialize it */ dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto err; } /* compute alternate max packet sizes */ dev->alt_max_pkt_size_isoc = kcalloc(intf->num_altsetting, sizeof(dev->alt_max_pkt_size_isoc[0]), GFP_KERNEL); if (!dev->alt_max_pkt_size_isoc) { kfree(dev); retval = -ENOMEM; goto err; } /* Get endpoints */ for (i = 0; i < 
intf->num_altsetting; i++) { int ep; for (ep = 0; ep < intf->altsetting[i].desc.bNumEndpoints; ep++) em28xx_check_usb_descriptor(dev, udev, intf, i, ep, &has_vendor_audio, &has_video, &has_dvb); } if (!(has_vendor_audio || has_video || has_dvb)) { retval = -ENODEV; goto err_free; } switch (udev->speed) { case USB_SPEED_LOW: speed = "1.5"; break; case USB_SPEED_UNKNOWN: case USB_SPEED_FULL: speed = "12"; break; case USB_SPEED_HIGH: speed = "480"; break; default: speed = "unknown"; } dev_info(&intf->dev, "New device %s %s @ %s Mbps (%04x:%04x, interface %d, class %d)\n", udev->manufacturer ? udev->manufacturer : "", udev->product ? udev->product : "", speed, le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), ifnum, intf->altsetting->desc.bInterfaceNumber); /* * Make sure we have 480 Mbps of bandwidth, otherwise things like * video stream wouldn't likely work, since 12 Mbps is generally * not enough even for most Digital TV streams. */ if (udev->speed != USB_SPEED_HIGH && disable_usb_speed_check == 0) { dev_err(&intf->dev, "Device initialization failed.\n"); dev_err(&intf->dev, "Device must be connected to a high-speed USB 2.0 port.\n"); retval = -ENODEV; goto err_free; } kref_init(&dev->ref); dev->devno = nr; dev->model = id->driver_info; dev->alt = -1; dev->is_audio_only = has_vendor_audio && !(has_video || has_dvb); dev->has_video = has_video; dev->ifnum = ifnum; dev->ts = PRIMARY_TS; snprintf(dev->name, 28, "em28xx"); dev->dev_next = NULL; if (has_vendor_audio) { dev_info(&intf->dev, "Audio interface %i found (Vendor Class)\n", ifnum); dev->usb_audio_type = EM28XX_USB_AUDIO_VENDOR; } /* Checks if audio is provided by a USB Audio Class intf */ for (i = 0; i < udev->config->desc.bNumInterfaces; i++) { struct usb_interface *uif = udev->config->interface[i]; if (uif->altsetting[0].desc.bInterfaceClass == USB_CLASS_AUDIO) { if (has_vendor_audio) dev_err(&intf->dev, "em28xx: device seems to have vendor AND usb audio class interfaces !\n" 
"\t\tThe vendor interface will be ignored. Please contact the developers <linux-media@vger.kernel.org>\n"); dev->usb_audio_type = EM28XX_USB_AUDIO_CLASS; break; } } if (has_video) dev_info(&intf->dev, "Video interface %i found:%s%s\n", ifnum, dev->analog_ep_bulk ? " bulk" : "", dev->analog_ep_isoc ? " isoc" : ""); if (has_dvb) dev_info(&intf->dev, "DVB interface %i found:%s%s\n", ifnum, dev->dvb_ep_bulk ? " bulk" : "", dev->dvb_ep_isoc ? " isoc" : ""); dev->num_alt = intf->num_altsetting; if ((unsigned int)card[nr] < em28xx_bcount) dev->model = card[nr]; /* save our data pointer in this intf device */ usb_set_intfdata(intf, dev); /* allocate device struct and check if the device is a webcam */ mutex_init(&dev->lock); retval = em28xx_init_dev(dev, udev, intf, nr); if (retval) goto err_free; if (usb_xfer_mode < 0) { if (dev->is_webcam) try_bulk = 1; else try_bulk = 0; } else { try_bulk = usb_xfer_mode > 0; } /* Disable V4L2 if the device doesn't have a decoder or image sensor */ if (has_video && dev->board.decoder == EM28XX_NODECODER && dev->em28xx_sensor == EM28XX_NOSENSOR) { dev_err(&intf->dev, "Currently, V4L2 is not supported on this model\n"); has_video = false; dev->has_video = false; } if (dev->board.has_dual_ts && (dev->tuner_type != TUNER_ABSENT || INPUT(0)->type)) { /* * The logic with sets alternate is not ready for dual-tuners * which analog modes. */ dev_err(&intf->dev, "We currently don't support analog TV or stream capture on dual tuners.\n"); has_video = false; } /* Select USB transfer types to use */ if (has_video) { if (!dev->analog_ep_isoc || (try_bulk && dev->analog_ep_bulk)) dev->analog_xfer_bulk = 1; dev_info(&intf->dev, "analog set to %s mode.\n", dev->analog_xfer_bulk ? "bulk" : "isoc"); } if (has_dvb) { if (!dev->dvb_ep_isoc || (try_bulk && dev->dvb_ep_bulk)) dev->dvb_xfer_bulk = 1; dev_info(&intf->dev, "dvb set to %s mode.\n", dev->dvb_xfer_bulk ? 
"bulk" : "isoc"); } if (dev->board.has_dual_ts && em28xx_duplicate_dev(dev) == 0) { kref_init(&dev->dev_next->ref); dev->dev_next->ts = SECONDARY_TS; dev->dev_next->alt = -1; dev->dev_next->is_audio_only = has_vendor_audio && !(has_video || has_dvb); dev->dev_next->has_video = false; dev->dev_next->ifnum = ifnum; dev->dev_next->model = id->driver_info; mutex_init(&dev->dev_next->lock); retval = em28xx_init_dev(dev->dev_next, udev, intf, dev->dev_next->devno); if (retval) goto err_free; dev->dev_next->board.ir_codes = NULL; /* No IR for 2nd tuner */ dev->dev_next->board.has_ir_i2c = 0; /* No IR for 2nd tuner */ if (usb_xfer_mode < 0) { if (dev->dev_next->is_webcam) try_bulk = 1; else try_bulk = 0; } else { try_bulk = usb_xfer_mode > 0; } /* Select USB transfer types to use */ if (has_dvb) { if (!dev->dvb_ep_isoc_ts2 || (try_bulk && dev->dvb_ep_bulk_ts2)) dev->dev_next->dvb_xfer_bulk = 1; dev_info(&dev->intf->dev, "dvb ts2 set to %s mode.\n", dev->dev_next->dvb_xfer_bulk ? "bulk" : "isoc"); } dev->dev_next->dvb_ep_isoc = dev->dvb_ep_isoc_ts2; dev->dev_next->dvb_ep_bulk = dev->dvb_ep_bulk_ts2; dev->dev_next->dvb_max_pkt_size_isoc = dev->dvb_max_pkt_size_isoc_ts2; dev->dev_next->dvb_alt_isoc = dev->dvb_alt_isoc; /* Configure hardware to support TS2*/ if (dev->dvb_xfer_bulk) { /* The ep4 and ep5 are configured for BULK */ em28xx_write_reg(dev, 0x0b, 0x96); mdelay(100); em28xx_write_reg(dev, 0x0b, 0x80); mdelay(100); } else { /* The ep4 and ep5 are configured for ISO */ em28xx_write_reg(dev, 0x0b, 0x96); mdelay(100); em28xx_write_reg(dev, 0x0b, 0x82); mdelay(100); } } request_modules(dev); /* * Do it at the end, to reduce dynamic configuration changes during * the device init. Yet, as request_modules() can be async, the * topology will likely change after the load of the em28xx subdrivers. */ #ifdef CONFIG_MEDIA_CONTROLLER /* * No need to check the return value, the device will still be * usable without media controller API. 
*/
	retval = media_device_register(dev->media_dev);
#endif

	return 0;

err_free:
	kfree(dev->alt_max_pkt_size_isoc);
	kfree(dev);

err:
	clear_bit(nr, em28xx_devused);

err_no_slot:
	usb_put_dev(udev);
	return retval;
}

/*
 * em28xx_usb_disconnect()
 * called when the device gets disconnected
 * video device will be unregistered on v4l2_close in case it is still open
 *
 * Detaches the driver data from @intf, flags the device (and the chained
 * dev_next device, when one exists) as disconnected, then releases the
 * resources and drops one reference on each device structure.
 */
static void em28xx_usb_disconnect(struct usb_interface *intf)
{
	struct em28xx *dev;

	/* fetch the driver data, then clear it on the interface */
	dev = usb_get_intfdata(intf);
	usb_set_intfdata(intf, NULL);

	if (!dev)
		return;

	/* the chained device is flagged before the primary one */
	if (dev->dev_next) {
		dev->dev_next->disconnected = 1;
		dev_info(&dev->intf->dev, "Disconnecting %s\n",
			 dev->dev_next->name);
	}

	dev->disconnected = 1;

	dev_info(&dev->intf->dev, "Disconnecting %s\n", dev->name);

	flush_request_modules(dev);

	em28xx_close_extension(dev);

	/* resources of the chained device go first, then the primary ones */
	if (dev->dev_next)
		em28xx_release_resources(dev->dev_next);
	em28xx_release_resources(dev);

	/* em28xx_free_device is the kref release callback for both devices */
	if (dev->dev_next) {
		kref_put(&dev->dev_next->ref, em28xx_free_device);
		dev->dev_next = NULL;
	}
	kref_put(&dev->ref, em28xx_free_device);
}

/*
 * em28xx_usb_suspend()
 * Forwards the suspend request to em28xx_suspend_extension().  Returns 0
 * in every case, including when the interface carries no driver data.
 */
static int em28xx_usb_suspend(struct usb_interface *intf,
			      pm_message_t message)
{
	struct em28xx *dev;

	dev = usb_get_intfdata(intf);
	if (!dev)
		return 0;
	em28xx_suspend_extension(dev);
	return 0;
}

/*
 * em28xx_usb_resume()
 * Forwards the resume request to em28xx_resume_extension().  Returns 0
 * in every case, including when the interface carries no driver data.
 * Also installed as the .reset_resume handler below.
 */
static int em28xx_usb_resume(struct usb_interface *intf)
{
	struct em28xx *dev;

	dev = usb_get_intfdata(intf);
	if (!dev)
		return 0;
	em28xx_resume_extension(dev);
	return 0;
}

/* USB driver glue: probe/disconnect plus power-management callbacks */
static struct usb_driver em28xx_usb_driver = {
	.name = "em28xx",
	.probe = em28xx_usb_probe,
	.disconnect = em28xx_usb_disconnect,
	.suspend = em28xx_usb_suspend,
	.resume = em28xx_usb_resume,
	.reset_resume = em28xx_usb_resume,
	.id_table = em28xx_id_table,
};

module_usb_driver(em28xx_usb_driver);
1 1 1 10 1 10 12 2 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 // SPDX-License-Identifier: GPL-2.0-or-later /* * Plantronics USB HID Driver * * Copyright (c) 2014 JD Cole <jd.cole@plantronics.com> * Copyright (c) 2015-2018 Terry Junge <terry.junge@plantronics.com> */ #include "hid-ids.h" #include <linux/hid.h> #include <linux/module.h> #include <linux/jiffies.h> #define PLT_HID_1_0_PAGE 0xffa00000 #define PLT_HID_2_0_PAGE 0xffa20000 #define PLT_BASIC_TELEPHONY 0x0003 #define PLT_BASIC_EXCEPTION 0x0005 #define PLT_VOL_UP 0x00b1 #define PLT_VOL_DOWN 0x00b2 #define PLT_MIC_MUTE 0x00b5 #define PLT1_VOL_UP (PLT_HID_1_0_PAGE | PLT_VOL_UP) #define PLT1_VOL_DOWN (PLT_HID_1_0_PAGE | PLT_VOL_DOWN) #define PLT1_MIC_MUTE (PLT_HID_1_0_PAGE | PLT_MIC_MUTE) #define PLT2_VOL_UP (PLT_HID_2_0_PAGE | PLT_VOL_UP) #define PLT2_VOL_DOWN (PLT_HID_2_0_PAGE | PLT_VOL_DOWN) #define PLT2_MIC_MUTE (PLT_HID_2_0_PAGE | PLT_MIC_MUTE) #define HID_TELEPHONY_MUTE (HID_UP_TELEPHONY | 0x2f) #define HID_CONSUMER_MUTE (HID_UP_CONSUMER | 0xe2) #define PLT_DA60 0xda60 #define PLT_BT300_MIN 0x0413 #define PLT_BT300_MAX 0x0418 #define PLT_DOUBLE_KEY_TIMEOUT 5 /* ms */ 
struct plt_drv_data { unsigned long device_type; unsigned long last_key_ts; unsigned long double_key_to; __u16 last_key; }; static int plantronics_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { unsigned short mapped_key; struct plt_drv_data *drv_data = hid_get_drvdata(hdev); unsigned long plt_type = drv_data->device_type; int allow_mute = usage->hid == HID_TELEPHONY_MUTE; int allow_consumer = field->application == HID_CP_CONSUMERCONTROL && (usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER && usage->hid != HID_CONSUMER_MUTE; /* special case for PTT products */ if (field->application == HID_GD_JOYSTICK) goto defaulted; /* non-standard types or multi-HID interfaces - plt_type is PID */ if (!(plt_type & HID_USAGE_PAGE)) { switch (plt_type) { case PLT_DA60: if (allow_consumer) goto defaulted; if (usage->hid == HID_CONSUMER_MUTE) { mapped_key = KEY_MICMUTE; goto mapped; } break; default: if (allow_consumer || allow_mute) goto defaulted; } goto ignored; } /* handle standard consumer control mapping */ /* and standard telephony mic mute mapping */ if (allow_consumer || allow_mute) goto defaulted; /* handle vendor unique types - plt_type is 0xffa0uuuu or 0xffa2uuuu */ /* if not 'basic telephony compliant' - map vendor unique controls */ if (!((plt_type & HID_USAGE) >= PLT_BASIC_TELEPHONY && (plt_type & HID_USAGE) != PLT_BASIC_EXCEPTION) && !((field->application ^ plt_type) & HID_USAGE_PAGE)) switch (usage->hid) { case PLT1_VOL_UP: case PLT2_VOL_UP: mapped_key = KEY_VOLUMEUP; goto mapped; case PLT1_VOL_DOWN: case PLT2_VOL_DOWN: mapped_key = KEY_VOLUMEDOWN; goto mapped; case PLT1_MIC_MUTE: case PLT2_MIC_MUTE: mapped_key = KEY_MICMUTE; goto mapped; } /* * Future mapping of call control or other usages, * if and when keys are defined would go here * otherwise, ignore everything else that was not mapped */ ignored: hid_dbg(hdev, "usage: %08x (appl: %08x) - ignored\n", usage->hid, 
field->application); return -1; defaulted: hid_dbg(hdev, "usage: %08x (appl: %08x) - defaulted\n", usage->hid, field->application); return 0; mapped: hid_map_usage_clear(hi, usage, bit, max, EV_KEY, mapped_key); hid_dbg(hdev, "usage: %08x (appl: %08x) - mapped to key %d\n", usage->hid, field->application, mapped_key); return 1; } static int plantronics_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct plt_drv_data *drv_data = hid_get_drvdata(hdev); unsigned long prev_tsto, cur_ts; __u16 prev_key, cur_key; /* Usages are filtered in plantronics_usages. */ /* HZ too low for ms resolution - double key detection disabled */ /* or it is a key release - handle key presses only. */ if (!drv_data->double_key_to || !value) return 0; prev_tsto = drv_data->last_key_ts + drv_data->double_key_to; cur_ts = drv_data->last_key_ts = jiffies; prev_key = drv_data->last_key; cur_key = drv_data->last_key = usage->code; /* If the same key occurs in <= double_key_to -- ignore it */ if (prev_key == cur_key && time_before_eq(cur_ts, prev_tsto)) { hid_dbg(hdev, "double key %d ignored\n", cur_key); return 1; /* Ignore the repeated key. */ } return 0; } static unsigned long plantronics_device_type(struct hid_device *hdev) { unsigned i, col_page; unsigned long plt_type = hdev->product; /* multi-HID interfaces? 
- plt_type is PID */ if (plt_type >= PLT_BT300_MIN && plt_type <= PLT_BT300_MAX) goto exit; /* determine primary vendor page */ for (i = 0; i < hdev->maxcollection; i++) { col_page = hdev->collection[i].usage & HID_USAGE_PAGE; if (col_page == PLT_HID_2_0_PAGE) { plt_type = hdev->collection[i].usage; break; } if (col_page == PLT_HID_1_0_PAGE) plt_type = hdev->collection[i].usage; } exit: hid_dbg(hdev, "plt_type decoded as: %08lx\n", plt_type); return plt_type; } static int plantronics_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct plt_drv_data *drv_data; int ret; drv_data = devm_kzalloc(&hdev->dev, sizeof(*drv_data), GFP_KERNEL); if (!drv_data) return -ENOMEM; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); return ret; } drv_data->device_type = plantronics_device_type(hdev); drv_data->double_key_to = msecs_to_jiffies(PLT_DOUBLE_KEY_TIMEOUT); drv_data->last_key_ts = jiffies - drv_data->double_key_to; /* if HZ does not allow ms resolution - disable double key detection */ if (drv_data->double_key_to < PLT_DOUBLE_KEY_TIMEOUT) drv_data->double_key_to = 0; hid_set_drvdata(hdev, drv_data); ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT | HID_CONNECT_HIDINPUT_FORCE | HID_CONNECT_HIDDEV_FORCE); if (ret) hid_err(hdev, "hw start failed\n"); return ret; } static const struct hid_device_id plantronics_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_PLANTRONICS, HID_ANY_ID) }, { } }; MODULE_DEVICE_TABLE(hid, plantronics_devices); static const struct hid_usage_id plantronics_usages[] = { { HID_CP_VOLUMEUP, EV_KEY, HID_ANY_ID }, { HID_CP_VOLUMEDOWN, EV_KEY, HID_ANY_ID }, { HID_TELEPHONY_MUTE, EV_KEY, HID_ANY_ID }, { HID_CONSUMER_MUTE, EV_KEY, HID_ANY_ID }, { PLT2_VOL_UP, EV_KEY, HID_ANY_ID }, { PLT2_VOL_DOWN, EV_KEY, HID_ANY_ID }, { PLT2_MIC_MUTE, EV_KEY, HID_ANY_ID }, { PLT1_VOL_UP, EV_KEY, HID_ANY_ID }, { PLT1_VOL_DOWN, EV_KEY, HID_ANY_ID }, { PLT1_MIC_MUTE, EV_KEY, HID_ANY_ID }, { HID_TERMINATOR, HID_TERMINATOR, HID_TERMINATOR } }; static 
struct hid_driver plantronics_driver = { .name = "plantronics", .id_table = plantronics_devices, .usage_table = plantronics_usages, .input_mapping = plantronics_input_mapping, .event = plantronics_event, .probe = plantronics_probe, }; module_hid_driver(plantronics_driver); MODULE_AUTHOR("JD Cole <jd.cole@plantronics.com>"); MODULE_AUTHOR("Terry Junge <terry.junge@plantronics.com>"); MODULE_DESCRIPTION("Plantronics USB HID Driver"); MODULE_LICENSE("GPL");
100 97 21 2 35 11 32 21 20 21 3 18 9 13 14 1 6 21 11 10 11 11 11 11 10 14 14 7 7 2 12 4 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 // SPDX-License-Identifier: 
GPL-2.0-or-later /* * OSS compatible sequencer driver * * open/close and reset interface * * Copyright (C) 1998-1999 Takashi Iwai <tiwai@suse.de> */ #include "seq_oss_device.h" #include "seq_oss_synth.h" #include "seq_oss_midi.h" #include "seq_oss_writeq.h" #include "seq_oss_readq.h" #include "seq_oss_timer.h" #include "seq_oss_event.h" #include <linux/init.h> #include <linux/export.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/workqueue.h> /* * common variables */ static int maxqlen = SNDRV_SEQ_OSS_MAX_QLEN; module_param(maxqlen, int, 0444); MODULE_PARM_DESC(maxqlen, "maximum queue length"); static int system_client = -1; /* ALSA sequencer client number */ static int system_port = -1; static int num_clients; static struct seq_oss_devinfo *client_table[SNDRV_SEQ_OSS_MAX_CLIENTS]; /* * prototypes */ static int receive_announce(struct snd_seq_event *ev, int direct, void *private, int atomic, int hop); static int translate_mode(struct file *file); static int create_port(struct seq_oss_devinfo *dp); static int delete_port(struct seq_oss_devinfo *dp); static int alloc_seq_queue(struct seq_oss_devinfo *dp); static int delete_seq_queue(int queue); static void free_devinfo(void *private); #define call_ctl(type,rec) snd_seq_kernel_client_ctl(system_client, type, rec) /* call snd_seq_oss_midi_lookup_ports() asynchronously */ static void async_call_lookup_ports(struct work_struct *work) { snd_seq_oss_midi_lookup_ports(system_client); } static DECLARE_WORK(async_lookup_work, async_call_lookup_ports); /* * create sequencer client for OSS sequencer */ int __init snd_seq_oss_create_client(void) { int rc; struct snd_seq_port_info *port __free(kfree) = NULL; struct snd_seq_port_callback port_callback; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return -ENOMEM; /* create ALSA client */ rc = snd_seq_create_kernel_client(NULL, SNDRV_SEQ_CLIENT_OSS, "OSS sequencer"); if (rc < 0) return rc; system_client = rc; /* create announcement receiver port */ 
strscpy(port->name, "Receiver"); port->addr.client = system_client; port->capability = SNDRV_SEQ_PORT_CAP_WRITE; /* receive only */ port->type = 0; memset(&port_callback, 0, sizeof(port_callback)); /* don't set port_callback.owner here. otherwise the module counter * is incremented and we can no longer release the module.. */ port_callback.event_input = receive_announce; port->kernel = &port_callback; if (call_ctl(SNDRV_SEQ_IOCTL_CREATE_PORT, port) >= 0) { struct snd_seq_port_subscribe subs; system_port = port->addr.port; memset(&subs, 0, sizeof(subs)); subs.sender.client = SNDRV_SEQ_CLIENT_SYSTEM; subs.sender.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; subs.dest.client = system_client; subs.dest.port = system_port; call_ctl(SNDRV_SEQ_IOCTL_SUBSCRIBE_PORT, &subs); } /* look up midi devices */ schedule_work(&async_lookup_work); return 0; } /* * receive announcement from system port, and check the midi device */ static int receive_announce(struct snd_seq_event *ev, int direct, void *private, int atomic, int hop) { struct snd_seq_port_info pinfo; if (atomic) return 0; /* it must not happen */ switch (ev->type) { case SNDRV_SEQ_EVENT_PORT_START: case SNDRV_SEQ_EVENT_PORT_CHANGE: if (ev->data.addr.client == system_client) break; /* ignore myself */ memset(&pinfo, 0, sizeof(pinfo)); pinfo.addr = ev->data.addr; if (call_ctl(SNDRV_SEQ_IOCTL_GET_PORT_INFO, &pinfo) >= 0) snd_seq_oss_midi_check_new_port(&pinfo); break; case SNDRV_SEQ_EVENT_PORT_EXIT: if (ev->data.addr.client == system_client) break; /* ignore myself */ snd_seq_oss_midi_check_exit_port(ev->data.addr.client, ev->data.addr.port); break; } return 0; } /* * delete OSS sequencer client */ int snd_seq_oss_delete_client(void) { cancel_work_sync(&async_lookup_work); if (system_client >= 0) snd_seq_delete_kernel_client(system_client); snd_seq_oss_midi_clear_all(); return 0; } /* * open sequencer device */ int snd_seq_oss_open(struct file *file, int level) { int i, rc; struct seq_oss_devinfo *dp; dp = kzalloc(sizeof(*dp), 
GFP_KERNEL); if (!dp) return -ENOMEM; dp->cseq = system_client; dp->port = -1; dp->queue = -1; for (i = 0; i < SNDRV_SEQ_OSS_MAX_CLIENTS; i++) { if (client_table[i] == NULL) break; } dp->index = i; if (i >= SNDRV_SEQ_OSS_MAX_CLIENTS) { pr_debug("ALSA: seq_oss: too many applications\n"); rc = -ENOMEM; goto _error; } /* look up synth and midi devices */ snd_seq_oss_synth_setup(dp); snd_seq_oss_midi_setup(dp); if (dp->synth_opened == 0 && dp->max_mididev == 0) { /* pr_err("ALSA: seq_oss: no device found\n"); */ rc = -ENODEV; goto _error; } /* create port */ rc = create_port(dp); if (rc < 0) { pr_err("ALSA: seq_oss: can't create port\n"); goto _error; } /* allocate queue */ rc = alloc_seq_queue(dp); if (rc < 0) goto _error; /* set address */ dp->addr.client = dp->cseq; dp->addr.port = dp->port; /*dp->addr.queue = dp->queue;*/ /*dp->addr.channel = 0;*/ dp->seq_mode = level; /* set up file mode */ dp->file_mode = translate_mode(file); /* initialize read queue */ if (is_read_mode(dp->file_mode)) { dp->readq = snd_seq_oss_readq_new(dp, maxqlen); if (!dp->readq) { rc = -ENOMEM; goto _error; } } /* initialize write queue */ if (is_write_mode(dp->file_mode)) { dp->writeq = snd_seq_oss_writeq_new(dp, maxqlen); if (!dp->writeq) { rc = -ENOMEM; goto _error; } } /* initialize timer */ dp->timer = snd_seq_oss_timer_new(dp); if (!dp->timer) { pr_err("ALSA: seq_oss: can't alloc timer\n"); rc = -ENOMEM; goto _error; } /* set private data pointer */ file->private_data = dp; /* set up for mode2 */ if (level == SNDRV_SEQ_OSS_MODE_MUSIC) snd_seq_oss_synth_setup_midi(dp); else if (is_read_mode(dp->file_mode)) snd_seq_oss_midi_open_all(dp, SNDRV_SEQ_OSS_FILE_READ); client_table[dp->index] = dp; num_clients++; return 0; _error: snd_seq_oss_synth_cleanup(dp); snd_seq_oss_midi_cleanup(dp); delete_seq_queue(dp->queue); delete_port(dp); return rc; } /* * translate file flags to private mode */ static int translate_mode(struct file *file) { int file_mode = 0; if ((file->f_flags & O_ACCMODE) != 
O_RDONLY) file_mode |= SNDRV_SEQ_OSS_FILE_WRITE; if ((file->f_flags & O_ACCMODE) != O_WRONLY) file_mode |= SNDRV_SEQ_OSS_FILE_READ; if (file->f_flags & O_NONBLOCK) file_mode |= SNDRV_SEQ_OSS_FILE_NONBLOCK; return file_mode; } /* * create sequencer port */ static int create_port(struct seq_oss_devinfo *dp) { int rc; struct snd_seq_port_info port; struct snd_seq_port_callback callback; memset(&port, 0, sizeof(port)); port.addr.client = dp->cseq; sprintf(port.name, "Sequencer-%d", dp->index); port.capability = SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_WRITE; /* no subscription */ port.type = SNDRV_SEQ_PORT_TYPE_SPECIFIC; port.midi_channels = 128; port.synth_voices = 128; memset(&callback, 0, sizeof(callback)); callback.owner = THIS_MODULE; callback.private_data = dp; callback.event_input = snd_seq_oss_event_input; callback.private_free = free_devinfo; port.kernel = &callback; rc = call_ctl(SNDRV_SEQ_IOCTL_CREATE_PORT, &port); if (rc < 0) return rc; dp->port = port.addr.port; return 0; } /* * delete ALSA port */ static int delete_port(struct seq_oss_devinfo *dp) { if (dp->port < 0) { kfree(dp); return 0; } return snd_seq_event_port_detach(dp->cseq, dp->port); } /* * allocate a queue */ static int alloc_seq_queue(struct seq_oss_devinfo *dp) { struct snd_seq_queue_info qinfo; int rc; memset(&qinfo, 0, sizeof(qinfo)); qinfo.owner = system_client; qinfo.locked = 1; strscpy(qinfo.name, "OSS Sequencer Emulation"); rc = call_ctl(SNDRV_SEQ_IOCTL_CREATE_QUEUE, &qinfo); if (rc < 0) return rc; dp->queue = qinfo.queue; return 0; } /* * release queue */ static int delete_seq_queue(int queue) { struct snd_seq_queue_info qinfo; int rc; if (queue < 0) return 0; memset(&qinfo, 0, sizeof(qinfo)); qinfo.queue = queue; rc = call_ctl(SNDRV_SEQ_IOCTL_DELETE_QUEUE, &qinfo); if (rc < 0) pr_err("ALSA: seq_oss: unable to delete queue %d (%d)\n", queue, rc); return rc; } /* * free device informations - private_free callback of port */ static void free_devinfo(void *private) { struct 
seq_oss_devinfo *dp = (struct seq_oss_devinfo *)private; snd_seq_oss_timer_delete(dp->timer); snd_seq_oss_writeq_delete(dp->writeq); snd_seq_oss_readq_delete(dp->readq); kfree(dp); } /* * close sequencer device */ void snd_seq_oss_release(struct seq_oss_devinfo *dp) { int queue; client_table[dp->index] = NULL; num_clients--; snd_seq_oss_reset(dp); snd_seq_oss_synth_cleanup(dp); snd_seq_oss_midi_cleanup(dp); /* clear slot */ queue = dp->queue; if (dp->port >= 0) delete_port(dp); delete_seq_queue(queue); } /* * reset sequencer devices */ void snd_seq_oss_reset(struct seq_oss_devinfo *dp) { int i; /* reset all synth devices */ for (i = 0; i < dp->max_synthdev; i++) snd_seq_oss_synth_reset(dp, i); /* reset all midi devices */ if (dp->seq_mode != SNDRV_SEQ_OSS_MODE_MUSIC) { for (i = 0; i < dp->max_mididev; i++) snd_seq_oss_midi_reset(dp, i); } /* remove queues */ if (dp->readq) snd_seq_oss_readq_clear(dp->readq); if (dp->writeq) snd_seq_oss_writeq_clear(dp->writeq); /* reset timer */ snd_seq_oss_timer_stop(dp->timer); } #ifdef CONFIG_SND_PROC_FS /* * misc. functions for proc interface */ static const char * filemode_str(int val) { static const char * const str[] = { "none", "read", "write", "read/write", }; return str[val & SNDRV_SEQ_OSS_FILE_ACMODE]; } /* * proc interface */ void snd_seq_oss_system_info_read(struct snd_info_buffer *buf) { int i; struct seq_oss_devinfo *dp; snd_iprintf(buf, "ALSA client number %d\n", system_client); snd_iprintf(buf, "ALSA receiver port %d\n", system_port); snd_iprintf(buf, "\nNumber of applications: %d\n", num_clients); for (i = 0; i < num_clients; i++) { snd_iprintf(buf, "\nApplication %d: ", i); dp = client_table[i]; if (!dp) { snd_iprintf(buf, "*empty*\n"); continue; } snd_iprintf(buf, "port %d : queue %d\n", dp->port, dp->queue); snd_iprintf(buf, " sequencer mode = %s : file open mode = %s\n", (dp->seq_mode ? 
"music" : "synth"), filemode_str(dp->file_mode)); if (dp->seq_mode) snd_iprintf(buf, " timer tempo = %d, timebase = %d\n", dp->timer->oss_tempo, dp->timer->oss_timebase); snd_iprintf(buf, " max queue length %d\n", maxqlen); if (is_read_mode(dp->file_mode) && dp->readq) snd_seq_oss_readq_info_read(dp->readq, buf); } } #endif /* CONFIG_SND_PROC_FS */
10 10 10 10 9 18 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 // SPDX-License-Identifier: GPL-2.0-only /* * fs/crypto/hooks.c * * Encryption hooks for higher-level filesystem operations. 
*/ #include <linux/export.h> #include "fscrypt_private.h" /** * fscrypt_file_open() - prepare to open a possibly-encrypted regular file * @inode: the inode being opened * @filp: the struct file being set up * * Currently, an encrypted regular file can only be opened if its encryption key * is available; access to the raw encrypted contents is not supported. * Therefore, we first set up the inode's encryption key (if not already done) * and return an error if it's unavailable. * * We also verify that if the parent directory (from the path via which the file * is being opened) is encrypted, then the inode being opened uses the same * encryption policy. This is needed as part of the enforcement that all files * in an encrypted directory tree use the same encryption policy, as a * protection against certain types of offline attacks. Note that this check is * needed even when opening an *unencrypted* file, since it's forbidden to have * an unencrypted file in an encrypted directory. * * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code */ int fscrypt_file_open(struct inode *inode, struct file *filp) { int err; struct dentry *dentry, *dentry_parent; struct inode *inode_parent; err = fscrypt_require_key(inode); if (err) return err; dentry = file_dentry(filp); /* * Getting a reference to the parent dentry is needed for the actual * encryption policy comparison, but it's expensive on multi-core * systems. Since this function runs on unencrypted files too, start * with a lightweight RCU-mode check for the parent directory being * unencrypted (in which case it's fine for the child to be either * unencrypted, or encrypted with any policy). Only continue on to the * full policy check if the parent directory is actually encrypted. 
*/ rcu_read_lock(); dentry_parent = READ_ONCE(dentry->d_parent); inode_parent = d_inode_rcu(dentry_parent); if (inode_parent != NULL && !IS_ENCRYPTED(inode_parent)) { rcu_read_unlock(); return 0; } rcu_read_unlock(); dentry_parent = dget_parent(dentry); if (!fscrypt_has_permitted_context(d_inode(dentry_parent), inode)) { fscrypt_warn(inode, "Inconsistent encryption context (parent directory: %lu)", d_inode(dentry_parent)->i_ino); err = -EPERM; } dput(dentry_parent); return err; } EXPORT_SYMBOL_GPL(fscrypt_file_open); int __fscrypt_prepare_link(struct inode *inode, struct inode *dir, struct dentry *dentry) { if (fscrypt_is_nokey_name(dentry)) return -ENOKEY; /* * We don't need to separately check that the directory inode's key is * available, as it's implied by the dentry not being a no-key name. */ if (!fscrypt_has_permitted_context(dir, inode)) return -EXDEV; return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_link); int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { if (fscrypt_is_nokey_name(old_dentry) || fscrypt_is_nokey_name(new_dentry)) return -ENOKEY; /* * We don't need to separately check that the directory inodes' keys are * available, as it's implied by the dentries not being no-key names. 
*/ if (old_dir != new_dir) { if (IS_ENCRYPTED(new_dir) && !fscrypt_has_permitted_context(new_dir, d_inode(old_dentry))) return -EXDEV; if ((flags & RENAME_EXCHANGE) && IS_ENCRYPTED(old_dir) && !fscrypt_has_permitted_context(old_dir, d_inode(new_dentry))) return -EXDEV; } return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_rename); int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry, struct fscrypt_name *fname) { int err = fscrypt_setup_filename(dir, &dentry->d_name, 1, fname); if (err && err != -ENOENT) return err; fscrypt_prepare_dentry(dentry, fname->is_nokey_name); return err; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_lookup); /** * fscrypt_prepare_lookup_partial() - prepare lookup without filename setup * @dir: the encrypted directory being searched * @dentry: the dentry being looked up in @dir * * This function should be used by the ->lookup and ->atomic_open methods of * filesystems that handle filename encryption and no-key name encoding * themselves and thus can't use fscrypt_prepare_lookup(). Like * fscrypt_prepare_lookup(), this will try to set up the directory's encryption * key and will set DCACHE_NOKEY_NAME on the dentry if the key is unavailable. * However, this function doesn't set up a struct fscrypt_name for the filename. * * Return: 0 on success; -errno on error. Note that the encryption key being * unavailable is not considered an error. It is also not an error if * the encryption policy is unsupported by this kernel; that is treated * like the key being unavailable, so that files can still be deleted. 
*/ int fscrypt_prepare_lookup_partial(struct inode *dir, struct dentry *dentry) { int err = fscrypt_get_encryption_info(dir, true); bool is_nokey_name = (!err && !fscrypt_has_encryption_key(dir)); fscrypt_prepare_dentry(dentry, is_nokey_name); return err; } EXPORT_SYMBOL_GPL(fscrypt_prepare_lookup_partial); int __fscrypt_prepare_readdir(struct inode *dir) { return fscrypt_get_encryption_info(dir, true); } EXPORT_SYMBOL_GPL(__fscrypt_prepare_readdir); int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr) { if (attr->ia_valid & ATTR_SIZE) return fscrypt_require_key(d_inode(dentry)); return 0; } EXPORT_SYMBOL_GPL(__fscrypt_prepare_setattr); /** * fscrypt_prepare_setflags() - prepare to change flags with FS_IOC_SETFLAGS * @inode: the inode on which flags are being changed * @oldflags: the old flags * @flags: the new flags * * The caller should be holding i_rwsem for write. * * Return: 0 on success; -errno if the flags change isn't allowed or if * another error occurs. */ int fscrypt_prepare_setflags(struct inode *inode, unsigned int oldflags, unsigned int flags) { struct fscrypt_inode_info *ci; struct fscrypt_master_key *mk; int err; /* * When the CASEFOLD flag is set on an encrypted directory, we must * derive the secret key needed for the dirhash. This is only possible * if the directory uses a v2 encryption policy. 
*/ if (IS_ENCRYPTED(inode) && (flags & ~oldflags & FS_CASEFOLD_FL)) { err = fscrypt_require_key(inode); if (err) return err; ci = inode->i_crypt_info; if (ci->ci_policy.version != FSCRYPT_POLICY_V2) return -EINVAL; mk = ci->ci_master_key; down_read(&mk->mk_sem); if (mk->mk_present) err = fscrypt_derive_dirhash_key(ci, mk); else err = -ENOKEY; up_read(&mk->mk_sem); return err; } return 0; } /** * fscrypt_prepare_symlink() - prepare to create a possibly-encrypted symlink * @dir: directory in which the symlink is being created * @target: plaintext symlink target * @len: length of @target excluding null terminator * @max_len: space the filesystem has available to store the symlink target * @disk_link: (out) the on-disk symlink target being prepared * * This function computes the size the symlink target will require on-disk, * stores it in @disk_link->len, and validates it against @max_len. An * encrypted symlink may be longer than the original. * * Additionally, @disk_link->name is set to @target if the symlink will be * unencrypted, but left NULL if the symlink will be encrypted. For encrypted * symlinks, the filesystem must call fscrypt_encrypt_symlink() to create the * on-disk target later. (The reason for the two-step process is that some * filesystems need to know the size of the symlink target before creating the * inode, e.g. to determine whether it will be a "fast" or "slow" symlink.) * * Return: 0 on success, -ENAMETOOLONG if the symlink target is too long, * -ENOKEY if the encryption key is missing, or another -errno code if a problem * occurred while setting up the encryption key. 
*/ int fscrypt_prepare_symlink(struct inode *dir, const char *target, unsigned int len, unsigned int max_len, struct fscrypt_str *disk_link) { const union fscrypt_policy *policy; /* * To calculate the size of the encrypted symlink target we need to know * the amount of NUL padding, which is determined by the flags set in * the encryption policy which will be inherited from the directory. */ policy = fscrypt_policy_to_inherit(dir); if (policy == NULL) { /* Not encrypted */ disk_link->name = (unsigned char *)target; disk_link->len = len + 1; if (disk_link->len > max_len) return -ENAMETOOLONG; return 0; } if (IS_ERR(policy)) return PTR_ERR(policy); /* * Calculate the size of the encrypted symlink and verify it won't * exceed max_len. Note that for historical reasons, encrypted symlink * targets are prefixed with the ciphertext length, despite this * actually being redundant with i_size. This decreases by 2 bytes the * longest symlink target we can accept. * * We could recover 1 byte by not counting a null terminator, but * counting it (even though it is meaningless for ciphertext) is simpler * for now since filesystems will assume it is there and subtract it. */ if (!__fscrypt_fname_encrypted_size(policy, len, max_len - sizeof(struct fscrypt_symlink_data) - 1, &disk_link->len)) return -ENAMETOOLONG; disk_link->len += sizeof(struct fscrypt_symlink_data) + 1; disk_link->name = NULL; return 0; } EXPORT_SYMBOL_GPL(fscrypt_prepare_symlink); int __fscrypt_encrypt_symlink(struct inode *inode, const char *target, unsigned int len, struct fscrypt_str *disk_link) { int err; struct qstr iname = QSTR_INIT(target, len); struct fscrypt_symlink_data *sd; unsigned int ciphertext_len; /* * fscrypt_prepare_new_inode() should have already set up the new * symlink inode's encryption key. We don't wait until now to do it, * since we may be in a filesystem transaction now. 
*/ if (WARN_ON_ONCE(!fscrypt_has_encryption_key(inode))) return -ENOKEY; if (disk_link->name) { /* filesystem-provided buffer */ sd = (struct fscrypt_symlink_data *)disk_link->name; } else { sd = kmalloc(disk_link->len, GFP_NOFS); if (!sd) return -ENOMEM; } ciphertext_len = disk_link->len - sizeof(*sd) - 1; sd->len = cpu_to_le16(ciphertext_len); err = fscrypt_fname_encrypt(inode, &iname, sd->encrypted_path, ciphertext_len); if (err) goto err_free_sd; /* * Null-terminating the ciphertext doesn't make sense, but we still * count the null terminator in the length, so we might as well * initialize it just in case the filesystem writes it out. */ sd->encrypted_path[ciphertext_len] = '\0'; /* Cache the plaintext symlink target for later use by get_link() */ err = -ENOMEM; inode->i_link = kmemdup(target, len + 1, GFP_NOFS); if (!inode->i_link) goto err_free_sd; if (!disk_link->name) disk_link->name = (unsigned char *)sd; return 0; err_free_sd: if (!disk_link->name) kfree(sd); return err; } EXPORT_SYMBOL_GPL(__fscrypt_encrypt_symlink); /** * fscrypt_get_symlink() - get the target of an encrypted symlink * @inode: the symlink inode * @caddr: the on-disk contents of the symlink * @max_size: size of @caddr buffer * @done: if successful, will be set up to free the returned target if needed * * If the symlink's encryption key is available, we decrypt its target. * Otherwise, we encode its target for presentation. * * This may sleep, so the filesystem must have dropped out of RCU mode already. * * Return: the presentable symlink target or an ERR_PTR() */ const char *fscrypt_get_symlink(struct inode *inode, const void *caddr, unsigned int max_size, struct delayed_call *done) { const struct fscrypt_symlink_data *sd; struct fscrypt_str cstr, pstr; bool has_key; int err; /* This is for encrypted symlinks only */ if (WARN_ON_ONCE(!IS_ENCRYPTED(inode))) return ERR_PTR(-EINVAL); /* If the decrypted target is already cached, just return it. 
*/ pstr.name = READ_ONCE(inode->i_link); if (pstr.name) return pstr.name; /* * Try to set up the symlink's encryption key, but we can continue * regardless of whether the key is available or not. */ err = fscrypt_get_encryption_info(inode, false); if (err) return ERR_PTR(err); has_key = fscrypt_has_encryption_key(inode); /* * For historical reasons, encrypted symlink targets are prefixed with * the ciphertext length, even though this is redundant with i_size. */ if (max_size < sizeof(*sd) + 1) return ERR_PTR(-EUCLEAN); sd = caddr; cstr.name = (unsigned char *)sd->encrypted_path; cstr.len = le16_to_cpu(sd->len); if (cstr.len == 0) return ERR_PTR(-EUCLEAN); if (cstr.len + sizeof(*sd) > max_size) return ERR_PTR(-EUCLEAN); err = fscrypt_fname_alloc_buffer(cstr.len, &pstr); if (err) return ERR_PTR(err); err = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr); if (err) goto err_kfree; err = -EUCLEAN; if (pstr.name[0] == '\0') goto err_kfree; pstr.name[pstr.len] = '\0'; /* * Cache decrypted symlink targets in i_link for later use. Don't cache * symlink targets encoded without the key, since those become outdated * once the key is added. This pairs with the READ_ONCE() above and in * the VFS path lookup code. */ if (!has_key || cmpxchg_release(&inode->i_link, NULL, pstr.name) != NULL) set_delayed_call(done, kfree_link, pstr.name); return pstr.name; err_kfree: kfree(pstr.name); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(fscrypt_get_symlink); /** * fscrypt_symlink_getattr() - set the correct st_size for encrypted symlinks * @path: the path for the encrypted symlink being queried * @stat: the struct being filled with the symlink's attributes * * Override st_size of encrypted symlinks to be the length of the decrypted * symlink target (or the no-key encoded symlink target, if the key is * unavailable) rather than the length of the encrypted symlink target. This is * necessary for st_size to match the symlink target that userspace actually * sees. 
POSIX requires this, and some userspace programs depend on it. * * This requires reading the symlink target from disk if needed, setting up the * inode's encryption key if possible, and then decrypting or encoding the * symlink target. This makes lstat() more heavyweight than is normally the * case. However, decrypted symlink targets will be cached in ->i_link, so * usually the symlink won't have to be read and decrypted again later if/when * it is actually followed, readlink() is called, or lstat() is called again. * * Return: 0 on success, -errno on failure */ int fscrypt_symlink_getattr(const struct path *path, struct kstat *stat) { struct dentry *dentry = path->dentry; struct inode *inode = d_inode(dentry); const char *link; DEFINE_DELAYED_CALL(done); /* * To get the symlink target that userspace will see (whether it's the * decrypted target or the no-key encoded target), we can just get it in * the same way the VFS does during path resolution and readlink(). */ link = READ_ONCE(inode->i_link); if (!link) { link = inode->i_op->get_link(dentry, inode, &done); if (IS_ERR(link)) return PTR_ERR(link); } stat->size = strlen(link); do_delayed_call(&done); return 0; } EXPORT_SYMBOL_GPL(fscrypt_symlink_getattr);
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
/* SPDX-License-Identifier: GPL-2.0-or-later */
/* request_key authorisation token key type
 *
 * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#ifndef _KEYS_REQUEST_KEY_AUTH_TYPE_H
#define _KEYS_REQUEST_KEY_AUTH_TYPE_H

#include <linux/key.h>

/*
 * Authorisation record for request_key().
 *
 * NOTE(review): the meaning of the individual fields is not visible from this
 * header.  Going by the names, callout_len presumably gives the size of the
 * buffer behind callout_info - confirm against the users of this struct.
 */
struct request_key_auth {
	struct rcu_head		rcu;
	struct key		*target_key;
	struct key		*dest_keyring;
	const struct cred	*cred;
	void			*callout_info;
	size_t			callout_len;
	pid_t			pid;
	char			op[8];
} __randomize_layout;

/*
 * Return the authorisation record kept in the first payload slot
 * (payload.data[0]) of @key.  No check is made here that @key really is an
 * authorisation key.
 */
static inline struct request_key_auth *get_request_key_auth(const struct key *key)
{
	return key->payload.data[0];
}

#endif /* _KEYS_REQUEST_KEY_AUTH_TYPE_H */
22 23 22 22 22 23 23 23 23 22 23 45 147 153 151 54 137 10 128 137 5 132 137 49 49 1 2 44 2 85 79 84 84 84 56 97 96 134 135 1 1 1 84 84 80 1 13 39 27 36 49 134 134 135 134 124 8 134 132 133 134 195 164 9 21 111 21 86 16 7 104 6 3 1 2 1 24 24 1 1 1 1 1 5 8 7 15 1 4 16 2 19 18 17 1 14 1 2 2 14 1 167 169 1 1 1 1 78 76 74 76 74 76 74 1 1 77 77 1 5 1 1 1 1 1 9 5 4 7 2 8 5 4 6 5 1 9 1 49 1 49 44 7 50 69 13 6 51 71 72 82 81 1 1 62 62 49 140 141 1 1 136 1 1 112 4 24 136 1 134 134 24 109 135 104 16 3 1 1 1 1 1 61 68 11 58 53 7 70 2 69 66 4 69 44 79 23 56 79 13 66 256 262 1 256 17 7 14 16 250 5 2 16 262 259 5 6 6 1 1 1 139 140 2 141 139 141 139 5 138 1 139 96 77 140 5 50 50 50 48 7 7 98 98 98 1 97 21 51 51 22 21 244 249 97 98 98 96 96 97 97 148 6 81 109 148 81 81 82 1 1 82 94 20 76 71 5 56 81 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 
828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 
1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 
1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 
2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IPv4 Forwarding Information Base: semantics. 
* * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> */ #include <linux/uaccess.h> #include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/mm.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/netlink.h> #include <linux/hash.h> #include <linux/nospec.h> #include <net/arp.h> #include <net/inet_dscp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/nexthop.h> #include <net/netlink.h> #include <net/rtnh.h> #include <net/lwtunnel.h> #include <net/fib_notifier.h> #include <net/addrconf.h> #include "fib_lookup.h" /* for_nexthops and change_nexthops only used when nexthop object * is not set in a fib_info. The logic within can reference fib_nh. 
*/

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * for_nexthops(fi) opens a scope and starts a loop over the fib_nh array of
 * @fi, exposing a read-only cursor "nh" and its index "nhsel" to the loop
 * body.  The scope has to be closed with endfor_nexthops(fi).
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < fib_info_num_path((fi));				\
	     nh++, nhsel++)

/*
 * change_nexthops(fi) is the same walk with a writable cursor named
 * "nexthop_nh".  It is closed with endfor_nexthops(fi) as well.
 */
#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < fib_info_num_path((fi));				\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

/* Without multipath support the loops below run exactly once. */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }

/* Per route type (RTN_*) properties: an error value and a scope. */
const struct fib_prop fib_props[RTN_MAX + 1] = {
	[RTN_UNSPEC] = {
		.error	= 0,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_UNICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_LOCAL] = {
		.error	= 0,
		.scope	= RT_SCOPE_HOST,
	},
	[RTN_BROADCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_ANYCAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_LINK,
	},
	[RTN_MULTICAST] = {
		.error	= 0,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_BLACKHOLE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_UNREACHABLE] = {
		.error	= -EHOSTUNREACH,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_PROHIBIT] = {
		.error	= -EACCES,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_THROW] = {
		.error	= -EAGAIN,
		.scope	= RT_SCOPE_UNIVERSE,
	},
	[RTN_NAT] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
	[RTN_XRESOLVE] = {
		.error	= -EINVAL,
		.scope	= RT_SCOPE_NOWHERE,
	},
};

/*
 * Release the route cached in *@rtp, if there is one, by calling
 * dst_dev_put() and dst_release_immediate() on its dst.  The slot itself is
 * left pointing at the route (see the comment in the body).
 */
static void rt_fibinfo_free(struct rtable __rcu **rtp)
{
	struct rtable *rt = rcu_dereference_protected(*rtp, 1);

	if (!rt)
		return;

	/* Not even needed : RCU_INIT_POINTER(*rtp, NULL);
	 * because we waited an RCU grace period before calling
	 * free_fib_info_rcu()
	 */

	dst_dev_put(&rt->dst);
	dst_release_immediate(&rt->dst);
}

/*
 * Free the whole nexthop exception hash of @nhc: for every exception in every
 * one of the FNHE_HASH_SIZE chains, release its cached input and output
 * routes and free the exception, then free the bucket array itself.
 * Does nothing when @nhc has no exception hash.
 */
static void free_nh_exceptions(struct fib_nh_common *nhc)
{
	struct fnhe_hash_bucket *hash;
	int i;

	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!hash)
		return;
	for (i = 0; i < FNHE_HASH_SIZE; i++) {
		struct fib_nh_exception *fnhe;

		fnhe = rcu_dereference_protected(hash[i].chain, 1);
		while (fnhe) {
			struct fib_nh_exception *next;

			/* Fetch the successor before fnhe is freed. */
			next = rcu_dereference_protected(fnhe->fnhe_next, 1);

			rt_fibinfo_free(&fnhe->fnhe_rth_input);
			rt_fibinfo_free(&fnhe->fnhe_rth_output);

			kfree(fnhe);

			fnhe = next;
		}
	}
	kfree(hash);
}

/*
 * Per-cpu variant of rt_fibinfo_free(): release the route cached in the slot
 * of every possible CPU, then free the per-cpu area.  A NULL @rtp is a no-op.
 */
static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp)
{
	int cpu;

	if (!rtp)
		return;

	for_each_possible_cpu(cpu) {
		struct rtable *rt;

		rt = rcu_dereference_protected(*per_cpu_ptr(rtp, cpu), 1);
		if (rt) {
			dst_dev_put(&rt->dst);
			dst_release_immediate(&rt->dst);
		}
	}
	free_percpu(rtp);
}

/*
 * Drop everything a fib_nh_common holds on to: its device reference, its
 * lwtunnel state, the per-cpu cached output routes, the cached input route
 * and the nexthop exceptions.  @nhc itself is not freed.
 */
void fib_nh_common_release(struct fib_nh_common *nhc)
{
	netdev_put(nhc->nhc_dev, &nhc->nhc_dev_tracker);
	lwtstate_put(nhc->nhc_lwtstate);
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	rt_fibinfo_free(&nhc->nhc_rth_input);
	free_nh_exceptions(nhc);
}
EXPORT_SYMBOL_GPL(fib_nh_common_release);

/*
 * Release an IPv4 nexthop.  With CONFIG_IP_ROUTE_CLASSID, a nexthop carrying
 * a non-zero nh_tclassid also decrements @net's fib_num_tclassid_users
 * (the matching increment is in fib_nh_init()).
 */
void fib_nh_release(struct net *net, struct fib_nh *fib_nh)
{
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (fib_nh->nh_tclassid)
		atomic_dec(&net->ipv4.fib_num_tclassid_users);
#endif
	fib_nh_common_release(&fib_nh->nh_common);
}

/* Release a nexthop info record */
/*
 * RCU callback queued by free_fib_info().  Either drops the reference on the
 * nexthop object (@fi->nh) or releases each embedded fib_nh, then puts the
 * metrics and frees @fi.
 */
static void free_fib_info_rcu(struct rcu_head *head)
{
	struct fib_info *fi = container_of(head, struct fib_info, rcu);

	if (fi->nh) {
		nexthop_put(fi->nh);
	} else {
		change_nexthops(fi) {
			fib_nh_release(fi->fib_net, nexthop_nh);
		} endfor_nexthops(fi);
	}

	ip_fib_metrics_put(fi->fib_metrics);

	kfree(fi);
}

/*
 * Queue @fi to be freed by free_fib_info_rcu().  An entry whose fib_dead is
 * still 0 is not freed; a warning is printed instead.
 */
void free_fib_info(struct fib_info *fi)
{
	if (fi->fib_dead == 0) {
		pr_warn("Freeing alive fib_info %p\n", fi);
		return;
	}

	call_rcu_hurry(&fi->rcu, free_fib_info_rcu);
}
EXPORT_SYMBOL_GPL(free_fib_info);

/*
 * Drop one fib_treeref reference on @fi (NULL is accepted).  When that was
 * the last one, unlink @fi from the fib_info hash, from the prefsrc hash if
 * it has a preferred source, and from either the nexthop object's list or the
 * per-nexthop hash lists; then mark it dead and call fib_info_put().
 * Asserts that RTNL is held.
 */
void fib_release_info(struct fib_info *fi)
{
	ASSERT_RTNL();
	if (fi && refcount_dec_and_test(&fi->fib_treeref)) {
		hlist_del(&fi->fib_hash);
		fi->fib_net->ipv4.fib_info_cnt--;

		if (fi->fib_prefsrc)
			hlist_del(&fi->fib_lhash);
		if (fi->nh) {
			list_del(&fi->nh_list);
		} else {
			change_nexthops(fi) {
				/* Nexthops without a device are skipped. */
				if (!nexthop_nh->fib_nh_dev)
					continue;
				hlist_del_rcu(&nexthop_nh->nh_hash);
			} endfor_nexthops(fi)
		}
		/* Paired with READ_ONCE() from fib_table_lookup() */
		WRITE_ONCE(fi->fib_dead, 1);
		fib_info_put(fi);
	}
}

/*
 * Compare the nexthops of @fi and @ofi.  Returns 0 when they are considered
 * equal and -1 otherwise.  If either side uses a nexthop object the verdict
 * comes from nexthop_cmp(); otherwise each fib_nh of @fi is compared with the
 * fib_nh at the same index in @ofi (flags are compared ignoring the bits in
 * RTNH_COMPARE_MASK).  An @ofi with fib_nhs == 0 always compares equal.
 */
static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi)
{
	const struct fib_nh *onh;

	if (fi->nh || ofi->nh)
		return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;

	if (ofi->fib_nhs == 0)
		return 0;

	for_nexthops(fi) {
		onh = fib_info_nh(ofi, nhsel);

		if (nh->fib_nh_oif != onh->fib_nh_oif ||
		    nh->fib_nh_gw_family != onh->fib_nh_gw_family ||
		    nh->fib_nh_scope != onh->fib_nh_scope ||
#ifdef CONFIG_IP_ROUTE_MULTIPATH
		    nh->fib_nh_weight != onh->fib_nh_weight ||
#endif
#ifdef CONFIG_IP_ROUTE_CLASSID
		    nh->nh_tclassid != onh->nh_tclassid ||
#endif
		    lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) ||
		    ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK))
			return -1;

		/* Gateway addresses are compared per address family. */
		if (nh->fib_nh_gw_family == AF_INET &&
		    nh->fib_nh_gw4 != onh->fib_nh_gw4)
			return -1;

		if (nh->fib_nh_gw_family == AF_INET6 &&
		    ipv6_addr_cmp(&nh->fib_nh_gw6, &onh->fib_nh_gw6))
			return -1;
	} endfor_nexthops(fi);
	return 0;
}

/* Return the list head on @dev that nexthops are linked on (via nh_hash). */
static struct hlist_head *fib_nh_head(struct net_device *dev)
{
	return &dev->fib_nh_head;
}

/*
 * First hashing stage: XOR @init_val with the protocol (shifted left by 8)
 * ORed with the scope, then with @prefsrc and @priority.
 */
static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope,
				      u32 prefsrc, u32 priority)
{
	unsigned int val = init_val;

	val ^= (protocol << 8) | scope;
	val ^= prefsrc;
	val ^= priority;

	return val;
}

/*
 * Final hashing stage: mix in the per-netns value and reduce to
 * fib_info_hash_bits bits, giving an index into @net's fib_info_hash.
 */
static unsigned int fib_info_hashfn_result(const struct net *net,
					   unsigned int val)
{
	return hash_32(val ^ net_hash_mix(net), net->ipv4.fib_info_hash_bits);
}

/*
 * Return the fib_info hash bucket @fi belongs to.  The hash input is seeded
 * with fib_nhs and additionally covers either the nexthop object id or the
 * output interface index of every embedded nexthop.
 */
static struct hlist_head *fib_info_hash_bucket(struct fib_info *fi)
{
	struct net *net = fi->fib_net;
	unsigned int val;

	val = fib_info_hashfn_1(fi->fib_nhs, fi->fib_protocol,
				fi->fib_scope, (__force u32)fi->fib_prefsrc,
				fi->fib_priority);

	if (fi->nh) {
		val ^= fi->nh->id;
	} else {
		for_nexthops(fi) {
			val ^= nh->fib_nh_oif;
		} endfor_nexthops(fi)
	}

	return &net->ipv4.fib_info_hash[fib_info_hashfn_result(net, val)];
}

/*
 * Return the bucket for preferred source address @val.  These buckets live in
 * the second half of fib_info_hash, hence the (1 << hash_bits) offset.
 */
static struct hlist_head *fib_info_laddrhash_bucket(const struct net *net,
						    __be32 val)
{
	unsigned int hash_bits = net->ipv4.fib_info_hash_bits;
	u32 slot;

	slot = hash_32(net_hash_mix(net) ^ (__force u32)val, hash_bits);

	return &net->ipv4.fib_info_hash[(1 << hash_bits) + slot];
}

/*
 * Allocate a zeroed table of 2 * (1 << @hash_bits) list heads.
 * Returns NULL on allocation failure.
 */
static struct hlist_head *fib_info_hash_alloc(unsigned int hash_bits)
{
	/* The second half is used for prefsrc */
	return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head),
			GFP_KERNEL);
}

/* Free a table obtained from fib_info_hash_alloc(). */
static void fib_info_hash_free(struct hlist_head *head)
{
	kvfree(head);
}

/*
 * Double the size of @net's fib_info hash once the number of fib_infos has
 * reached the number of buckets per half.  All entries of both halves are
 * re-linked into the new table and the old table is freed.  If the new table
 * cannot be allocated the old one is silently kept.
 */
static void fib_info_hash_grow(struct net *net)
{
	unsigned int old_size = 1 << net->ipv4.fib_info_hash_bits;
	struct hlist_head *new_info_hash, *old_info_hash;
	unsigned int i;

	if (net->ipv4.fib_info_cnt < old_size)
		return;

	new_info_hash = fib_info_hash_alloc(net->ipv4.fib_info_hash_bits + 1);
	if (!new_info_hash)
		return;

	/*
	 * Switch to the new table first: the bucket helpers used below read
	 * the table pointer and bit count from @net.
	 */
	old_info_hash = net->ipv4.fib_info_hash;
	net->ipv4.fib_info_hash = new_info_hash;
	net->ipv4.fib_info_hash_bits += 1;

	/* First half: entries linked through fib_hash. */
	for (i = 0; i < old_size; i++) {
		struct hlist_head *head = &old_info_hash[i];
		struct hlist_node *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, n, head, fib_hash)
			hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));
	}

	/* Second half: entries linked through fib_lhash (prefsrc). */
	for (i = 0; i < old_size; i++) {
		struct hlist_head *lhead = &old_info_hash[old_size + i];
		struct hlist_node *n;
		struct fib_info *fi;

		hlist_for_each_entry_safe(fi, n, lhead, fib_lhash)
			hlist_add_head(&fi->fib_lhash,
				       fib_info_laddrhash_bucket(fi->fib_net,
								 fi->fib_prefsrc));
	}

	fib_info_hash_free(old_info_hash);
}

/* no metrics, only nexthop id */
/*
 * Look up an existing fib_info that uses the nexthop object with id
 * @cfg->fc_nh_id and matches @cfg in protocol, scope, prefsrc, priority,
 * type, table and flags (ignoring the bits in RTNH_COMPARE_MASK).
 * Returns the entry found, or NULL.
 */
static struct fib_info *fib_find_info_nh(struct net *net,
					 const struct fib_config *cfg)
{
	struct hlist_head *head;
	struct fib_info *fi;
	unsigned int hash;

	hash = fib_info_hashfn_1(cfg->fc_nh_id,
				 cfg->fc_protocol, cfg->fc_scope,
				 (__force u32)cfg->fc_prefsrc,
				 cfg->fc_priority);
	hash = fib_info_hashfn_result(net, hash);
	head = &net->ipv4.fib_info_hash[hash];

	hlist_for_each_entry(fi, head, fib_hash) {
		if (!fi->nh || fi->nh->id != cfg->fc_nh_id)
			continue;

		if (cfg->fc_protocol == fi->fib_protocol &&
		    cfg->fc_scope == fi->fib_scope &&
		    cfg->fc_prefsrc == fi->fib_prefsrc &&
		    cfg->fc_priority == fi->fib_priority &&
		    cfg->fc_type == fi->fib_type &&
		    cfg->fc_table == fi->fib_tb_id &&
		    !((cfg->fc_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK))
			return fi;
	}

	return NULL;
}

/*
 * Look up an already hashed fib_info equivalent to @nfi: same number of
 * nexthops, protocol, scope, prefsrc, priority, type, table, metrics, flags
 * (ignoring the bits in RTNH_COMPARE_MASK) and nexthops as judged by
 * nh_comp().  Returns the entry found, or NULL.
 */
static struct fib_info *fib_find_info(struct fib_info *nfi)
{
	struct hlist_head *head = fib_info_hash_bucket(nfi);
	struct fib_info *fi;

	hlist_for_each_entry(fi, head, fib_hash) {
		if (fi->fib_nhs != nfi->fib_nhs)
			continue;

		if (nfi->fib_protocol == fi->fib_protocol &&
		    nfi->fib_scope == fi->fib_scope &&
		    nfi->fib_prefsrc == fi->fib_prefsrc &&
		    nfi->fib_priority == fi->fib_priority &&
		    nfi->fib_type == fi->fib_type &&
		    nfi->fib_tb_id == fi->fib_tb_id &&
		    memcmp(nfi->fib_metrics, fi->fib_metrics,
			   sizeof(u32) * RTAX_MAX) == 0 &&
		    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
		    nh_comp(fi, nfi) == 0)
			return fi;
	}

	return NULL;
}

/* Check, that the gateway is already configured.
* Used only by redirect accept routine, under rcu_read_lock();
 *
 * Returns 0 if some nexthop linked on @dev has IPv4 gateway @gw and is not
 * flagged RTNH_F_DEAD, and -1 otherwise.
 */
int ip_fib_check_default(__be32 gw, struct net_device *dev)
{
	struct hlist_head *head;
	struct fib_nh *nh;

	head = fib_nh_head(dev);

	hlist_for_each_entry_rcu(nh, head, nh_hash) {
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		if (nh->fib_nh_gw4 == gw &&
		    !(nh->fib_nh_flags & RTNH_F_DEAD)) {
			return 0;
		}
	}

	return -1;
}

/*
 * Compute the size of the netlink message buffer to allocate for a route
 * message describing @fi.  rtmsg_fib() treats -EMSGSIZE from the dump as a
 * bug in this function, so the result is meant to be sufficient.
 */
size_t fib_nlmsg_size(struct fib_info *fi)
{
	size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
			 + nla_total_size(4) /* RTA_TABLE */
			 + nla_total_size(4) /* RTA_DST */
			 + nla_total_size(4) /* RTA_PRIORITY */
			 + nla_total_size(4) /* RTA_PREFSRC */
			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
	unsigned int nhs = fib_info_num_path(fi);

	/* space for nested metrics */
	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));

	if (fi->nh)
		payload += nla_total_size(4); /* RTA_NH_ID */

	if (nhs) {
		size_t nh_encapsize = 0;
		/* Also handles the special case nhs == 1 */

		/* each nexthop is packed in an attribute */
		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
		unsigned int i;

		/* may contain flow and gateway attribute */
		nhsize += 2 * nla_total_size(4);

		/* grab encap info */
		for (i = 0; i < fib_info_num_path(fi); i++) {
			struct fib_nh_common *nhc = fib_info_nhc(fi, i);

			if (nhc->nhc_lwtstate) {
				/* RTA_ENCAP: size reported by the lwtunnel state */
				nh_encapsize += lwtunnel_get_encap_size(
						nhc->nhc_lwtstate);
				/* RTA_ENCAP_TYPE: a u16 (see nla_get_u16() in fib_get_nhs()) */
				nh_encapsize += nla_total_size(2);
			}
		}

		/* all nexthops are packed in a nested attribute */
		payload += nla_total_size((nhs * nhsize) + nh_encapsize);

	}

	return payload;
}

/*
 * Build a route message for alias @fa (destination @key/@dst_len in table
 * @tb_id) and send it to the RTNLGRP_IPV4_ROUTE group.  The sequence number
 * is taken from @info->nlh when present, else 0.  If the message cannot be
 * allocated or filled in, the error is recorded with rtnl_set_sk_err()
 * instead.
 */
void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
	       int dst_len, u32 tb_id, const struct nl_info *info,
	       unsigned int nlm_flags)
{
	struct fib_rt_info fri;
	struct sk_buff *skb;
	u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
	int err = -ENOBUFS;

	skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
	if (!skb)
		goto errout;

	fri.fi = fa->fa_info;
	fri.tb_id = tb_id;
	fri.dst = key;
	fri.dst_len = dst_len;
	fri.dscp = fa->fa_dscp;
	fri.type = fa->fa_type;
	fri.offload = READ_ONCE(fa->offload);
	fri.trap = READ_ONCE(fa->trap);
	fri.offload_failed = READ_ONCE(fa->offload_failed);
	err = fib_dump_info(skb, info->portid, seq, event, &fri, nlm_flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in fib_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE,
		    info->nlh, GFP_KERNEL);
	return;
errout:
	rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
}

/*
 * Judge the first nexthop of @fi by the neighbour entry of its gateway.
 *
 * Returns 0 when no neighbour entry is found, when its state is
 * NUD_REACHABLE, or when the state has a NUD_VALID bit and @order != @dflt.
 * Otherwise returns 1, after recording @fi/@order in *@last_resort and
 * *@last_idx if the state has a NUD_VALID bit, or if nothing has been
 * recorded yet (*@last_idx < 0), @order > @dflt and the state is not
 * NUD_INCOMPLETE.
 */
static int fib_detect_death(struct fib_info *fi, int order,
			    struct fib_info **last_resort, int *last_idx,
			    int dflt)
{
	const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
	struct neighbour *n;
	int state = NUD_NONE;

	if (likely(nhc->nhc_gw_family == AF_INET))
		n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev);
	else if (nhc->nhc_gw_family == AF_INET6)
		n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6,
				 nhc->nhc_dev);
	else
		n = NULL;

	if (n) {
		/* Only the state is needed; drop the reference right away. */
		state = READ_ONCE(n->nud_state);
		neigh_release(n);
	} else {
		return 0;
	}
	if (state == NUD_REACHABLE)
		return 0;
	if ((state & NUD_VALID) && order != dflt)
		return 0;
	if ((state & NUD_VALID) ||
	    (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
		*last_resort = fi;
		*last_idx = order;
	}
	return 1;
}

/*
 * Initialise the parts of a fib_nh_common that need allocation: the per-cpu
 * output route slots and, when @encap is given, the lwtunnel state built from
 * it (stored with a reference taken via lwtstate_get()).
 *
 * Returns 0 on success, -ENOMEM if the per-cpu area cannot be allocated, or
 * the error from lwtunnel_build_state(); in the latter case the per-cpu area
 * is released again and the pointer reset to NULL.
 */
int fib_nh_common_init(struct net *net, struct fib_nh_common *nhc,
		       struct nlattr *encap, u16 encap_type,
		       void *cfg, gfp_t gfp_flags,
		       struct netlink_ext_ack *extack)
{
	int err;

	nhc->nhc_pcpu_rth_output = alloc_percpu_gfp(struct rtable __rcu *,
						    gfp_flags);
	if (!nhc->nhc_pcpu_rth_output)
		return -ENOMEM;

	if (encap) {
		struct lwtunnel_state *lwtstate;

		err = lwtunnel_build_state(net, encap_type, encap,
					   nhc->nhc_family, cfg, &lwtstate,
					   extack);
		if (err)
			goto lwt_failure;

		nhc->nhc_lwtstate = lwtstate_get(lwtstate);
	}

	return 0;

lwt_failure:
	rt_fibinfo_free_cpus(nhc->nhc_pcpu_rth_output);
	nhc->nhc_pcpu_rth_output = NULL;
	return err;
}
EXPORT_SYMBOL_GPL(fib_nh_common_init);

/*
 * Initialise IPv4 nexthop @nh from @cfg: family AF_INET, common state via
 * fib_nh_common_init(), then output interface, gateway (IPv4 or IPv6,
 * depending on fc_gw_family) and flags.  With CONFIG_IP_ROUTE_CLASSID the
 * class id is copied from fc_flow, and a non-zero value increments @net's
 * fib_num_tclassid_users.  With CONFIG_IP_ROUTE_MULTIPATH the weight is set
 * to @nh_weight.
 *
 * Returns 0 on success or the error from fib_nh_common_init().
 */
int fib_nh_init(struct net *net, struct fib_nh *nh,
		struct fib_config *cfg, int nh_weight,
		struct netlink_ext_ack *extack)
{
	int err;

	nh->fib_nh_family = AF_INET;

	err = fib_nh_common_init(net, &nh->nh_common, cfg->fc_encap,
				 cfg->fc_encap_type, cfg, GFP_KERNEL, extack);
	if (err)
		return err;

	nh->fib_nh_oif = cfg->fc_oif;
	nh->fib_nh_gw_family = cfg->fc_gw_family;
	if (cfg->fc_gw_family == AF_INET)
		nh->fib_nh_gw4 = cfg->fc_gw4;
	else if (cfg->fc_gw_family == AF_INET6)
		nh->fib_nh_gw6 = cfg->fc_gw6;

	nh->fib_nh_flags = cfg->fc_flags;

#ifdef CONFIG_IP_ROUTE_CLASSID
	nh->nh_tclassid = cfg->fc_flow;
	if (nh->nh_tclassid)
		atomic_inc(&net->ipv4.fib_num_tclassid_users);
#endif
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	nh->fib_nh_weight = nh_weight;
#endif
	return 0;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting at
 * @rtnh.  Returns the count, or 0 (with an extack message) if bytes are left
 * over after the last well-formed entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
			      struct netlink_ext_ack *extack)
{
	int nhs = 0;

	while (rtnh_ok(rtnh, remaining)) {
		nhs++;
		rtnh = rtnh_next(rtnh, &remaining);
	}

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0) {
		NL_SET_ERR_MSG(extack,
			       "Invalid nexthop configuration - extra data after nexthops");
		nhs = 0;
	}

	return nhs;
}

/*
 * Read an IPv4 gateway address from attribute @nla into *@gw.
 * Returns 0 on success, or -EINVAL (with an extack message) if the attribute
 * payload is shorter than an IPv4 address.
 */
static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla,
			    struct netlink_ext_ack *extack)
{
	if (nla_len(nla) < sizeof(*gw)) {
		NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY");
		return -EINVAL;
	}

	*gw = nla_get_in_addr(nla);

	return 0;
}

/* only called when fib_nh is integrated into fib_info */
/*
 * Initialise every embedded nexthop of @fi from the rtnexthop entries at
 * @rtnh (@remaining bytes).  For each entry a temporary fib_config is built
 * from the entry's flags, interface index and nested attributes
 * (RTA_GATEWAY or RTA_VIA, RTA_FLOW, RTA_ENCAP, RTA_ENCAP_TYPE) and handed to
 * fib_nh_init() with weight rtnh_hops + 1.
 *
 * Afterwards the first nexthop is checked against any top-level oif, gateway
 * and (with CONFIG_IP_ROUTE_CLASSID) flow given in @cfg.
 *
 * Returns 0 on success, -EINVAL on malformed or inconsistent input, or the
 * error of a helper.
 */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg,
		       struct netlink_ext_ack *extack)
{
	struct net *net = fi->fib_net;
	struct fib_config fib_cfg;
	struct fib_nh *nh;
	int ret;

	change_nexthops(fi) {
		int attrlen;

		/* Start every nexthop from a clean temporary config. */
		memset(&fib_cfg, 0, sizeof(fib_cfg));

		if (!rtnh_ok(rtnh, remaining)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid nexthop configuration - extra data after nexthop");
			return -EINVAL;
		}

		if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
			NL_SET_ERR_MSG(extack,
				       "Invalid flags for nexthop - can not contain DEAD or LINKDOWN");
			return -EINVAL;
		}

		/* Low 8 bits come from the entry, the rest from @cfg. */
		fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
		fib_cfg.fc_oif = rtnh->rtnh_ifindex;

		attrlen = rtnh_attrlen(rtnh);
		if (attrlen > 0) {
			struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh);

			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
			nlav = nla_find(attrs, attrlen, RTA_VIA);
			if (nla && nlav) {
				NL_SET_ERR_MSG(extack,
					       "Nexthop configuration can not contain both GATEWAY and VIA");
				return -EINVAL;
			}
			if (nla) {
				ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla,
						       extack);
				if (ret)
					goto errout;

				/* An all-zero gateway leaves the family unset. */
				if (fib_cfg.fc_gw4)
					fib_cfg.fc_gw_family = AF_INET;
			} else if (nlav) {
				ret = fib_gw_from_via(&fib_cfg, nlav, extack);
				if (ret)
					goto errout;
			}

			nla = nla_find(attrs, attrlen, RTA_FLOW);
			if (nla) {
				if (nla_len(nla) < sizeof(u32)) {
					NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW");
					return -EINVAL;
				}
				fib_cfg.fc_flow = nla_get_u32(nla);
			}

			fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
			/* RTA_ENCAP_TYPE length checked in
			 * lwtunnel_valid_encap_type_attr
			 */
			nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
			if (nla)
				fib_cfg.fc_encap_type = nla_get_u16(nla);
		}

		ret = fib_nh_init(net, nexthop_nh, &fib_cfg,
				  rtnh->rtnh_hops + 1, extack);
		if (ret)
			goto errout;

		rtnh = rtnh_next(rtnh, &remaining);
	} endfor_nexthops(fi);

	/* From here on every failure is a mismatch reported as -EINVAL. */
	ret = -EINVAL;
	nh = fib_info_nh(fi, 0);
	if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
		NL_SET_ERR_MSG(extack,
			       "Nexthop device index does not match RTA_OIF");
		goto errout;
	}
	if (cfg->fc_gw_family) {
		if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
		    (cfg->fc_gw_family == AF_INET &&
		     nh->fib_nh_gw4 != cfg->fc_gw4) ||
		    (cfg->fc_gw_family == AF_INET6 &&
		     ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
			NL_SET_ERR_MSG(extack,
				       "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA");
			goto errout;
		}
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
		NL_SET_ERR_MSG(extack,
			       "Nexthop class id does not match RTA_FLOW");
		goto errout;
	}
#endif
	ret = 0;
errout:
	return ret;
}

/* only called when fib_nh is integrated into fib_info */
/*
 * Recompute fib_nh_upper_bound of every nexthop of @fi from the weights.
 * Nexthops flagged RTNH_F_DEAD, and nexthops flagged RTNH_F_LINKDOWN on a
 * device for which ip_ignore_linkdown() is true, are left out of the total
 * and get an upper bound of -1.  Every other nexthop gets
 * DIV_ROUND_CLOSEST_ULL(w << 31, total) - 1, where w is the running sum of
 * the weights up to and including that nexthop.
 * Nothing is done for fewer than two paths.
 */
static void fib_rebalance(struct fib_info *fi)
{
	int total;
	int w;

	if (fib_info_num_path(fi) < 2)
		return;

	/* Pass 1: total weight of the usable nexthops. */
	total = 0;
	for_nexthops(fi) {
		if (nh->fib_nh_flags & RTNH_F_DEAD)
			continue;

		if (ip_ignore_linkdown(nh->fib_nh_dev) &&
		    nh->fib_nh_flags & RTNH_F_LINKDOWN)
			continue;

		total += nh->fib_nh_weight;
	} endfor_nexthops(fi);

	/* Pass 2: cumulative bounds, with the same skip conditions. */
	w = 0;
	change_nexthops(fi) {
		int upper_bound;

		if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) {
			upper_bound = -1;
		} else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) &&
			   nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
			upper_bound = -1;
		} else {
			w += nexthop_nh->fib_nh_weight;
			upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
							    total) - 1;
		}

		atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound);
	} endfor_nexthops(fi);
}
#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Without multipath support a nexthop list is always rejected. */
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
		       int remaining, struct fib_config *cfg,
		       struct netlink_ext_ack *extack)
{
	NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel");

	return -EINVAL;
}

/* Nothing to rebalance without multipath support. */
#define fib_rebalance(fi) do { } while (0)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Compare the encapsulation described by @encap_type/@encap with the one
 * attached to @nh.  A temporary lwtunnel state is built from the attributes,
 * compared with lwtunnel_cmp_encap() and freed again.
 *
 * Returns the result of lwtunnel_cmp_encap().  Returns 0 as well when
 * @encap_type is LWTUNNEL_ENCAP_NONE or when the temporary state cannot be
 * built (the build error itself is not propagated).
 */
static int fib_encap_match(struct net *net, u16 encap_type,
			   struct nlattr *encap,
			   const struct fib_nh *nh,
			   const struct fib_config *cfg,
			   struct netlink_ext_ack *extack)
{
	struct lwtunnel_state *lwtstate;
	int ret, result = 0;

	if (encap_type == LWTUNNEL_ENCAP_NONE)
		return 0;

	ret = lwtunnel_build_state(net, encap_type, encap, AF_INET,
				   cfg, &lwtstate, extack);
	if (!ret) {
		result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws);
		lwtstate_free(lwtstate);
	}

	return result;
}

int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi,
		 struct netlink_ext_ack *extack)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	struct rtnexthop *rtnh;
	int remaining;
#endif

	if (cfg->fc_priority
&& cfg->fc_priority != fi->fib_priority) return 1; if (cfg->fc_nh_id) { if (fi->nh && cfg->fc_nh_id == fi->nh->id) return 0; return 1; } if (fi->nh) { if (cfg->fc_oif || cfg->fc_gw_family || cfg->fc_mp) return 1; return 0; } if (cfg->fc_oif || cfg->fc_gw_family) { struct fib_nh *nh; nh = fib_info_nh(fi, 0); if (cfg->fc_encap) { if (fib_encap_match(net, cfg->fc_encap_type, cfg->fc_encap, nh, cfg, extack)) return 1; } #ifdef CONFIG_IP_ROUTE_CLASSID if (cfg->fc_flow && cfg->fc_flow != nh->nh_tclassid) return 1; #endif if ((cfg->fc_oif && cfg->fc_oif != nh->fib_nh_oif) || (cfg->fc_gw_family && cfg->fc_gw_family != nh->fib_nh_gw_family)) return 1; if (cfg->fc_gw_family == AF_INET && cfg->fc_gw4 != nh->fib_nh_gw4) return 1; if (cfg->fc_gw_family == AF_INET6 && ipv6_addr_cmp(&cfg->fc_gw6, &nh->fib_nh_gw6)) return 1; return 0; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (!cfg->fc_mp) return 0; rtnh = cfg->fc_mp; remaining = cfg->fc_mp_len; for_nexthops(fi) { int attrlen; if (!rtnh_ok(rtnh, remaining)) return -EINVAL; if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) return 1; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); int err; nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); if (nla && nlav) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); return -EINVAL; } if (nla) { __be32 gw; err = fib_gw_from_attr(&gw, nla, extack); if (err) return err; if (nh->fib_nh_gw_family != AF_INET || gw != nh->fib_nh_gw4) return 1; } else if (nlav) { struct fib_config cfg2; err = fib_gw_from_via(&cfg2, nlav, extack); if (err) return err; switch (nh->fib_nh_gw_family) { case AF_INET: if (cfg2.fc_gw_family != AF_INET || cfg2.fc_gw4 != nh->fib_nh_gw4) return 1; break; case AF_INET6: if (cfg2.fc_gw_family != AF_INET6 || ipv6_addr_cmp(&cfg2.fc_gw6, &nh->fib_nh_gw6)) return 1; break; } } #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, 
RTA_FLOW); if (nla) { if (nla_len(nla) < sizeof(u32)) { NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); return -EINVAL; } if (nla_get_u32(nla) != nh->nh_tclassid) return 1; } #endif } rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); #endif return 0; } bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) { struct nlattr *nla; int remaining; if (!cfg->fc_mx) return true; nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { int type = nla_type(nla); u32 fi_val, val; if (!type) continue; if (type > RTAX_MAX) return false; type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; bool ecn_ca = false; nla_strscpy(tmp, nla, sizeof(tmp)); val = tcp_ca_get_key_by_name(tmp, &ecn_ca); } else { if (nla_len(nla) != sizeof(u32)) return false; val = nla_get_u32(nla); } fi_val = fi->fib_metrics->metrics[type - 1]; if (type == RTAX_FEATURES) fi_val &= ~DST_FEATURE_ECN_CA; if (fi_val != val) return false; } return true; } static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, u32 table, struct netlink_ext_ack *extack) { struct fib6_config cfg = { .fc_table = table, .fc_flags = nh->fib_nh_flags | RTF_GATEWAY, .fc_ifindex = nh->fib_nh_oif, .fc_gateway = nh->fib_nh_gw6, }; struct fib6_nh fib6_nh = {}; int err; err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_KERNEL); nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; ipv6_stub->fib6_nh_release(&fib6_nh); } return err; } /* * Picture * ------- * * Semantics of nexthop is very messy by historical reasons. * We have to take into account, that: * a) gateway can be actually local interface address, * so that gatewayed route is direct. * b) gateway must be on-link address, possibly * described not by an ifaddr, but also by a direct route. 
* c) If both gateway and interface are specified, they should not * contradict. * d) If we use tunnel routes, the gateway might not be on-link. * * An attempt to reconcile all of these (alas, self-contradictory) conditions * results in pretty ugly and hairy code with obscure logic. * * I chose to generalize it instead, so that the size * of the code practically does not increase, but it becomes * much more general. * Every prefix is assigned a "scope" value: "host" is a local address, * "link" is a direct route, * [ ... "site" ... "interior" ... ] * and "universe" is a true gateway route with global meaning. * * Every prefix refers to a set of "nexthop"s (gw, oif), * where gw must have a narrower scope. This recursion stops * when gw has LOCAL scope or if the "nexthop" is declared ONLINK, * which means that gw is forced to be on link. * * The code is still hairy, but now it is apparently logically * consistent and very flexible. For example, as a by-product it allows * independent exterior and interior routing processes to coexist * in peace. * * Normally it looks as follows. 
*
 * {universe prefix} -> (gw, oif) [scope link]
 *  |
 *  |-> {link prefix} -> (gw, oif) [scope local]
 *                        |
 *                        |-> {local prefix} (terminal node)
 */

/*
 * Validate a nexthop that has an IPv4 gateway and bind it to a device.
 *
 * @net:    network namespace used for device and FIB lookups
 * @nh:     nexthop being checked; fib_nh_dev, fib_nh_oif, fib_nh_scope and
 *          possibly RTNH_F_LINKDOWN in fib_nh_flags are filled in here
 * @table:  table id tried first for the gateway lookup (0 or RT_TABLE_MAIN
 *          means go straight to the full lookup)
 * @scope:  scope of the route that owns this nexthop
 * @extack: netlink extended ack that receives an error message
 *
 * Returns 0 on success or a negative errno (-EINVAL, -ENODEV, -ENETDOWN or
 * whatever the FIB lookup returned).
 */
static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
			      u8 scope, struct netlink_ext_ack *extack)
{
	struct net_device *dev;
	struct fib_result res;
	int err = 0;

	/* ONLINK: the gateway is taken to be on nh->fib_nh_oif, no lookup. */
	if (nh->fib_nh_flags & RTNH_F_ONLINK) {
		unsigned int addr_type;

		if (scope >= RT_SCOPE_LINK) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
			return -EINVAL;
		}
		dev = __dev_get_by_index(net, nh->fib_nh_oif);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
			return -ENODEV;
		}
		if (!(dev->flags & IFF_UP)) {
			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
			return -ENETDOWN;
		}
		/* The gateway address itself must classify as unicast. */
		addr_type = inet_addr_type_dev_table(net, dev, nh->fib_nh_gw4);
		if (addr_type != RTN_UNICAST) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			return -EINVAL;
		}
		if (!netif_carrier_ok(dev))
			nh->fib_nh_flags |= RTNH_F_LINKDOWN;
		nh->fib_nh_dev = dev;
		netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
		nh->fib_nh_scope = RT_SCOPE_LINK;
		return 0;
	}

	/* Otherwise resolve the gateway through a FIB lookup under RCU. */
	rcu_read_lock();
	{
		struct fib_table *tbl = NULL;
		struct flowi4 fl4 = {
			.daddr = nh->fib_nh_gw4,
			.flowi4_scope = scope + 1,
			.flowi4_oif = nh->fib_nh_oif,
			.flowi4_iif = LOOPBACK_IFINDEX,
		};

		/* It is not necessary, but requires a bit of thinking */
		if (fl4.flowi4_scope < RT_SCOPE_LINK)
			fl4.flowi4_scope = RT_SCOPE_LINK;

		if (table && table != RT_TABLE_MAIN)
			tbl = fib_get_table(net, table);

		if (tbl)
			err = fib_table_lookup(tbl, &fl4, &res,
					       FIB_LOOKUP_IGNORE_LINKSTATE |
					       FIB_LOOKUP_NOREF);

		/* on error or if no table given do full lookup. This
		 * is needed for example when nexthops are in the local
		 * table rather than the given table
		 */
		if (!tbl || err) {
			err = fib_lookup(net, &fl4, &res,
					 FIB_LOOKUP_IGNORE_LINKSTATE);
		}

		if (err) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
	}

	err = -EINVAL;
	if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
		goto out;
	}
	/* Adopt scope, output interface and device from the lookup result. */
	nh->fib_nh_scope = res.scope;
	nh->fib_nh_oif = FIB_RES_OIF(res);
	nh->fib_nh_dev = dev = FIB_RES_DEV(res);
	if (!dev) {
		NL_SET_ERR_MSG(extack,
			       "No egress device for nexthop gateway");
		goto out;
	}
	/*
	 * The reference is taken before the IFF_UP test below, so it is still
	 * held when -ENETDOWN is returned.
	 * NOTE(review): presumably dropped by the cleanup of the caller -
	 * TODO confirm.
	 */
	netdev_hold(dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
	if (!netif_carrier_ok(dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
out:
	rcu_read_unlock();
	return err;
}

/*
 * Validate a nexthop that has no gateway (device only).
 *
 * Rejects RTNH_F_PERVASIVE and RTNH_F_ONLINK, then looks up the in_device
 * for nh->fib_nh_oif under RCU. On success the nexthop gets the device
 * (with a held reference), scope RT_SCOPE_HOST and, if the carrier is off,
 * RTNH_F_LINKDOWN.
 *
 * Returns 0, -EINVAL (bad flags), -ENODEV (no in_device for the index) or
 * -ENETDOWN (device not up).
 */
static int fib_check_nh_nongw(struct net *net, struct fib_nh *nh,
			      struct netlink_ext_ack *extack)
{
	struct in_device *in_dev;
	int err;

	if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set");
		return -EINVAL;
	}

	rcu_read_lock();

	err = -ENODEV;
	in_dev = inetdev_by_index(net, nh->fib_nh_oif);
	if (!in_dev)
		goto out;
	err = -ENETDOWN;
	if (!(in_dev->dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Device for nexthop is not up");
		goto out;
	}

	nh->fib_nh_dev = in_dev->dev;
	netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
	nh->fib_nh_scope = RT_SCOPE_HOST;
	if (!netif_carrier_ok(nh->fib_nh_dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;
	err = 0;
out:
	rcu_read_unlock();
	return err;
}

/*
 * Validate one nexthop by dispatching on its gateway family:
 * AF_INET -> fib_check_nh_v4_gw(), AF_INET6 -> fib_check_nh_v6_gw(),
 * anything else -> fib_check_nh_nongw(). Returns the result of the helper.
 */
int fib_check_nh(struct net *net, struct fib_nh *nh, u32 table, u8 scope,
		 struct netlink_ext_ack *extack)
{
	int err;

	if (nh->fib_nh_gw_family == AF_INET)
		err = fib_check_nh_v4_gw(net, nh, table, scope, extack);
	else if (nh->fib_nh_gw_family == AF_INET6)
		err = fib_check_nh_v6_gw(net, nh, table, extack);
	else
		err = fib_check_nh_nongw(net, nh, extack);

	return err;
}

/*
 * Select a source address for a nexthop and, for AF_INET nexthops, cache it.
 *
 * For a nexthop whose nhc_family is not AF_INET the address is selected on
 * nhc->nhc_dev with destination 0 and nothing is cached. For AF_INET the
 * address is selected towards fib_nh_gw4 and stored in nh_saddr together
 * with the current net->ipv4.dev_addr_genid in nh_saddr_genid.
 *
 * Returns the selected address.
 */
__be32 fib_info_update_nhc_saddr(struct net *net, struct fib_nh_common *nhc,
				 unsigned char scope)
{
	struct fib_nh *nh;
	__be32 saddr;

	if (nhc->nhc_family != AF_INET)
		return inet_select_addr(nhc->nhc_dev, 0, scope);

	nh = container_of(nhc, struct fib_nh, nh_common);
	saddr = inet_select_addr(nh->fib_nh_dev, nh->fib_nh_gw4, scope);

	/* Paired with the READ_ONCE() accesses in fib_result_prefsrc(). */
	WRITE_ONCE(nh->nh_saddr, saddr);
	WRITE_ONCE(nh->nh_saddr_genid, atomic_read(&net->ipv4.dev_addr_genid));

	return saddr;
}

/*
 * Return the preferred source address for a lookup result.
 *
 * Order of preference: the explicit fib_prefsrc of the route, then the
 * cached nh_saddr of an AF_INET nexthop when its generation id still
 * matches net->ipv4.dev_addr_genid, otherwise a freshly selected address
 * from fib_info_update_nhc_saddr().
 */
__be32 fib_result_prefsrc(struct net *net, struct fib_result *res)
{
	struct fib_nh_common *nhc = res->nhc;

	if (res->fi->fib_prefsrc)
		return res->fi->fib_prefsrc;

	if (nhc->nhc_family == AF_INET) {
		struct fib_nh *nh;

		nh = container_of(nhc, struct fib_nh, nh_common);
		if (READ_ONCE(nh->nh_saddr_genid) ==
		    atomic_read(&net->ipv4.dev_addr_genid))
			return READ_ONCE(nh->nh_saddr);
	}

	return fib_info_update_nhc_saddr(net, nhc, res->fi->fib_scope);
}

/*
 * Check that a preferred source address is acceptable for the route in cfg.
 *
 * The check is skipped (returns true) for an RTN_LOCAL route whose non-zero
 * destination equals the address. Otherwise the address must classify as
 * RTN_LOCAL in the table of the route (RT_TABLE_MAIN is mapped to
 * RT_TABLE_LOCAL) or, failing that, in RT_TABLE_LOCAL.
 */
static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
{
	if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
	    fib_prefsrc != cfg->fc_dst) {
		u32 tb_id = cfg->fc_table;
		int rc;

		if (tb_id == RT_TABLE_MAIN)
			tb_id = RT_TABLE_LOCAL;

		rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
					  fib_prefsrc, tb_id);

		/* Second chance: retry against the local table. */
		if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
			rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
						  fib_prefsrc, RT_TABLE_LOCAL);
		}

		if (rc != RTN_LOCAL)
			return false;
	}
	return true;
}

/*
 * Build (or find) the fib_info described by a route configuration.
 *
 * @cfg:    route configuration
 * @extack: netlink extended ack that receives an error message
 *
 * Asserts that RTNL is held. The steps are:
 *  1. sanity checks on type, scope and flags;
 *  2. for a nexthop-object route (fc_nh_id) try to reuse an existing
 *     fib_info, else look the nexthop object up;
 *  3. allocate a fib_info with room for the counted nexthops and
 *     initialise metrics and the plain fields from cfg;
 *  4. initialise and validate the nexthops and the preferred source;
 *  5. if an identical fib_info already exists, free the new one and return
 *     the existing one with fib_treeref incremented; otherwise link the
 *     new one into the hash tables.
 *
 * Returns the fib_info, or ERR_PTR() with a negative errno on failure.
 */
struct fib_info *fib_create_info(struct fib_config *cfg,
				 struct netlink_ext_ack *extack)
{
	int err;
	struct fib_info *fi = NULL;
	struct nexthop *nh = NULL;
	struct fib_info *ofi;
	int nhs = 1;
	struct net *net = cfg->fc_nlinfo.nl_net;

	ASSERT_RTNL();
	if (cfg->fc_type > RTN_MAX)
		goto err_inval;

	/* Fast check to catch the most weird cases */
	if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
		NL_SET_ERR_MSG(extack,
			       "Invalid rtm_flags - can not contain DEAD or LINKDOWN");
		goto err_inval;
	}

	if (cfg->fc_nh_id) {
		/* Reuse is only attempted when no metrics were supplied. */
		if (!cfg->fc_mx) {
			fi = fib_find_info_nh(net, cfg);
			if (fi) {
				refcount_inc(&fi->fib_treeref);
				return fi;
			}
		}

		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
		if (!nh) {
			NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
			goto err_inval;
		}
		/* Nexthop objects are external: no embedded fib_nh entries. */
		nhs = 0;
	}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (cfg->fc_mp) {
		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
		if (nhs == 0)
			goto err_inval;
	}
#endif

	fib_info_hash_grow(net);

	fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL);
	if (!fi) {
		err = -ENOBUFS;
		goto failure;
	}

	fi->fib_metrics = ip_fib_metrics_init(cfg->fc_mx, cfg->fc_mx_len, extack);
	if (IS_ERR(fi->fib_metrics)) {
		/* Nothing else is set up yet, so a plain kfree() is used. */
		err = PTR_ERR(fi->fib_metrics);
		kfree(fi);
		return ERR_PTR(err);
	}

	fi->fib_net = net;
	fi->fib_protocol = cfg->fc_protocol;
	fi->fib_scope = cfg->fc_scope;
	fi->fib_flags = cfg->fc_flags;
	fi->fib_priority = cfg->fc_priority;
	fi->fib_prefsrc = cfg->fc_prefsrc;
	fi->fib_type = cfg->fc_type;
	fi->fib_tb_id = cfg->fc_table;

	fi->fib_nhs = nhs;
	if (nh) {
		if (!nexthop_get(nh)) {
			NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
			err = -EINVAL;
		} else {
			err = 0;
			fi->nh = nh;
		}
	} else {
		/* Embedded nexthops: set back pointers, then initialise. */
		change_nexthops(fi) {
			nexthop_nh->nh_parent = fi;
		} endfor_nexthops(fi)

		if (cfg->fc_mp)
			err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg,
					  extack);
		else
			err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack);
	}

	if (err != 0)
		goto failure;

	if (fib_props[cfg->fc_type].error) {
		/* Route types that carry an error take no nexthop details. */
		if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
			NL_SET_ERR_MSG(extack,
				       "Gateway, device and multipath can not be specified for this route type");
			goto err_inval;
		}
		goto link_it;
	} else {
		switch (cfg->fc_type) {
		case RTN_UNICAST:
		case RTN_LOCAL:
		case RTN_BROADCAST:
		case RTN_ANYCAST:
		case RTN_MULTICAST:
			break;
		default:
			NL_SET_ERR_MSG(extack, "Invalid route type");
			goto err_inval;
		}
	}

	if (cfg->fc_scope > RT_SCOPE_HOST) {
		NL_SET_ERR_MSG(extack, "Invalid scope");
		goto err_inval;
	}

	if (fi->nh) {
		err = fib_check_nexthop(fi->nh, cfg->fc_scope, extack);
		if (err)
			goto failure;
	} else if (cfg->fc_scope == RT_SCOPE_HOST) {
		/* This local nh (struct fib_nh) shadows the outer nh. */
		struct fib_nh *nh = fi->fib_nh;

		/* Local address is added. */
		if (nhs != 1) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have multiple nexthops");
			goto err_inval;
		}
		if (nh->fib_nh_gw_family) {
			NL_SET_ERR_MSG(extack,
				       "Route with host scope can not have a gateway");
			goto err_inval;
		}
		nh->fib_nh_scope = RT_SCOPE_NOWHERE;
		nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
		err = -ENODEV;
		if (!nh->fib_nh_dev)
			goto failure;
		netdev_tracker_alloc(nh->fib_nh_dev, &nh->fib_nh_dev_tracker,
				     GFP_KERNEL);
	} else {
		int linkdown = 0;

		change_nexthops(fi) {
			err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
					   cfg->fc_table, cfg->fc_scope,
					   extack);
			if (err != 0)
				goto failure;
			if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
				linkdown++;
		} endfor_nexthops(fi)
		/* The route is LINKDOWN only if every nexthop is. */
		if (linkdown == fi->fib_nhs)
			fi->fib_flags |= RTNH_F_LINKDOWN;
	}

	if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc)) {
		NL_SET_ERR_MSG(extack, "Invalid prefsrc address");
		goto err_inval;
	}

	if (!fi->nh) {
		/* Prime the cached source address of every embedded nexthop. */
		change_nexthops(fi) {
			fib_info_update_nhc_saddr(net, &nexthop_nh->nh_common,
						  fi->fib_scope);
			if (nexthop_nh->fib_nh_gw_family == AF_INET6)
				fi->fib_nh_is_v6 = true;
		} endfor_nexthops(fi)

		fib_rebalance(fi);
	}

link_it:
	ofi = fib_find_info(fi);
	if (ofi) {
		/* fib_table_lookup() should not see @fi yet. */
		fi->fib_dead = 1;
		free_fib_info(fi);
		refcount_inc(&ofi->fib_treeref);
		return ofi;
	}

	refcount_set(&fi->fib_treeref, 1);
	refcount_set(&fi->fib_clntref, 1);

	net->ipv4.fib_info_cnt++;
	hlist_add_head(&fi->fib_hash, fib_info_hash_bucket(fi));

	/* Routes with a preferred source are also hashed by that address. */
	if (fi->fib_prefsrc) {
		struct hlist_head *head;

		head = fib_info_laddrhash_bucket(net, fi->fib_prefsrc);
		hlist_add_head(&fi->fib_lhash, head);
	}
	if (fi->nh) {
		list_add(&fi->nh_list, &nh->fi_list);
	} else {
		/* Hash each embedded nexthop under its device. */
		change_nexthops(fi) {
			struct hlist_head *head;

			if (!nexthop_nh->fib_nh_dev)
				continue;
			head = fib_nh_head(nexthop_nh->fib_nh_dev);
			hlist_add_head_rcu(&nexthop_nh->nh_hash, head);
		} endfor_nexthops(fi)
	}
	return fi;

err_inval:
	err = -EINVAL;

failure:
	if (fi) {
		/* fib_table_lookup() should not see @fi yet. */
		fi->fib_dead = 1;
		free_fib_info(fi);
	}

	return ERR_PTR(err);
}

/*
 * Append the netlink attributes that describe one nexthop to skb.
 *
 * @skb:       message being built
 * @nhc:       nexthop to describe
 * @rt_family: address family of the route that owns the nexthop
 * @flags:     in/out RTNH_F_* flags; bits are only ever OR-ed in
 * @skip_oif:  when true no RTA_OIF attribute is emitted
 *
 * Returns 0 on success or -EMSGSIZE when an attribute does not fit.
 */
int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc,
		     u8 rt_family, unsigned char *flags, bool skip_oif)
{
	if (nhc->nhc_flags & RTNH_F_DEAD)
		*flags |= RTNH_F_DEAD;

	if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
		*flags |= RTNH_F_LINKDOWN;

		/* A link-down nexthop is also reported DEAD when the device
		 * is configured to ignore link-down routes.
		 */
		rcu_read_lock();
		switch (nhc->nhc_family) {
		case AF_INET:
			if (ip_ignore_linkdown(nhc->nhc_dev))
				*flags |= RTNH_F_DEAD;
			break;
		case AF_INET6:
			if (ip6_ignore_linkdown(nhc->nhc_dev))
				*flags |= RTNH_F_DEAD;
			break;
		}
		rcu_read_unlock();
	}

	switch (nhc->nhc_gw_family) {
	case AF_INET:
		if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4))
			goto nla_put_failure;
		break;
	case AF_INET6:
		/* if gateway family does not match nexthop family
		 * gateway is encoded as RTA_VIA
		 */
		if (rt_family != nhc->nhc_gw_family) {
			int alen = sizeof(struct in6_addr);
			struct nlattr *nla;
			struct rtvia *via;

			/* alen + 2: the address plus the family field. */
			nla = nla_reserve(skb, RTA_VIA, alen + 2);
			if (!nla)
				goto nla_put_failure;

			via = nla_data(nla);
			via->rtvia_family = AF_INET6;
			memcpy(via->rtvia_addr, &nhc->nhc_gw.ipv6, alen);
		} else if (nla_put_in6_addr(skb, RTA_GATEWAY,
					    &nhc->nhc_gw.ipv6) < 0) {
			goto nla_put_failure;
		}
		break;
	}

	*flags |= (nhc->nhc_flags &
		   (RTNH_F_ONLINK | RTNH_F_OFFLOAD | RTNH_F_TRAP));

	if (!skip_oif && nhc->nhc_dev &&
	    nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex))
		goto nla_put_failure;

	if (lwtunnel_fill_encap(skb, nhc->nhc_lwtstate,
				RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
		goto nla_put_failure;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(fib_nexthop_info);

#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6)
/*
 * Append one struct rtnexthop entry, followed by its attributes, to skb.
 *
 * @nh_weight:   weight of the nexthop; stored as rtnh_hops = weight - 1
 * @nh_tclassid: when non-zero it is emitted as RTA_FLOW
 *
 * rtnh_len is filled in last, once all attributes are in place.
 * Returns 0 on success or -EMSGSIZE when the message is full.
 */
int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc,
		    int nh_weight, u8 rt_family, u32 nh_tclassid)
{
	const struct net_device *dev = nhc->nhc_dev;
	struct rtnexthop *rtnh;
	unsigned char flags = 0;

	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
	if (!rtnh)
		goto nla_put_failure;

	rtnh->rtnh_hops = nh_weight - 1;
	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;

	/* The interface index lives in rtnexthop, so RTA_OIF is skipped. */
	if (fib_nexthop_info(skb, nhc, rt_family, &flags, true) < 0)
		goto nla_put_failure;

	rtnh->rtnh_flags = flags;

	if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid))
		goto nla_put_failure;

	/* length of rtnetlink header + attributes */
	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
EXPORT_SYMBOL_GPL(fib_add_nexthop);
#endif

#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Emit the RTA_MULTIPATH nest for a fib_info: either delegated to
 * nexthop_mpath_fill_node() when the route uses a nexthop object, or one
 * fib_add_nexthop() entry per embedded nexthop.
 * Returns 0 on success or -EMSGSIZE.
 */
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	struct nlattr *mp;

	mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
	if (!mp)
		goto nla_put_failure;

	if (unlikely(fi->nh)) {
		if (nexthop_mpath_fill_node(skb, fi->nh, AF_INET) < 0)
			goto nla_put_failure;
		goto mp_end;
	}

	for_nexthops(fi) {
		u32 nh_tclassid = 0;
#ifdef CONFIG_IP_ROUTE_CLASSID
		nh_tclassid = nh->nh_tclassid;
#endif
		if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight,
				    AF_INET, nh_tclassid) < 0)
			goto nla_put_failure;
	} endfor_nexthops(fi);

mp_end:
	nla_nest_end(skb, mp);

	return 0;

nla_put_failure:
	return -EMSGSIZE;
}
#else
/* Without multipath support there is nothing to add. */
static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi)
{
	return 0;
}
#endif

/*
 * Fill one route message (rtmsg plus attributes) for the route in fri.
 *
 * @skb:    message buffer
 * @portid: netlink port id placed in the header
 * @seq:    netlink sequence number
 * @event:  netlink message type
 * @fri:    route description (fib_info, table id, prefix, type, flags)
 * @flags:  netlink header flags
 *
 * Returns 0 on success. On lack of space the partially built message is
 * cancelled and -EMSGSIZE is returned.
 */
int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
		  const struct fib_rt_info *fri, unsigned int flags)
{
	unsigned int nhs = fib_info_num_path(fri->fi);
	struct fib_info *fi = fri->fi;
	u32 tb_id = fri->tb_id;
	struct nlmsghdr *nlh;
	struct rtmsg *rtm;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
	if (!nlh)
		return -EMSGSIZE;

	rtm = nlmsg_data(nlh);
	rtm->rtm_family = AF_INET;
	rtm->rtm_dst_len = fri->dst_len;
	rtm->rtm_src_len = 0;
	rtm->rtm_tos = inet_dscp_to_dsfield(fri->dscp);
	/* rtm_table gets ids below 256 only; RTA_TABLE has the full id. */
	if (tb_id < 256)
		rtm->rtm_table = tb_id;
	else
		rtm->rtm_table = RT_TABLE_COMPAT;
	if (nla_put_u32(skb, RTA_TABLE, tb_id))
		goto nla_put_failure;
	rtm->rtm_type = fri->type;
	rtm->rtm_flags = fi->fib_flags;
	rtm->rtm_scope = fi->fib_scope;
	rtm->rtm_protocol = fi->fib_protocol;

	if (rtm->rtm_dst_len &&
	    nla_put_in_addr(skb, RTA_DST, fri->dst))
		goto nla_put_failure;
	if (fi->fib_priority &&
	    nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority))
		goto nla_put_failure;
	if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0)
		goto nla_put_failure;

	if (fi->fib_prefsrc &&
	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
		goto nla_put_failure;

	if (fi->nh) {
		if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
			goto nla_put_failure;
		if (nexthop_is_blackhole(fi->nh))
			rtm->rtm_type = RTN_BLACKHOLE;
		/* With compat mode off the nexthop details are not dumped. */
		if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
			goto offload;
	}

	if (nhs == 1) {
		const struct fib_nh_common *nhc = fib_info_nhc(fi, 0);
		/* This local flags shadows the flags parameter. */
		unsigned char flags = 0;

		if (fib_nexthop_info(skb, nhc, AF_INET, &flags, false) < 0)
			goto nla_put_failure;

		rtm->rtm_flags = flags;
#ifdef CONFIG_IP_ROUTE_CLASSID
		if (nhc->nhc_family == AF_INET) {
			struct fib_nh *nh;

			nh = container_of(nhc, struct fib_nh, nh_common);
			if (nh->nh_tclassid &&
			    nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
				goto nla_put_failure;
		}
#endif
	} else {
		if (fib_add_multipath(skb, fi) < 0)
			goto nla_put_failure;
	}

offload:
	if (fri->offload)
		rtm->rtm_flags |= RTM_F_OFFLOAD;
	if (fri->trap)
		rtm->rtm_flags |= RTM_F_TRAP;
	if (fri->offload_failed)
		rtm->rtm_flags |= RTM_F_OFFLOAD_FAILED;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

/*
 * Update FIB if:
 * - local address disappeared -> we must delete all the entries
 *   referring to it.
 * - device went down -> we must shutdown all nexthops going via it.
 *
 * fib_sync_down_addr() handles the first case: every fib_info in the same
 * namespace and table (the l3mdev table of dev, or RT_TABLE_MAIN) whose
 * fib_prefsrc equals local is flagged RTNH_F_DEAD with pfsrc_removed set.
 * Returns the number of fib_info entries flagged; 0 when local is zero.
 */
int fib_sync_down_addr(struct net_device *dev, __be32 local)
{
	int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
	struct net *net = dev_net(dev);
	struct hlist_head *head;
	struct fib_info *fi;
	int ret = 0;

	if (!local)
		return 0;

	head = fib_info_laddrhash_bucket(net, local);
	hlist_for_each_entry(fi, head, fib_lhash) {
		if (!net_eq(fi->fib_net, net) ||
		    fi->fib_tb_id != tb_id)
			continue;
		if (fi->fib_prefsrc == local) {
			fi->fib_flags |= RTNH_F_DEAD;
			fi->pfsrc_removed = true;
			ret++;
		}
	}
	return ret;
}

/*
 * Send a nexthop ADD or DEL event to the fib4 notifier chain, but only
 * when the nexthop state agrees with the event:
 *  - ADD is sent unless the nexthop is DEAD, or is LINKDOWN on a device
 *    that ignores link-down routes;
 *  - DEL is sent only if the nexthop is DEAD, or is LINKDOWN on such a
 *    device.
 * Returns the notifier result, or NOTIFY_DONE when nothing was sent.
 */
static int call_fib_nh_notifiers(struct fib_nh *nh,
				 enum fib_event_type event_type)
{
	bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev);
	struct fib_nh_notifier_info info = {
		.fib_nh = nh,
	};

	switch (event_type) {
	case FIB_EVENT_NH_ADD:
		if (nh->fib_nh_flags & RTNH_F_DEAD)
			break;
		if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN)
			break;
		return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
					   &info.info);
	case FIB_EVENT_NH_DEL:
		if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
		    (nh->fib_nh_flags & RTNH_F_DEAD))
			return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
						   event_type, &info.info);
		break;
	default:
		break;
	}

	return NOTIFY_DONE;
}

/* Update the PMTU of exceptions when:
 * - the new MTU of the first hop becomes smaller than the PMTU
 * - the old MTU was the same as the PMTU, and it limited discovery of
 *   larger MTUs on the path. With that limit raised, we can now
 *   discover larger MTUs
 * A special case is locked exceptions, for which the PMTU is smaller
 * than the minimal accepted PMTU:
 * - if the new MTU is greater than the PMTU, don't make any change
 * - otherwise, unlock and set PMTU
 *
 * @nhc:  nexthop whose exception table is walked (no-op when it has none)
 * @new:  new device MTU
 * @orig: previous device MTU
 */
void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
	struct fnhe_hash_bucket *bucket;
	int i;

	/* NOTE(review): the protection condition is the constant 1, so the
	 * needed lock is presumably held by the caller - TODO confirm.
	 */
	bucket = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!bucket)
		return;

	for (i = 0; i < FNHE_HASH_SIZE; i++) {
		struct fib_nh_exception *fnhe;

		for (fnhe = rcu_dereference_protected(bucket[i].chain, 1);
		     fnhe;
		     fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) {
			if (fnhe->fnhe_mtu_locked) {
				if (new <= fnhe->fnhe_pmtu) {
					fnhe->fnhe_pmtu = new;
					fnhe->fnhe_mtu_locked = false;
				}
			} else if (new < fnhe->fnhe_pmtu ||
				   orig == fnhe->fnhe_pmtu) {
				fnhe->fnhe_pmtu = new;
			}
		}
	}
}

/*
 * Propagate an MTU change of dev to the exceptions of every nexthop that
 * is hashed under dev. orig_mtu is the MTU before the change; the new
 * value is read from dev->mtu.
 */
void fib_sync_mtu(struct net_device *dev, u32 orig_mtu)
{
	struct hlist_head *head = fib_nh_head(dev);
	struct fib_nh *nh;

	hlist_for_each_entry(nh, head, nh_hash) {
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		fib_nhc_update_mtu(&nh->nh_common, dev->mtu, orig_mtu);
	}
}

/* Event              force Flags           Description
 * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
 * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
 * NETDEV_DOWN        1     LINKDOWN|DEAD   Last address removed
 * NETDEV_UNREGISTER  1     LINKDOWN|DEAD   Device removed
 *
 * only used when fib_nh is built into fib_info
 *
 * Returns the number of fib_info entries in which every nexthop ended up
 * dead.
 */
int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
{
	struct hlist_head *head = fib_nh_head(dev);
	struct fib_info *prev_fi = NULL;
	int scope = RT_SCOPE_NOWHERE;
	struct fib_nh *nh;
	int ret = 0;

	/* With force no nexthop scope is exempt: -1 matches none of them. */
	if (force)
		scope = -1;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int dead;

		BUG_ON(!fi->fib_nhs);
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		/* Skip a fib_info that was handled on the previous entry. */
		if (fi == prev_fi)
			continue;
		prev_fi = fi;
		dead = 0;
		change_nexthops(fi) {
			if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD)
				dead++;
			else if (nexthop_nh->fib_nh_dev == dev &&
				 nexthop_nh->fib_nh_scope != scope) {
				switch (event) {
				case NETDEV_DOWN:
				case NETDEV_UNREGISTER:
					nexthop_nh->fib_nh_flags |= RTNH_F_DEAD;
					fallthrough;
				case NETDEV_CHANGE:
					nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
					break;
				}
				call_fib_nh_notifiers(nexthop_nh,
						      FIB_EVENT_NH_DEL);
				dead++;
			}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
			/* On unregister one nexthop on dev kills the route. */
			if (event == NETDEV_UNREGISTER &&
			    nexthop_nh->fib_nh_dev == dev) {
				dead = fi->fib_nhs;
				break;
			}
#endif
		} endfor_nexthops(fi)
		if (dead == fi->fib_nhs) {
			switch (event) {
			case NETDEV_DOWN:
			case NETDEV_UNREGISTER:
				fi->fib_flags |= RTNH_F_DEAD;
				fallthrough;
			case NETDEV_CHANGE:
				fi->fib_flags |= RTNH_F_LINKDOWN;
				break;
			}
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}

/* Must be invoked inside of an RCU protected region.
 *
 * Walks the aliases in res->fa_head that share the prefix length and table
 * of the result, looking at unicast gateway routes of link scope, and uses
 * fib_detect_death() to pick one. The chosen fib_info is stored in res via
 * fib_result_assign() and its position is recorded in fa_default of the
 * alias that matched res->fi.
 */
static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
{
	struct fib_info *fi = NULL, *last_resort = NULL;
	struct hlist_head *fa_head = res->fa_head;
	struct fib_table *tb = res->table;
	u8 slen = 32 - res->prefixlen;
	int order = -1, last_idx = -1;
	struct fib_alias *fa, *fa1 = NULL;
	u32 last_prio = res->fi->fib_priority;
	dscp_t last_dscp = 0;

	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
		struct fib_info *next_fi = fa->fa_info;
		struct fib_nh_common *nhc;

		if (fa->fa_slen != slen)
			continue;
		if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
			continue;
		if (fa->tb_id != tb->tb_id)
			continue;
		/* A higher priority value than the last accepted entry with
		 * the same dscp ends the search, or skips the entry when a
		 * non-zero dscp is being tracked.
		 */
		if (next_fi->fib_priority > last_prio &&
		    fa->fa_dscp == last_dscp) {
			if (last_dscp)
				continue;
			break;
		}
		if (next_fi->fib_flags & RTNH_F_DEAD)
			continue;
		last_dscp = fa->fa_dscp;
		last_prio = next_fi->fib_priority;

		if (next_fi->fib_scope != res->scope ||
		    fa->fa_type != RTN_UNICAST)
			continue;

		/* Only routes through a gateway of link scope qualify. */
		nhc = fib_info_nhc(next_fi, 0);
		if (!nhc->nhc_gw_family || nhc->nhc_scope != RT_SCOPE_LINK)
			continue;

		fib_alias_accessed(fa);

		if (!fi) {
			/* The first candidate must be the looked-up route. */
			if (next_fi != res->fi)
				break;
			fa1 = fa;
		} else if (!fib_detect_death(fi, order, &last_resort,
					     &last_idx, fa1->fa_default)) {
			fib_result_assign(res, fi);
			fa1->fa_default = order;
			goto out;
		}
		fi = next_fi;
		order++;
	}

	if (order <= 0 || !fi) {
		if (fa1)
			fa1->fa_default = -1;
		goto out;
	}

	/* The last candidate has not been tested inside the loop. */
	if (!fib_detect_death(fi, order, &last_resort, &last_idx,
			      fa1->fa_default)) {
		fib_result_assign(res, fi);
		fa1->fa_default = order;
		goto out;
	}

	if (last_idx >= 0)
		fib_result_assign(res, last_resort);
	fa1->fa_default = last_idx;
out:
	return;
}

/*
 * Dead device goes up. We wake up dead nexthops.
 * It makes sense only on multipath routes.
 *
 * only used when fib_nh is built into fib_info
 *
 * @dev:      device that came up; nothing is done unless IFF_UP is set
 * @nh_flags: RTNH_F_* flags to clear on the nexthops using dev
 *
 * Returns the number of fib_info entries that have at least one live
 * nexthop after the update.
 */
int fib_sync_up(struct net_device *dev, unsigned char nh_flags)
{
	struct fib_info *prev_fi;
	struct hlist_head *head;
	struct fib_nh *nh;
	int ret;

	if (!(dev->flags & IFF_UP))
		return 0;

	/* When clearing DEAD on a device that reports RUNNING or LOWER_UP,
	 * LINKDOWN is cleared as well.
	 */
	if (nh_flags & RTNH_F_DEAD) {
		unsigned int flags = netif_get_flags(dev);

		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
			nh_flags |= RTNH_F_LINKDOWN;
	}

	prev_fi = NULL;
	head = fib_nh_head(dev);
	ret = 0;

	hlist_for_each_entry(nh, head, nh_hash) {
		struct fib_info *fi = nh->nh_parent;
		int alive;

		BUG_ON(!fi->fib_nhs);
		DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev);
		/* Skip a fib_info that was handled on the previous entry. */
		if (fi == prev_fi)
			continue;

		prev_fi = fi;
		alive = 0;
		change_nexthops(fi) {
			/* None of the flags set: already alive. */
			if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
				alive++;
				continue;
			}
			if (!nexthop_nh->fib_nh_dev ||
			    !(nexthop_nh->fib_nh_dev->flags & IFF_UP))
				continue;
			if (nexthop_nh->fib_nh_dev != dev ||
			    !__in_dev_get_rtnl(dev))
				continue;
			alive++;
			nexthop_nh->fib_nh_flags &= ~nh_flags;
			call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
		} endfor_nexthops(fi)

		if (alive > 0) {
			fi->fib_flags &= ~nh_flags;
			ret++;
		}

		fib_rebalance(fi);
	}

	return ret;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH
/*
 * Tell whether the neighbour entry of the gateway of a nexthop is in a
 * valid state. Only nexthops of scope RT_SCOPE_LINK are checked; for any
 * other scope, or when no neighbour entry exists, the state defaults to
 * NUD_REACHABLE and the nexthop counts as good.
 */
static bool fib_good_nh(const struct fib_nh *nh)
{
	int state = NUD_REACHABLE;

	if (nh->fib_nh_scope == RT_SCOPE_LINK) {
		struct neighbour *n;

		rcu_read_lock();

		if (likely(nh->fib_nh_gw_family == AF_INET))
			n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
						   (__force u32)nh->fib_nh_gw4);
		else if (nh->fib_nh_gw_family == AF_INET6)
			n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
							   &nh->fib_nh_gw6);
		else
			n = NULL;
		if (n)
			state = READ_ONCE(n->nud_state);

		rcu_read_unlock();
	}

	return !!(state & NUD_VALID);
}

/*
 * Pick the nexthop of a multipath route for a given hash value.
 *
 * @res:  lookup result; nh_sel and nhc are updated with the choice
 * @hash: flow hash compared against the upper bound of each nexthop
 * @fl4:  flow, may be NULL; a non-zero saddr makes the selection prefer a
 *        nexthop whose cached nh_saddr equals it
 *
 * Routes that use a nexthop object are delegated to
 * nexthop_path_fib_result().
 */
void fib_select_multipath(struct fib_result *res, int hash,
			  const struct flowi4 *fl4)
{
	struct fib_info *fi = res->fi;
	struct net *net = fi->fib_net;
	bool found = false;
	bool use_neigh;
	__be32 saddr;

	if (unlikely(res->fi->nh)) {
		nexthop_path_fib_result(res, hash);
		return;
	}

	use_neigh = READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh);
	saddr = fl4 ? fl4->saddr : 0;

	change_nexthops(fi) {
		int nh_upper_bound;

		/* Nexthops without a carrier are assigned an upper bound of
		 * minus one when "ignore_routes_with_linkdown" is set.
		 */
		nh_upper_bound = atomic_read(&nexthop_nh->fib_nh_upper_bound);
		if (nh_upper_bound == -1 ||
		    (use_neigh && !fib_good_nh(nexthop_nh)))
			continue;

		/* Until a nexthop matching saddr has been seen, res tracks
		 * the current usable nexthop as a fallback.
		 */
		if (!found) {
			res->nh_sel = nhsel;
			res->nhc = &nexthop_nh->nh_common;
			found = !saddr || nexthop_nh->nh_saddr == saddr;
		}

		if (hash > nh_upper_bound)
			continue;

		/* The hash falls in this nexthop: take it if saddr agrees. */
		if (!saddr || nexthop_nh->nh_saddr == saddr) {
			res->nh_sel = nhsel;
			res->nhc = &nexthop_nh->nh_common;
			return;
		}

		/* Otherwise settle for the fallback already stored in res. */
		if (found)
			return;

	} endfor_nexthops(fi);
}
#endif

/*
 * Finalise a lookup result: choose among multiple paths or default routes
 * and fill in the source address of the flow when it is still unset.
 *
 * Path selection is skipped entirely when the flow has an output interface
 * (flowi4_oif). Otherwise a multipath route is resolved by hash, and a
 * zero-length unicast prefix in a table with more than one default route
 * goes through fib_select_default().
 */
void fib_select_path(struct net *net, struct fib_result *res,
		     struct flowi4 *fl4, const struct sk_buff *skb)
{
	if (fl4->flowi4_oif)
		goto check_saddr;

#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (fib_info_num_path(res->fi) > 1) {
		int h = fib_multipath_hash(net, fl4, skb, NULL);

		fib_select_multipath(res, h, fl4);
	}
	else
#endif
	if (!res->prefixlen &&
	    res->table->tb_num_default > 1 &&
	    res->type == RTN_UNICAST)
		fib_select_default(fl4, res);

check_saddr:
	if (!fl4->saddr) {
		struct net_device *l3mdev;

		l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev);

		/* Use the route prefsrc unless the flow is bound to an
		 * l3mdev that is not the master of the egress device.
		 */
		if (!l3mdev ||
		    l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev)
			fl4->saddr = fib_result_prefsrc(net, res);
		else
			fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK);
	}
}

/*
 * Per-namespace init: allocate the fib_info hash table with 4 hash bits
 * and reset the fib_info counter. Returns 0 or -ENOMEM.
 */
int __net_init fib4_semantics_init(struct net *net)
{
	unsigned int hash_bits = 4;

	net->ipv4.fib_info_hash = fib_info_hash_alloc(hash_bits);
	if (!net->ipv4.fib_info_hash)
		return -ENOMEM;

	net->ipv4.fib_info_hash_bits = hash_bits;
	net->ipv4.fib_info_cnt = 0;

	return 0;
}

/* Per-namespace exit: free the fib_info hash table of the namespace. */
void __net_exit fib4_semantics_exit(struct net *net)
{
	fib_info_hash_free(net->ipv4.fib_info_hash);
}
2 1 158 122 58 135 171 161 103 48 108 88 160 196 12 12 12 11 176 1 165 12 177 12 11 175 1 11 5 5 13 1 9 8 1 2 5 113 95 52 12 118 118 129 195 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * (C) Copyright Red Hat Inc. 2017 * * This file is part of the SCTP kernel implementation * * These functions manipulate sctp stream queue/scheduling. * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> */ #include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> /* First Come First Serve (a.k.a. 
FIFO) * RFC DRAFT ndata Section 3.1 */ static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid, __u16 value, gfp_t gfp) { return 0; } static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid, __u16 *value) { *value = 0; return 0; } static int sctp_sched_fcfs_init(struct sctp_stream *stream) { return 0; } static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { return 0; } static void sctp_sched_fcfs_free_sid(struct sctp_stream *stream, __u16 sid) { } static void sctp_sched_fcfs_enqueue(struct sctp_outq *q, struct sctp_datamsg *msg) { } static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q) { struct sctp_stream *stream = &q->asoc->stream; struct sctp_chunk *ch = NULL; struct list_head *entry; if (list_empty(&q->out_chunk_list)) goto out; if (stream->out_curr) { ch = list_entry(stream->out_curr->ext->outq.next, struct sctp_chunk, stream_list); } else { entry = q->out_chunk_list.next; ch = list_entry(entry, struct sctp_chunk, list); } sctp_sched_dequeue_common(q, ch); out: return ch; } static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q, struct sctp_chunk *chunk) { } static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream) { } static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream) { } static struct sctp_sched_ops sctp_sched_fcfs = { .set = sctp_sched_fcfs_set, .get = sctp_sched_fcfs_get, .init = sctp_sched_fcfs_init, .init_sid = sctp_sched_fcfs_init_sid, .free_sid = sctp_sched_fcfs_free_sid, .enqueue = sctp_sched_fcfs_enqueue, .dequeue = sctp_sched_fcfs_dequeue, .dequeue_done = sctp_sched_fcfs_dequeue_done, .sched_all = sctp_sched_fcfs_sched_all, .unsched_all = sctp_sched_fcfs_unsched_all, }; static void sctp_sched_ops_fcfs_init(void) { sctp_sched_ops_register(SCTP_SS_FCFS, &sctp_sched_fcfs); } /* API to other parts of the stack */ static struct sctp_sched_ops *sctp_sched_ops[SCTP_SS_MAX + 1]; void sctp_sched_ops_register(enum sctp_sched_type sched, struct 
sctp_sched_ops *sched_ops) { sctp_sched_ops[sched] = sched_ops; } void sctp_sched_ops_init(void) { sctp_sched_ops_fcfs_init(); sctp_sched_ops_prio_init(); sctp_sched_ops_rr_init(); sctp_sched_ops_fc_init(); sctp_sched_ops_wfq_init(); } static void sctp_sched_free_sched(struct sctp_stream *stream) { struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); struct sctp_stream_out_ext *soute; int i; sched->unsched_all(stream); for (i = 0; i < stream->outcnt; i++) { soute = SCTP_SO(stream, i)->ext; if (!soute) continue; sched->free_sid(stream, i); /* Give the next scheduler a clean slate. */ memset_after(soute, 0, outq); } } int sctp_sched_set_sched(struct sctp_association *asoc, enum sctp_sched_type sched) { struct sctp_sched_ops *old = asoc->outqueue.sched; struct sctp_datamsg *msg = NULL; struct sctp_sched_ops *n; struct sctp_chunk *ch; int i, ret = 0; if (sched > SCTP_SS_MAX) return -EINVAL; n = sctp_sched_ops[sched]; if (old == n) return ret; if (old) sctp_sched_free_sched(&asoc->stream); asoc->outqueue.sched = n; n->init(&asoc->stream); for (i = 0; i < asoc->stream.outcnt; i++) { if (!SCTP_SO(&asoc->stream, i)->ext) continue; ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC); if (ret) goto err; } /* We have to requeue all chunks already queued. 
*/ list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { if (ch->msg == msg) continue; msg = ch->msg; n->enqueue(&asoc->outqueue, msg); } return ret; err: sctp_sched_free_sched(&asoc->stream); asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */ return ret; } int sctp_sched_get_sched(struct sctp_association *asoc) { int i; for (i = 0; i <= SCTP_SS_MAX; i++) if (asoc->outqueue.sched == sctp_sched_ops[i]) return i; return 0; } int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, __u16 value, gfp_t gfp) { if (sid >= asoc->stream.outcnt) return -EINVAL; if (!SCTP_SO(&asoc->stream, sid)->ext) { int ret; ret = sctp_stream_init_ext(&asoc->stream, sid); if (ret) return ret; } return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp); } int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, __u16 *value) { if (sid >= asoc->stream.outcnt) return -EINVAL; if (!SCTP_SO(&asoc->stream, sid)->ext) return 0; return asoc->outqueue.sched->get(&asoc->stream, sid, value); } void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) { if (!list_is_last(&ch->frag_list, &ch->msg->chunks) && !q->asoc->peer.intl_capable) { struct sctp_stream_out *sout; __u16 sid; /* datamsg is not finish, so save it as current one, * in case application switch scheduler or a higher * priority stream comes in. 
*/ sid = sctp_chunk_stream_no(ch); sout = SCTP_SO(&q->asoc->stream, sid); q->asoc->stream.out_curr = sout; return; } q->asoc->stream.out_curr = NULL; q->sched->dequeue_done(q, ch); } /* Auxiliary functions for the schedulers */ void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) { list_del_init(&ch->list); list_del_init(&ch->stream_list); q->out_qlen -= ch->skb->len; } int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) { struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); struct sctp_stream_out_ext *ext = SCTP_SO(stream, sid)->ext; INIT_LIST_HEAD(&ext->outq); return sched->init_sid(stream, sid, gfp); } struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) { struct sctp_association *asoc; asoc = container_of(stream, struct sctp_association, stream); return asoc->outqueue.sched; }
10674 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __X86_KERNEL_FPU_INTERNAL_H
#define __X86_KERNEL_FPU_INTERNAL_H

extern struct fpstate init_fpstate;

/* CPU feature check wrappers */

/* True when cpu_feature_enabled() reports X86_FEATURE_XSAVE. */
static __always_inline __pure bool use_xsave(void)
{
	return cpu_feature_enabled(X86_FEATURE_XSAVE);
}

/* True when cpu_feature_enabled() reports X86_FEATURE_FXSR. */
static __always_inline __pure bool use_fxsr(void)
{
	return cpu_feature_enabled(X86_FEATURE_FXSR);
}

/*
 * WARN_ON_FPU(x): with CONFIG_X86_DEBUG_FPU it is WARN_ON_ONCE(x). Without
 * it, the expression is only passed to BUILD_BUG_ON_INVALID() and the macro
 * evaluates to 0.
 */
#ifdef CONFIG_X86_DEBUG_FPU
# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
#else
# define WARN_ON_FPU(x) ({ BUILD_BUG_ON_INVALID(x); 0; })
#endif

/* Used in init.c */
extern void fpstate_init_user(struct fpstate *fpstate);
extern void fpstate_reset(struct fpu *fpu);

#endif
3 1 2 2 2 2 2 1 5 1 5 1 4 2 4 1 1 1 1 1 1 1 10 1 9 3 3 3 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 // SPDX-License-Identifier: GPL-2.0-only #include <linux/phy.h> #include <linux/ethtool_netlink.h> #include <net/netdev_lock.h> #include "netlink.h" #include "common.h" /* 802.3 standard allows 100 meters for BaseT cables. 
However longer
 * cables might work, depending on the quality of the cables and the
 * PHY. So allow testing for up to 150 meters.
 */
#define MAX_CABLE_LENGTH_CM (150 * 100)

const struct nla_policy ethnl_cable_test_act_policy[] = {
	[ETHTOOL_A_CABLE_TEST_HEADER]		=
		NLA_POLICY_NESTED(ethnl_header_policy_phy),
};

/*
 * Broadcast a "cable test started" notification of type @cmd for the
 * device attached to @phydev.
 *
 * Returns the result of ethnl_multicast() on success. On any failure the
 * message is freed, the error is logged via phydev_err() and a negative
 * errno is returned (-ENOMEM if the skb could not be allocated, -EMSGSIZE
 * if the message header could not be added, otherwise the error from the
 * failing helper).
 */
static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd)
{
	struct sk_buff *skb;
	int err = -ENOMEM;
	void *ehdr;

	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		goto out;

	ehdr = ethnl_bcastmsg_put(skb, cmd);
	if (!ehdr) {
		err = -EMSGSIZE;
		goto out;
	}

	err = ethnl_fill_reply_header(skb, phydev->attached_dev,
				      ETHTOOL_A_CABLE_TEST_NTF_HEADER);
	if (err)
		goto out;

	err = nla_put_u8(skb, ETHTOOL_A_CABLE_TEST_NTF_STATUS,
			 ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED);
	if (err)
		goto out;

	genlmsg_end(skb, ehdr);

	return ethnl_multicast(skb, phydev->attached_dev);

out:
	/* skb is NULL here when the allocation itself failed. */
	nlmsg_free(skb);
	phydev_err(phydev, "%s: Error %pe\n", __func__, ERR_PTR(err));

	return err;
}

/*
 * Handler for the cable test request: resolve the target device and PHY
 * from the request header and start the test through
 * ethtool_phy_ops->start_cable_test().
 *
 * Returns a negative errno from header parsing or ethnl_ops_begin(),
 * -EOPNOTSUPP when no PHY device or no start_cable_test operation is
 * available, otherwise the result of start_cable_test().
 */
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
{
	struct ethnl_req_info req_info = {};
	const struct ethtool_phy_ops *ops;
	struct nlattr **tb = info->attrs;
	struct phy_device *phydev;
	struct net_device *dev;
	int ret;

	ret = ethnl_parse_header_dev_get(&req_info,
					 tb[ETHTOOL_A_CABLE_TEST_HEADER],
					 genl_info_net(info), info->extack,
					 true);
	if (ret < 0)
		return ret;

	dev = req_info.dev;

	/* rtnl first, then the per-device ops lock; released in reverse
	 * order at out_unlock.
	 */
	rtnl_lock();
	netdev_lock_ops(dev);
	phydev = ethnl_req_get_phydev(&req_info, tb,
				      ETHTOOL_A_CABLE_TEST_HEADER,
				      info->extack);
	/* Both an error pointer and NULL are reported as "not supported". */
	if (IS_ERR_OR_NULL(phydev)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	ops = ethtool_phy_ops;
	if (!ops || !ops->start_cable_test) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto out_unlock;

	ret = ops->start_cable_test(phydev, info->extack);

	ethnl_ops_complete(dev);

	/* The notification is best effort: its return value is not used. */
	if (!ret)
		ethnl_cable_test_started(phydev, ETHTOOL_MSG_CABLE_TEST_NTF);

out_unlock:
	netdev_unlock_ops(dev);
	rtnl_unlock();
	ethnl_parse_header_dev_put(&req_info);
	return ret;
}

/*
 * Allocate phydev->skb and prepare it as a "completed" notification of type
 * @cmd: message header, reply header, status attribute and an open
 * ETHTOOL_A_CABLE_TEST_NTF_NEST nest (saved in phydev->nest).
 *
 * Returns 0 on success. On failure the skb is freed, phydev->skb is reset
 * to NULL and a negative errno is returned.
 */
int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
{
	int err = -ENOMEM;

	/* One TDR sample occupies 20 bytes. For a 150 meter cable,
	 * with four pairs, around 12K is needed.
	 */
	phydev->skb = genlmsg_new(SZ_16K, GFP_KERNEL);
	if (!phydev->skb)
		goto out;

	phydev->ehdr = ethnl_bcastmsg_put(phydev->skb, cmd);
	if (!phydev->ehdr) {
		err = -EMSGSIZE;
		goto out;
	}

	err = ethnl_fill_reply_header(phydev->skb, phydev->attached_dev,
				      ETHTOOL_A_CABLE_TEST_NTF_HEADER);
	if (err)
		goto out;

	err = nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_TEST_NTF_STATUS,
			 ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED);
	if (err)
		goto out;

	phydev->nest = nla_nest_start(phydev->skb,
				      ETHTOOL_A_CABLE_TEST_NTF_NEST);
	if (!phydev->nest) {
		err = -EMSGSIZE;
		goto out;
	}

	return 0;

out:
	nlmsg_free(phydev->skb);
	phydev->skb = NULL;
	return err;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_alloc);

/* Free the pending result message of @phydev and clear phydev->skb. */
void ethnl_cable_test_free(struct phy_device *phydev)
{
	nlmsg_free(phydev->skb);
	phydev->skb = NULL;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_free);

/*
 * Close the result nest and the message in phydev->skb and multicast it.
 *
 * NOTE(review): phydev->skb is not cleared here and the ethnl_multicast()
 * result is not checked; presumably the skb is consumed by the multicast --
 * confirm against ethnl_multicast().
 */
void ethnl_cable_test_finished(struct phy_device *phydev)
{
	nla_nest_end(phydev->skb, phydev->nest);

	genlmsg_end(phydev->skb, phydev->ehdr);

	ethnl_multicast(phydev->skb, phydev->attached_dev);
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_finished);

/*
 * Append an ETHTOOL_A_CABLE_NEST_RESULT nest to phydev->skb carrying @pair
 * and the @result code. The @src attribute is added only when it is not
 * ETHTOOL_A_CABLE_INF_SRC_UNSPEC.
 *
 * Returns 0 on success, or -EMSGSIZE when the nest or one of its attributes
 * cannot be added; in the latter case the partial nest is cancelled.
 */
int ethnl_cable_test_result_with_src(struct phy_device *phydev, u8 pair,
				     u8 result, u32 src)
{
	struct nlattr *nest;
	int ret = -EMSGSIZE;

	nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_NEST_RESULT);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_PAIR, pair))
		goto err;
	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_CODE, result))
		goto err;
	if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
		if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_RESULT_SRC, src))
			goto err;
	}

	nla_nest_end(phydev->skb, nest);
	return 0;

err:
	nla_nest_cancel(phydev->skb, nest);
	return ret;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_result_with_src);

/*
 * Append an ETHTOOL_A_CABLE_NEST_FAULT_LENGTH nest to phydev->skb carrying
 * @pair and the fault distance @cm. The @src attribute is added only when
 * it is not ETHTOOL_A_CABLE_INF_SRC_UNSPEC.
 *
 * Returns 0 on success, or -EMSGSIZE when the nest or one of its attributes
 * cannot be added; in the latter case the partial nest is cancelled.
 */
int ethnl_cable_test_fault_length_with_src(struct phy_device *phydev, u8 pair,
					   u32 cm, u32 src)
{
	struct nlattr *nest;
	int ret = -EMSGSIZE;

	nest = nla_nest_start(phydev->skb,
			      ETHTOOL_A_CABLE_NEST_FAULT_LENGTH);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, pair))
		goto err;
	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_CM, cm))
		goto err;
	if (src != ETHTOOL_A_CABLE_INF_SRC_UNSPEC) {
		if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_SRC,
				src))
			goto err;
	}

	nla_nest_end(phydev->skb, nest);
	return 0;

err:
	nla_nest_cancel(phydev->skb, nest);
	return ret;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length_with_src);

static const struct nla_policy cable_test_tdr_act_cfg_policy[] = {
	[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]	= { .type = NLA_U32 },
	[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]	= { .type = NLA_U32 },
	[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]	= { .type = NLA_U32 },
	[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]	= { .type = NLA_U8 },
};

const struct nla_policy ethnl_cable_test_tdr_act_policy[] = {
	[ETHTOOL_A_CABLE_TEST_TDR_HEADER]	=
		NLA_POLICY_NESTED(ethnl_header_policy_phy),
	[ETHTOOL_A_CABLE_TEST_TDR_CFG]		= { .type = NLA_NESTED },
};

/* CABLE_TEST_TDR_ACT */

/*
 * Fill @cfg from the optional nested TDR configuration attribute @nest.
 *
 * Defaults are first = 100, step = 100, last = MAX_CABLE_LENGTH_CM and
 * pair = PHY_PAIR_ALL; with a NULL @nest these are returned as is.
 * Attributes present in @nest override the defaults and the result is then
 * validated.
 *
 * Returns 0 on success, the nla_parse_nested() error, or -EINVAL (with an
 * extack message) when a value is out of range.
 */
static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
					struct genl_info *info,
					struct phy_tdr_config *cfg)
{
	struct nlattr *tb[ARRAY_SIZE(cable_test_tdr_act_cfg_policy)];
	int ret;

	cfg->first = 100;
	cfg->step = 100;
	cfg->last = MAX_CABLE_LENGTH_CM;
	cfg->pair = PHY_PAIR_ALL;

	if (!nest)
		return 0;

	ret = nla_parse_nested(tb,
			       ARRAY_SIZE(cable_test_tdr_act_cfg_policy) - 1,
			       nest, cable_test_tdr_act_cfg_policy,
			       info->extack);
	if (ret < 0)
		return ret;

	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST])
		cfg->first = nla_get_u32(
			tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]);

	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST])
		cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]);

	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP])
		cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]);

	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) {
		cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]);
		if (cfg->pair > ETHTOOL_A_CABLE_PAIR_D) {
			NL_SET_ERR_MSG_ATTR(
				info->extack,
				tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR],
				"invalid pair parameter");
			return -EINVAL;
		}
	}

	if (cfg->first > MAX_CABLE_LENGTH_CM) {
		NL_SET_ERR_MSG_ATTR(info->extack,
				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST],
				    "invalid first parameter");
		return -EINVAL;
	}

	if (cfg->last > MAX_CABLE_LENGTH_CM) {
		NL_SET_ERR_MSG_ATTR(info->extack,
				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST],
				    "invalid last parameter");
		return -EINVAL;
	}

	if (cfg->first > cfg->last) {
		NL_SET_ERR_MSG(info->extack, "invalid first/last parameter");
		return -EINVAL;
	}

	if (!cfg->step) {
		NL_SET_ERR_MSG_ATTR(info->extack,
				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP],
				    "invalid step parameter");
		return -EINVAL;
	}

	/* first <= last was checked above, so the subtraction cannot wrap. */
	if (cfg->step > (cfg->last - cfg->first)) {
		NL_SET_ERR_MSG_ATTR(info->extack,
				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP],
				    "step parameter too big");
		return -EINVAL;
	}

	return 0;
}

/*
 * Handler for the TDR cable test request: resolve the device, parse and
 * validate the TDR configuration, then start the test through
 * ethtool_phy_ops->start_cable_test_tdr().
 *
 * Returns a negative errno from header/config parsing or ethnl_ops_begin(),
 * -EOPNOTSUPP when no PHY device or no start_cable_test_tdr operation is
 * available, otherwise the result of start_cable_test_tdr().
 */
int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
{
	struct ethnl_req_info req_info = {};
	const struct ethtool_phy_ops *ops;
	struct nlattr **tb = info->attrs;
	struct phy_device *phydev;
	struct phy_tdr_config cfg;
	struct net_device *dev;
	int ret;

	ret = ethnl_parse_header_dev_get(&req_info,
					 tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
					 genl_info_net(info), info->extack,
					 true);
	if (ret < 0)
		return ret;

	dev = req_info.dev;

	/* The configuration is validated before any lock is taken. */
	ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG],
					   info, &cfg);
	if (ret)
		goto out_dev_put;

	/* rtnl first, then the per-device ops lock; released in reverse
	 * order at out_unlock.
	 */
	rtnl_lock();
	netdev_lock_ops(dev);
	phydev = ethnl_req_get_phydev(&req_info, tb,
				      ETHTOOL_A_CABLE_TEST_TDR_HEADER,
				      info->extack);
	/* Both an error pointer and NULL are reported as "not supported". */
	if (IS_ERR_OR_NULL(phydev)) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	ops = ethtool_phy_ops;
	if (!ops || !ops->start_cable_test_tdr) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto out_unlock;

	ret = ops->start_cable_test_tdr(phydev, info->extack, &cfg);

	ethnl_ops_complete(dev);

	/* The notification is best effort: its return value is not used. */
	if (!ret)
		ethnl_cable_test_started(phydev,
					 ETHTOOL_MSG_CABLE_TEST_TDR_NTF);

out_unlock:
	netdev_unlock_ops(dev);
	rtnl_unlock();
out_dev_put:
	ethnl_parse_header_dev_put(&req_info);
	return ret;
}

/*
 * Append an ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE nest to phydev->skb with the
 * @pair and the amplitude @mV.
 *
 * NOTE(review): @mV is signed (s16) but is emitted with nla_put_u16() --
 * confirm that receivers interpret the attribute as signed.
 *
 * Returns 0 on success or -EMSGSIZE; a partial nest is cancelled.
 */
int ethnl_cable_test_amplitude(struct phy_device *phydev,
			       u8 pair, s16 mV)
{
	struct nlattr *nest;
	int ret = -EMSGSIZE;

	nest = nla_nest_start(phydev->skb,
			      ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_PAIR, pair))
		goto err;
	if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_mV, mV))
		goto err;

	nla_nest_end(phydev->skb, nest);
	return 0;

err:
	nla_nest_cancel(phydev->skb, nest);
	return ret;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_amplitude);

/*
 * Append an ETHTOOL_A_CABLE_TDR_NEST_PULSE nest to phydev->skb with the
 * pulse amplitude @mV.
 *
 * Returns 0 on success or -EMSGSIZE; a partial nest is cancelled.
 */
int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV)
{
	struct nlattr *nest;
	int ret = -EMSGSIZE;

	nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_PULSE);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_PULSE_mV, mV))
		goto err;

	nla_nest_end(phydev->skb, nest);
	return 0;

err:
	nla_nest_cancel(phydev->skb, nest);
	return ret;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_pulse);

/*
 * Append an ETHTOOL_A_CABLE_TDR_NEST_STEP nest to phydev->skb with the
 * @first, @last and @step distances.
 *
 * Returns 0 on success or -EMSGSIZE; a partial nest is cancelled.
 */
int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last,
			  u32 step)
{
	struct nlattr *nest;
	int ret = -EMSGSIZE;

	nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_STEP);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE,
			first))
		goto err;

	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_LAST_DISTANCE, last))
		goto err;

	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_STEP_DISTANCE, step))
		goto err;

	nla_nest_end(phydev->skb, nest);
	return 0;

err:
	nla_nest_cancel(phydev->skb, nest);
	return ret;
}
EXPORT_SYMBOL_GPL(ethnl_cable_test_step);
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Definitions for the RAW-IP module.
 *
 * Version:	@(#)raw.h	1.0.2	05/07/93
 *
 * Author:	Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 */
#ifndef _RAW_H
#define _RAW_H

#include <net/inet_sock.h>
#include <net/protocol.h>
#include <net/netns/hash.h>
#include <linux/hash.h>
#include <linux/icmp.h>

extern struct proto raw_prot;

extern struct raw_hashinfo raw_v4_hashinfo;
bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num,
		  __be32 raddr, __be32 laddr, int dif, int sdif);

int raw_abort(struct sock *sk, int err);
void raw_icmp_error(struct sk_buff *, int, u32);
int raw_local_deliver(struct sk_buff *, int);

int raw_rcv(struct sock *, struct sk_buff *);

/* The raw socket hash table has 2^RAW_HTABLE_LOG buckets. */
#define RAW_HTABLE_LOG	8
#define RAW_HTABLE_SIZE	(1U << RAW_HTABLE_LOG)

/* Raw socket hash table: one spinlock plus the bucket array. */
struct raw_hashinfo {
	spinlock_t lock;

	struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
};

/*
 * Hash (@net, @proto) down to RAW_HTABLE_LOG bits: the per-netns mix value
 * is XORed with the protocol number and passed through hash_32().
 * Presumably the result is used as an index into raw_hashinfo.ht[] --
 * verify against callers.
 */
static inline u32 raw_hashfunc(const struct net *net, u32 proto)
{
	return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG);
}

/* Initialize the lock and every bucket head of @hashinfo. */
static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
{
	int i;

	spin_lock_init(&hashinfo->lock);
	for (i = 0; i < RAW_HTABLE_SIZE; i++)
		INIT_HLIST_HEAD(&hashinfo->ht[i]);
}

#ifdef CONFIG_PROC_FS
int raw_proc_init(void);
void raw_proc_exit(void);

/* seq_file iterator state: netns private data plus a bucket position. */
struct raw_iter_state {
	struct seq_net_private p;
	int bucket;
};

/* Return the iterator state stored in seq->private. */
static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq)
{
	return seq->private;
}
void *raw_seq_start(struct seq_file *seq, loff_t *pos);
void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos);
void raw_seq_stop(struct seq_file *seq, void *v);
#endif

int raw_hash_sk(struct sock *sk);
void raw_unhash_sk(struct sock *sk);
void raw_init(void);

struct raw_sock {
	/* inet_sock has to be the first member */
	struct inet_sock   inet;
	struct icmp_filter filter;
	u32		   ipmr_table;
};

/* Convert a sock pointer to its enclosing raw_sock, preserving constness. */
#define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk)

/*
 * Wrapper around inet_bound_dev_eq() for raw sockets. With
 * CONFIG_NET_L3_MASTER_DEV its first argument is the per-netns
 * sysctl_raw_l3mdev_accept value; otherwise it is always true.
 */
static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if,
				       int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
	return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
				 bound_dev_if, dif, sdif);
#else
	return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
#endif
}

#endif	/* _RAW_H */
16 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 // SPDX-License-Identifier: GPL-2.0-or-later /* Sysfs attributes of bond slaves * * Copyright (c) 2014 Scott Feldman <sfeldma@cumulusnetworks.com> */ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/bonding.h> struct slave_attribute { struct attribute attr; ssize_t (*show)(struct slave *, char *); }; #define SLAVE_ATTR_RO(_name) \ const struct slave_attribute slave_attr_##_name = __ATTR_RO(_name) static ssize_t state_show(struct slave *slave, char *buf) { switch (bond_slave_state(slave)) { case BOND_STATE_ACTIVE: return sysfs_emit(buf, "active\n"); case BOND_STATE_BACKUP: return sysfs_emit(buf, "backup\n"); default: return sysfs_emit(buf, "UNKNOWN\n"); } } static SLAVE_ATTR_RO(state); static ssize_t mii_status_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%s\n", bond_slave_link_status(slave->link)); } static SLAVE_ATTR_RO(mii_status); static ssize_t link_failure_count_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%d\n", slave->link_failure_count); } static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%*phC\n", slave->dev->addr_len, slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); static ssize_t queue_id_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%d\n", READ_ONCE(slave->queue_id)); } static SLAVE_ATTR_RO(queue_id); static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) { const struct aggregator *agg; if 
(BOND_MODE(slave->bond) == BOND_MODE_8023AD) { agg = SLAVE_AD_INFO(slave)->port.aggregator; if (agg) return sysfs_emit(buf, "%d\n", agg->aggregator_identifier); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_aggregator_id); static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) { const struct port *ad_port; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; if (ad_port->aggregator) return sysfs_emit(buf, "%u\n", ad_port->actor_oper_port_state); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_actor_oper_port_state); static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) { const struct port *ad_port; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; if (ad_port->aggregator) return sysfs_emit(buf, "%u\n", ad_port->partner_oper.port_state); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_partner_oper_port_state); static const struct attribute *slave_attrs[] = { &slave_attr_state.attr, &slave_attr_mii_status.attr, &slave_attr_link_failure_count.attr, &slave_attr_perm_hwaddr.attr, &slave_attr_queue_id.attr, &slave_attr_ad_aggregator_id.attr, &slave_attr_ad_actor_oper_port_state.attr, &slave_attr_ad_partner_oper_port_state.attr, NULL }; #define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) static ssize_t slave_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct slave_attribute *slave_attr = to_slave_attr(attr); struct slave *slave = to_slave(kobj); return slave_attr->show(slave, buf); } const struct sysfs_ops slave_sysfs_ops = { .show = slave_show, }; int bond_sysfs_slave_add(struct slave *slave) { return sysfs_create_files(&slave->kobj, slave_attrs); } void bond_sysfs_slave_del(struct slave *slave) { sysfs_remove_files(&slave->kobj, slave_attrs); }
299 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Because linux/module.h has tracepoints in the header, and ftrace.h
 * used to include this file, define_trace.h includes linux/module.h.
 * But we do not want module.h to override the TRACE_SYSTEM macro
 * variable that define_trace.h is processing, so we only set it
 * when module events are being processed, which would happen when
 * CREATE_TRACE_POINTS is defined.
 */
#ifdef CREATE_TRACE_POINTS
#undef TRACE_SYSTEM
#define TRACE_SYSTEM module
#endif

#if !defined(_TRACE_MODULE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MODULE_H

#include <linux/tracepoint.h>

#ifdef CONFIG_MODULES

struct module;

/* Render a taints bitmask as the flag letters P, O, F, C and E. */
#define show_module_flags(flags) __print_flags(flags, "",	\
	{ (1UL << TAINT_PROPRIETARY_MODULE),	"P" },		\
	{ (1UL << TAINT_OOT_MODULE),		"O" },		\
	{ (1UL << TAINT_FORCED_MODULE),		"F" },		\
	{ (1UL << TAINT_CRAP),			"C" },		\
	{ (1UL << TAINT_UNSIGNED_MODULE),	"E" })

/* module_load: records the module name and its taint flags. */
TRACE_EVENT(module_load,

	TP_PROTO(struct module *mod),

	TP_ARGS(mod),

	TP_STRUCT__entry(
		__field(	unsigned int,	taints		)
		__string(	name,		mod->name	)
	),

	TP_fast_assign(
		__entry->taints = mod->taints;
		__assign_str(name);
	),

	TP_printk("%s %s", __get_str(name), show_module_flags(__entry->taints))
);

/* module_free: records the module name only. */
TRACE_EVENT(module_free,

	TP_PROTO(struct module *mod),

	TP_ARGS(mod),

	TP_STRUCT__entry(
		__string(	name,		mod->name	)
	),

	TP_fast_assign(
		__assign_str(name);
	),

	TP_printk("%s", __get_str(name))
);

#ifdef CONFIG_MODULE_UNLOAD
/* trace_module_get/put are only used if CONFIG_MODULE_UNLOAD is defined */

/*
 * Event class shared by module_get and module_put: records the call site,
 * the module's refcount as read when the event fires, and the module name.
 */
DECLARE_EVENT_CLASS(module_refcnt,

	TP_PROTO(struct module *mod, unsigned long ip),

	TP_ARGS(mod, ip),

	TP_STRUCT__entry(
		__field(	unsigned long,	ip		)
		__field(	int,		refcnt		)
		__string(	name,		mod->name	)
	),

	TP_fast_assign(
		__entry->ip	= ip;
		__entry->refcnt	= atomic_read(&mod->refcnt);
		__assign_str(name);
	),

	TP_printk("%s call_site=%ps refcnt=%d",
		  __get_str(name), (void *)__entry->ip, __entry->refcnt)
);

DEFINE_EVENT(module_refcnt, module_get,

	TP_PROTO(struct module *mod, unsigned long ip),

	TP_ARGS(mod, ip)
);

DEFINE_EVENT(module_refcnt, module_put,

	TP_PROTO(struct module *mod, unsigned long ip),

	TP_ARGS(mod, ip)
);
#endif /* CONFIG_MODULE_UNLOAD */

/*
 * module_request: records the requested module name, the wait flag and the
 * call site.
 */
TRACE_EVENT(module_request,

	TP_PROTO(char *name, bool wait, unsigned long ip),

	TP_ARGS(name, wait, ip),

	TP_STRUCT__entry(
		__field(	unsigned long,	ip		)
		__field(	bool,		wait		)
		__string(	name,		name		)
	),

	TP_fast_assign(
		__entry->ip	= ip;
		__entry->wait	= wait;
		__assign_str(name);
	),

	TP_printk("%s wait=%d call_site=%ps",
		  __get_str(name), (int)__entry->wait, (void *)__entry->ip)
);

#endif /* CONFIG_MODULES */

#endif /* _TRACE_MODULE_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 
1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 
1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 
1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 
2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 
2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 // SPDX-License-Identifier: GPL-2.0 /* * * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. * */ #include <linux/blkdev.h> #include <linux/buffer_head.h> #include <linux/fs.h> #include <linux/kernel.h> #include "debug.h" #include "ntfs.h" #include "ntfs_fs.h" static const struct INDEX_NAMES { const __le16 *name; u8 name_len; } s_index_names[INDEX_MUTEX_TOTAL] = { { I30_NAME, ARRAY_SIZE(I30_NAME) }, { SII_NAME, ARRAY_SIZE(SII_NAME) }, { SDH_NAME, ARRAY_SIZE(SDH_NAME) }, { SO_NAME, ARRAY_SIZE(SO_NAME) }, { SQ_NAME, ARRAY_SIZE(SQ_NAME) }, { SR_NAME, ARRAY_SIZE(SR_NAME) }, }; /* * cmp_fnames - Compare two names in index. * * if l1 != 0 * Both names are little endian on-disk ATTR_FILE_NAME structs. * else * key1 - cpu_str, key2 - ATTR_FILE_NAME */ static int cmp_fnames(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const struct ATTR_FILE_NAME *f2 = key2; const struct ntfs_sb_info *sbi = data; const struct ATTR_FILE_NAME *f1; u16 fsize2; bool both_case; if (l2 <= offsetof(struct ATTR_FILE_NAME, name)) return -1; fsize2 = fname_full_size(f2); if (l2 < fsize2) return -1; both_case = f2->type != FILE_NAME_DOS && !sbi->options->nocase; if (!l1) { const struct le_str *s2 = (struct le_str *)&f2->name_len; /* * If names are equal (case insensitive) * try to compare it case sensitive. 
*/ return ntfs_cmp_names_cpu(key1, s2, sbi->upcase, both_case); } f1 = key1; return ntfs_cmp_names(f1->name, f1->name_len, f2->name, f2->name_len, sbi->upcase, both_case); } /* * cmp_uint - $SII of $Secure and $Q of Quota */ static int cmp_uint(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const u32 *k1 = key1; const u32 *k2 = key2; if (l2 < sizeof(u32)) return -1; if (*k1 < *k2) return -1; if (*k1 > *k2) return 1; return 0; } /* * cmp_sdh - $SDH of $Secure */ static int cmp_sdh(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const struct SECURITY_KEY *k1 = key1; const struct SECURITY_KEY *k2 = key2; u32 t1, t2; if (l2 < sizeof(struct SECURITY_KEY)) return -1; t1 = le32_to_cpu(k1->hash); t2 = le32_to_cpu(k2->hash); /* First value is a hash value itself. */ if (t1 < t2) return -1; if (t1 > t2) return 1; /* Second value is security Id. */ if (data) { t1 = le32_to_cpu(k1->sec_id); t2 = le32_to_cpu(k2->sec_id); if (t1 < t2) return -1; if (t1 > t2) return 1; } return 0; } /* * cmp_uints - $O of ObjId and "$R" for Reparse. */ static int cmp_uints(const void *key1, size_t l1, const void *key2, size_t l2, const void *data) { const __le32 *k1 = key1; const __le32 *k2 = key2; size_t count; if ((size_t)data == 1) { /* * ni_delete_all -> ntfs_remove_reparse -> * delete all with this reference. 
* k1, k2 - pointers to REPARSE_KEY */ k1 += 1; // Skip REPARSE_KEY.ReparseTag k2 += 1; // Skip REPARSE_KEY.ReparseTag if (l2 <= sizeof(int)) return -1; l2 -= sizeof(int); if (l1 <= sizeof(int)) return 1; l1 -= sizeof(int); } if (l2 < sizeof(int)) return -1; for (count = min(l1, l2) >> 2; count > 0; --count, ++k1, ++k2) { u32 t1 = le32_to_cpu(*k1); u32 t2 = le32_to_cpu(*k2); if (t1 > t2) return 1; if (t1 < t2) return -1; } if (l1 > l2) return 1; if (l1 < l2) return -1; return 0; } static inline NTFS_CMP_FUNC get_cmp_func(const struct INDEX_ROOT *root) { switch (root->type) { case ATTR_NAME: if (root->rule == NTFS_COLLATION_TYPE_FILENAME) return &cmp_fnames; break; case ATTR_ZERO: switch (root->rule) { case NTFS_COLLATION_TYPE_UINT: return &cmp_uint; case NTFS_COLLATION_TYPE_SECURITY_HASH: return &cmp_sdh; case NTFS_COLLATION_TYPE_UINTS: return &cmp_uints; default: break; } break; default: break; } return NULL; } struct bmp_buf { struct ATTRIB *b; struct mft_inode *mi; struct buffer_head *bh; ulong *buf; size_t bit; u32 nbits; u64 new_valid; }; static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit, struct bmp_buf *bbuf) { struct ATTRIB *b; size_t data_size, valid_size, vbo, off = bit >> 3; struct ntfs_sb_info *sbi = ni->mi.sbi; CLST vcn = off >> sbi->cluster_bits; struct ATTR_LIST_ENTRY *le = NULL; struct buffer_head *bh; struct super_block *sb; u32 blocksize; const struct INDEX_NAMES *in = &s_index_names[indx->type]; bbuf->bh = NULL; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, &vcn, &bbuf->mi); bbuf->b = b; if (!b) return -EINVAL; if (!b->non_res) { data_size = le32_to_cpu(b->res.data_size); if (off >= data_size) return -EINVAL; bbuf->buf = (ulong *)resident_data(b); bbuf->bit = 0; bbuf->nbits = data_size * 8; return 0; } data_size = le64_to_cpu(b->nres.data_size); if (WARN_ON(off >= data_size)) { /* Looks like filesystem error. 
*/ return -EINVAL; } valid_size = le64_to_cpu(b->nres.valid_size); bh = ntfs_bread_run(sbi, &indx->bitmap_run, off); if (!bh) return -EIO; if (IS_ERR(bh)) return PTR_ERR(bh); bbuf->bh = bh; if (buffer_locked(bh)) __wait_on_buffer(bh); lock_buffer(bh); sb = sbi->sb; blocksize = sb->s_blocksize; vbo = off & ~(size_t)sbi->block_mask; bbuf->new_valid = vbo + blocksize; if (bbuf->new_valid <= valid_size) bbuf->new_valid = 0; else if (bbuf->new_valid > data_size) bbuf->new_valid = data_size; if (vbo >= valid_size) { memset(bh->b_data, 0, blocksize); } else if (vbo + blocksize > valid_size) { u32 voff = valid_size & sbi->block_mask; memset(bh->b_data + voff, 0, blocksize - voff); } bbuf->buf = (ulong *)bh->b_data; bbuf->bit = 8 * (off & ~(size_t)sbi->block_mask); bbuf->nbits = 8 * blocksize; return 0; } static void bmp_buf_put(struct bmp_buf *bbuf, bool dirty) { struct buffer_head *bh = bbuf->bh; struct ATTRIB *b = bbuf->b; if (!bh) { if (b && !b->non_res && dirty) bbuf->mi->dirty = true; return; } if (!dirty) goto out; if (bbuf->new_valid) { b->nres.valid_size = cpu_to_le64(bbuf->new_valid); bbuf->mi->dirty = true; } set_buffer_uptodate(bh); mark_buffer_dirty(bh); out: unlock_buffer(bh); put_bh(bh); } /* * indx_mark_used - Mark the bit @bit as used. */ static int indx_mark_used(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit) { int err; struct bmp_buf bbuf; err = bmp_buf_get(indx, ni, bit, &bbuf); if (err) return err; __set_bit_le(bit - bbuf.bit, bbuf.buf); bmp_buf_put(&bbuf, true); return 0; } /* * indx_mark_free - Mark the bit @bit as free. */ static int indx_mark_free(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit) { int err; struct bmp_buf bbuf; err = bmp_buf_get(indx, ni, bit, &bbuf); if (err) return err; __clear_bit_le(bit - bbuf.bit, bbuf.buf); bmp_buf_put(&bbuf, true); return 0; } /* * scan_nres_bitmap * * If ntfs_readdir calls this function (indx_used_bit -> scan_nres_bitmap), * inode is shared locked and no ni_lock. 
 * Use rw_semaphore for read/write access to bitmap_run.
 */
static int scan_nres_bitmap(struct ntfs_inode *ni, struct ATTRIB *bitmap,
			    struct ntfs_index *indx, size_t from,
			    bool (*fn)(const ulong *buf, u32 bit, u32 bits,
				       size_t *ret),
			    size_t *ret)
{
	struct ntfs_sb_info *sbi = ni->mi.sbi;
	struct super_block *sb = sbi->sb;
	struct runs_tree *run = &indx->bitmap_run;
	struct rw_semaphore *lock = &indx->run_lock;
	u32 nbits = sb->s_blocksize * 8;
	u32 blocksize = sb->s_blocksize;
	u64 valid_size = le64_to_cpu(bitmap->nres.valid_size);
	u64 data_size = le64_to_cpu(bitmap->nres.data_size);
	sector_t eblock = bytes_to_block(sb, data_size);
	size_t vbo = from >> 3;
	sector_t blk = (vbo & sbi->cluster_mask) >> sb->s_blocksize_bits;
	sector_t vblock = vbo >> sb->s_blocksize_bits;
	sector_t blen, block;
	CLST lcn, clen, vcn, vcn_next;
	size_t idx;
	struct buffer_head *bh;
	bool ok;

	/* Default result: nothing found. */
	*ret = MINUS_ONE_T;

	/* Starting block is at or past the end of the bitmap data. */
	if (vblock >= eblock)
		return 0;

	/* Keep only the bit offset inside the first block to scan. */
	from &= nbits - 1;
	vcn = vbo >> sbi->cluster_bits;

	down_read(lock);
	ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
	up_read(lock);

next_run:
	if (!ok) {
		int err;
		const struct INDEX_NAMES *name = &s_index_names[indx->type];

		/* Run for @vcn is not cached: load it under the write lock. */
		down_write(lock);
		err = attr_load_runs_vcn(ni, ATTR_BITMAP, name->name,
					 name->name_len, run, vcn);
		up_write(lock);
		if (err)
			return err;
		down_read(lock);
		ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx);
		up_read(lock);
		if (!ok)
			return -EINVAL;
	}

	blen = (sector_t)clen * sbi->blocks_per_cluster;
	block = (sector_t)lcn * sbi->blocks_per_cluster;

	/* Only the first scanned block starts at a non-zero bit offset. */
	for (; blk < blen; blk++, from = 0) {
		bh = ntfs_bread(sb, block + blk);
		if (!bh)
			return -EIO;

		/* Bytes at or beyond valid_size are zeroed in the buffer. */
		vbo = (u64)vblock << sb->s_blocksize_bits;
		if (vbo >= valid_size) {
			memset(bh->b_data, 0, blocksize);
		} else if (vbo + blocksize > valid_size) {
			u32 voff = valid_size & sbi->block_mask;

			memset(bh->b_data + voff, 0, blocksize - voff);
		}

		/* Do not scan bits beyond data_size in the last block. */
		if (vbo + blocksize > data_size)
			nbits = 8 * (data_size - vbo);

		ok = nbits > from ?
			     (*fn)((ulong *)bh->b_data, from, nbits, ret) :
			     false;
		put_bh(bh);

		if (ok) {
			/* @fn reported a block-relative bit; make it absolute. */
			*ret += 8 * vbo;
			return 0;
		}

		if (++vblock >= eblock) {
			*ret = MINUS_ONE_T;
			return 0;
		}
	}
	blk = 0;
	vcn_next = vcn + clen;
	down_read(lock);
	/* Use the next cached run only if it directly follows this one. */
	ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && vcn == vcn_next;
	if (!ok)
		vcn = vcn_next;
	up_read(lock);
	goto next_run;
}

/*
 * scan_for_free - scan_nres_bitmap() callback: find the first zero bit in
 * [@bit, @bits) of @buf.
 *
 * Return: true and the position in *@ret if found, false otherwise.
 */
static bool scan_for_free(const ulong *buf, u32 bit, u32 bits, size_t *ret)
{
	size_t pos = find_next_zero_bit_le(buf, bits, bit);

	if (pos >= bits)
		return false;
	*ret = pos;
	return true;
}

/*
 * indx_find_free - Look for free bit.
 *
 * On success *@bitmap is the bitmap attribute and *@bit is the index of
 * the first zero bit, or MINUS_ONE_T if there are no free bits.
 *
 * Return: 0 on success, -ENOENT if the bitmap attribute is not found, or
 * the error returned by scan_nres_bitmap().
 */
static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni,
			  size_t *bit, struct ATTRIB **bitmap)
{
	struct ATTRIB *b;
	struct ATTR_LIST_ENTRY *le = NULL;
	const struct INDEX_NAMES *in = &s_index_names[indx->type];
	int err;

	b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len,
			 NULL, NULL);

	if (!b)
		return -ENOENT;

	*bitmap = b;
	*bit = MINUS_ONE_T;

	if (!b->non_res) {
		u32 nbits = 8 * le32_to_cpu(b->res.data_size);
		size_t pos = find_next_zero_bit_le(resident_data(b), nbits, 0);

		if (pos < nbits)
			*bit = pos;
	} else {
		err = scan_nres_bitmap(ni, b, indx, 0, &scan_for_free, bit);

		if (err)
			return err;
	}

	return 0;
}

/*
 * scan_for_used - scan_nres_bitmap() callback: find the first set bit in
 * [@bit, @bits) of @buf.
 *
 * Return: true and the position in *@ret if found, false otherwise.
 */
static bool scan_for_used(const ulong *buf, u32 bit, u32 bits, size_t *ret)
{
	size_t pos = find_next_bit_le(buf, bits, bit);

	if (pos >= bits)
		return false;
	*ret = pos;
	return true;
}

/*
 * indx_used_bit - Look for used bit.
 *
 * Return: MINUS_ONE_T if no used bits.
*/ int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit) { struct ATTRIB *b; struct ATTR_LIST_ENTRY *le = NULL; size_t from = *bit; const struct INDEX_NAMES *in = &s_index_names[indx->type]; int err; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, NULL, NULL); if (!b) return -ENOENT; *bit = MINUS_ONE_T; if (!b->non_res) { u32 nbits = le32_to_cpu(b->res.data_size) * 8; size_t pos = find_next_bit_le(resident_data(b), nbits, from); if (pos < nbits) *bit = pos; } else { err = scan_nres_bitmap(ni, b, indx, from, &scan_for_used, bit); if (err) return err; } return 0; } /* * hdr_find_split * * Find a point at which the index allocation buffer would like to be split. * NOTE: This function should never return 'END' entry NULL returns on error. */ static const struct NTFS_DE *hdr_find_split(const struct INDEX_HDR *hdr) { size_t o; const struct NTFS_DE *e = hdr_first_de(hdr); u32 used_2 = le32_to_cpu(hdr->used) >> 1; u16 esize; if (!e || de_is_last(e)) return NULL; esize = le16_to_cpu(e->size); for (o = le32_to_cpu(hdr->de_off) + esize; o < used_2; o += esize) { const struct NTFS_DE *p = e; e = Add2Ptr(hdr, o); /* We must not return END entry. */ if (de_is_last(e)) return p; esize = le16_to_cpu(e->size); } return e; } /* * hdr_insert_head - Insert some entries at the beginning of the buffer. * * It is used to insert entries into a newly-created buffer. */ static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr, const void *ins, u32 ins_bytes) { u32 to_move; struct NTFS_DE *e = hdr_first_de(hdr); u32 used = le32_to_cpu(hdr->used); if (!e) return NULL; /* Now we just make room for the inserted entries and jam it in. 
*/ to_move = used - le32_to_cpu(hdr->de_off); memmove(Add2Ptr(e, ins_bytes), e, to_move); memcpy(e, ins, ins_bytes); hdr->used = cpu_to_le32(used + ins_bytes); return e; } /* * index_hdr_check * * return true if INDEX_HDR is valid */ static bool index_hdr_check(const struct INDEX_HDR *hdr, u32 bytes) { u32 end = le32_to_cpu(hdr->used); u32 tot = le32_to_cpu(hdr->total); u32 off = le32_to_cpu(hdr->de_off); if (!IS_ALIGNED(off, 8) || tot > bytes || end > tot || size_add(off, sizeof(struct NTFS_DE)) > end) { /* incorrect index buffer. */ return false; } return true; } /* * index_buf_check * * return true if INDEX_BUFFER seems is valid */ static bool index_buf_check(const struct INDEX_BUFFER *ib, u32 bytes, const CLST *vbn) { const struct NTFS_RECORD_HEADER *rhdr = &ib->rhdr; u16 fo = le16_to_cpu(rhdr->fix_off); u16 fn = le16_to_cpu(rhdr->fix_num); if (bytes <= offsetof(struct INDEX_BUFFER, ihdr) || rhdr->sign != NTFS_INDX_SIGNATURE || fo < sizeof(struct INDEX_BUFFER) /* Check index buffer vbn. */ || (vbn && *vbn != le64_to_cpu(ib->vbn)) || (fo % sizeof(short)) || fo + fn * sizeof(short) >= bytes || fn != ((bytes >> SECTOR_SHIFT) + 1)) { /* incorrect index buffer. 
*/ return false; } return index_hdr_check(&ib->ihdr, bytes - offsetof(struct INDEX_BUFFER, ihdr)); } void fnd_clear(struct ntfs_fnd *fnd) { int i; for (i = fnd->level - 1; i >= 0; i--) { struct indx_node *n = fnd->nodes[i]; if (!n) continue; put_indx_node(n); fnd->nodes[i] = NULL; } fnd->level = 0; fnd->root_de = NULL; } static int fnd_push(struct ntfs_fnd *fnd, struct indx_node *n, struct NTFS_DE *e) { int i = fnd->level; if (i < 0 || i >= ARRAY_SIZE(fnd->nodes)) return -EINVAL; fnd->nodes[i] = n; fnd->de[i] = e; fnd->level += 1; return 0; } static struct indx_node *fnd_pop(struct ntfs_fnd *fnd) { struct indx_node *n; int i = fnd->level; i -= 1; n = fnd->nodes[i]; fnd->nodes[i] = NULL; fnd->level = i; return n; } static bool fnd_is_empty(struct ntfs_fnd *fnd) { if (!fnd->level) return !fnd->root_de; return !fnd->de[fnd->level - 1]; } /* * hdr_find_e - Locate an entry the index buffer. * * If no matching entry is found, it returns the first entry which is greater * than the desired entry If the search key is greater than all the entries the * buffer, it returns the 'end' entry. This function does a binary search of the * current index buffer, for the first entry that is <= to the search value. * * Return: NULL if error. 
*/ static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, const struct INDEX_HDR *hdr, const void *key, size_t key_len, const void *ctx, int *diff) { struct NTFS_DE *e, *found = NULL; NTFS_CMP_FUNC cmp = indx->cmp; int min_idx = 0, mid_idx, max_idx = 0; int diff2; int table_size = 8; u32 e_size, e_key_len; u32 end = le32_to_cpu(hdr->used); u32 off = le32_to_cpu(hdr->de_off); u32 total = le32_to_cpu(hdr->total); u16 offs[128]; if (unlikely(!cmp)) return NULL; fill_table: if (end > total) return NULL; if (size_add(off, sizeof(struct NTFS_DE)) > end) return NULL; e = Add2Ptr(hdr, off); e_size = le16_to_cpu(e->size); if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) return NULL; if (!de_is_last(e)) { offs[max_idx] = off; off += e_size; max_idx++; if (max_idx < table_size) goto fill_table; max_idx--; } binary_search: e_key_len = le16_to_cpu(e->key_size); diff2 = (*cmp)(key, key_len, e + 1, e_key_len, ctx); if (diff2 > 0) { if (found) { min_idx = mid_idx + 1; } else { if (de_is_last(e)) return NULL; max_idx = 0; table_size = min(table_size * 2, (int)ARRAY_SIZE(offs)); goto fill_table; } } else if (diff2 < 0) { if (found) max_idx = mid_idx - 1; else max_idx--; found = e; } else { *diff = 0; return e; } if (min_idx > max_idx) { *diff = -1; return found; } mid_idx = (min_idx + max_idx) >> 1; e = Add2Ptr(hdr, offs[mid_idx]); goto binary_search; } /* * hdr_insert_de - Insert an index entry into the buffer. * * 'before' should be a pointer previously returned from hdr_find_e. */ static struct NTFS_DE *hdr_insert_de(const struct ntfs_index *indx, struct INDEX_HDR *hdr, const struct NTFS_DE *de, struct NTFS_DE *before, const void *ctx) { int diff; size_t off = PtrOffset(hdr, before); u32 used = le32_to_cpu(hdr->used); u32 total = le32_to_cpu(hdr->total); u16 de_size = le16_to_cpu(de->size); /* First, check to see if there's enough room. */ if (used + de_size > total) return NULL; /* We know there's enough space, so we know we'll succeed. 
*/ if (before) { /* Check that before is inside Index. */ if (off >= used || off < le32_to_cpu(hdr->de_off) || off + le16_to_cpu(before->size) > total) { return NULL; } goto ok; } /* No insert point is applied. Get it manually. */ before = hdr_find_e(indx, hdr, de + 1, le16_to_cpu(de->key_size), ctx, &diff); if (!before) return NULL; off = PtrOffset(hdr, before); ok: /* Now we just make room for the entry and jam it in. */ memmove(Add2Ptr(before, de_size), before, used - off); hdr->used = cpu_to_le32(used + de_size); memcpy(before, de, de_size); return before; } /* * hdr_delete_de - Remove an entry from the index buffer. */ static inline struct NTFS_DE *hdr_delete_de(struct INDEX_HDR *hdr, struct NTFS_DE *re) { u32 used = le32_to_cpu(hdr->used); u16 esize = le16_to_cpu(re->size); u32 off = PtrOffset(hdr, re); int bytes = used - (off + esize); /* check INDEX_HDR valid before using INDEX_HDR */ if (!check_index_header(hdr, le32_to_cpu(hdr->total))) return NULL; if (off >= used || esize < sizeof(struct NTFS_DE) || bytes < sizeof(struct NTFS_DE)) return NULL; hdr->used = cpu_to_le32(used - esize); memmove(re, Add2Ptr(re, esize), bytes); return re; } void indx_clear(struct ntfs_index *indx) { run_close(&indx->alloc_run); run_close(&indx->bitmap_run); } int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, const struct ATTRIB *attr, enum index_mutex_classed type) { u32 t32; const struct INDEX_ROOT *root = resident_data(attr); t32 = le32_to_cpu(attr->res.data_size); if (t32 <= offsetof(struct INDEX_ROOT, ihdr) || !index_hdr_check(&root->ihdr, t32 - offsetof(struct INDEX_ROOT, ihdr))) { goto out; } /* Check root fields. */ if (!root->index_block_clst) goto out; indx->type = type; indx->idx2vbn_bits = __ffs(root->index_block_clst); t32 = le32_to_cpu(root->index_block_size); indx->index_bits = blksize_bits(t32); /* Check index record size. */ if (t32 < sbi->cluster_size) { /* Index record is smaller than a cluster, use 512 blocks. 
*/ if (t32 != root->index_block_clst * SECTOR_SIZE) goto out; /* Check alignment to a cluster. */ if ((sbi->cluster_size >> SECTOR_SHIFT) & (root->index_block_clst - 1)) { goto out; } indx->vbn2vbo_bits = SECTOR_SHIFT; } else { /* Index record must be a multiple of cluster size. */ if (t32 != root->index_block_clst << sbi->cluster_bits) goto out; indx->vbn2vbo_bits = sbi->cluster_bits; } init_rwsem(&indx->run_lock); indx->cmp = get_cmp_func(root); if (!indx->cmp) goto out; return 0; out: ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); return -EINVAL; } static struct indx_node *indx_new(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, const __le64 *sub_vbn) { int err; struct NTFS_DE *e; struct indx_node *r; struct INDEX_HDR *hdr; struct INDEX_BUFFER *index; u64 vbo = (u64)vbn << indx->vbn2vbo_bits; u32 bytes = 1u << indx->index_bits; u16 fn; u32 eo; r = kzalloc(sizeof(struct indx_node), GFP_NOFS); if (!r) return ERR_PTR(-ENOMEM); index = kzalloc(bytes, GFP_NOFS); if (!index) { kfree(r); return ERR_PTR(-ENOMEM); } err = ntfs_get_bh(ni->mi.sbi, &indx->alloc_run, vbo, bytes, &r->nb); if (err) { kfree(index); kfree(r); return ERR_PTR(err); } /* Create header. 
*/ index->rhdr.sign = NTFS_INDX_SIGNATURE; index->rhdr.fix_off = cpu_to_le16(sizeof(struct INDEX_BUFFER)); // 0x28 fn = (bytes >> SECTOR_SHIFT) + 1; // 9 index->rhdr.fix_num = cpu_to_le16(fn); index->vbn = cpu_to_le64(vbn); hdr = &index->ihdr; eo = ALIGN(sizeof(struct INDEX_BUFFER) + fn * sizeof(short), 8); hdr->de_off = cpu_to_le32(eo); e = Add2Ptr(hdr, eo); if (sub_vbn) { e->flags = NTFS_IE_LAST | NTFS_IE_HAS_SUBNODES; e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64)); de_set_vbn_le(e, *sub_vbn); hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES; } else { e->size = cpu_to_le16(sizeof(struct NTFS_DE)); hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE)); e->flags = NTFS_IE_LAST; } hdr->total = cpu_to_le32(bytes - offsetof(struct INDEX_BUFFER, ihdr)); r->index = index; return r; } struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTRIB **attr, struct mft_inode **mi) { struct ATTR_LIST_ENTRY *le = NULL; struct ATTRIB *a; const struct INDEX_NAMES *in = &s_index_names[indx->type]; struct INDEX_ROOT *root; a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, mi); if (!a) return NULL; if (attr) *attr = a; root = resident_data_ex(a, sizeof(struct INDEX_ROOT)); /* length check */ if (root && offsetof(struct INDEX_ROOT, ihdr) + le32_to_cpu(root->ihdr.used) > le32_to_cpu(a->res.data_size)) { return NULL; } return root; } static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, struct indx_node *node, int sync) { struct INDEX_BUFFER *ib = node->index; return ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &node->nb, sync); } /* * indx_read * * If ntfs_readdir calls this function * inode is shared locked and no ni_lock. * Use rw_semaphore for read/write access to alloc_run. 
/*
 * indx_read - Read one index buffer into an indx_node.
 *
 * @indx: index; supplies alloc_run, run_lock and the buffer geometry.
 * @ni:   inode that owns the index.
 * @vbn:  virtual block number of the buffer to read.
 * @node: in/out. If *@node is NULL a node is allocated here; otherwise the
 *        given node is reused after nb_put() on its buffer heads.
 *
 * If ntfs_readdir calls this function
 * inode is shared locked and no ni_lock.
 * Use rw_semaphore for read/write access to alloc_run.
 *
 * Return: 0 with *@node set on success, negative errno otherwise.
 */
int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn,
	      struct indx_node **node)
{
	int err;
	struct INDEX_BUFFER *ib;
	struct runs_tree *run = &indx->alloc_run;
	struct rw_semaphore *lock = &indx->run_lock;
	u64 vbo = (u64)vbn << indx->vbn2vbo_bits;
	u32 bytes = 1u << indx->index_bits;
	struct indx_node *in = *node;
	const struct INDEX_NAMES *name;

	if (!in) {
		in = kzalloc(sizeof(struct indx_node), GFP_NOFS);
		if (!in)
			return -ENOMEM;
	} else {
		/* Reusing the caller's node: drop its old buffer heads. */
		nb_put(&in->nb);
	}

	/* Reuse the node's data buffer if it already has one. */
	ib = in->index;
	if (!ib) {
		ib = kmalloc(bytes, GFP_NOFS);
		if (!ib) {
			err = -ENOMEM;
			goto out;
		}
	}

	/* First attempt: read with the runs already in memory. */
	down_read(lock);
	err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb);
	up_read(lock);
	if (!err)
		goto ok;

	/* A fixup error still goes through validation; handled at 'ok'. */
	if (err == -E_NTFS_FIXUP)
		goto ok;

	/* Only -ENOENT is retried after loading the runs for this range. */
	if (err != -ENOENT)
		goto out;

	name = &s_index_names[indx->type];
	down_write(lock);
	err = attr_load_runs_range(ni, ATTR_ALLOC, name->name, name->name_len,
				   run, vbo, vbo + bytes);
	up_write(lock);
	if (err)
		goto out;

	/* Second attempt, now that the runs are loaded. */
	down_read(lock);
	err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb);
	up_read(lock);
	if (err == -E_NTFS_FIXUP)
		goto ok;

	if (err)
		goto out;

ok:
	if (!index_buf_check(ib, bytes, &vbn)) {
		_ntfs_bad_inode(&ni->vfs_inode);
		err = -EINVAL;
		goto out;
	}

	/* The buffer passed the check: write it back and clear the error. */
	if (err == -E_NTFS_FIXUP) {
		ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0);
		err = 0;
	}

	/* check for index header length */
	if (offsetof(struct INDEX_BUFFER, ihdr) + le32_to_cpu(ib->ihdr.used) >
	    bytes) {
		err = -EINVAL;
		goto out;
	}

	in->index = ib;
	*node = in;

out:
	/* The success path falls through here with err == 0. */
	if (err == -E_NTFS_CORRUPT) {
		_ntfs_bad_inode(&ni->vfs_inode);
		err = -EINVAL;
	}

	/* Free the data buffer only if it was not attached to the node. */
	if (ib != in->index)
		kfree(ib);

	/* Free the node only if it was not handed back to the caller. */
	if (*node != in) {
		nb_put(&in->nb);
		kfree(in);
	}

	return err;
}
*/ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, const struct INDEX_ROOT *root, const void *key, size_t key_len, const void *ctx, int *diff, struct NTFS_DE **entry, struct ntfs_fnd *fnd) { int err; struct NTFS_DE *e; struct indx_node *node; if (!root) root = indx_get_root(&ni->dir, ni, NULL, NULL); if (!root) { /* Should not happen. */ return -EINVAL; } /* Check cache. */ e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de; if (e && !de_is_last(e) && !(*indx->cmp)(key, key_len, e + 1, le16_to_cpu(e->key_size), ctx)) { *entry = e; *diff = 0; return 0; } /* Soft finder reset. */ fnd_clear(fnd); /* Lookup entry that is <= to the search value. */ e = hdr_find_e(indx, &root->ihdr, key, key_len, ctx, diff); if (!e) return -EINVAL; fnd->root_de = e; for (;;) { node = NULL; if (*diff >= 0 || !de_has_vcn_ex(e)) break; /* Read next level. */ err = indx_read(indx, ni, de_get_vbn(e), &node); if (err) { /* io error? */ return err; } /* Lookup entry that is <= to the search value. */ e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx, diff); if (!e) { put_indx_node(node); return -EINVAL; } fnd_push(fnd, node, e); } *entry = e; return 0; } int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni, const struct INDEX_ROOT *root, struct NTFS_DE **entry, struct ntfs_fnd *fnd) { int err; struct indx_node *n = NULL; struct NTFS_DE *e; size_t iter = 0; int level = fnd->level; if (!*entry) { /* Start find. */ e = hdr_first_de(&root->ihdr); if (!e) return 0; fnd_clear(fnd); fnd->root_de = e; } else if (!level) { if (de_is_last(fnd->root_de)) { *entry = NULL; return 0; } e = hdr_next_de(&root->ihdr, fnd->root_de); if (!e) return -EINVAL; fnd->root_de = e; } else { n = fnd->nodes[level - 1]; e = fnd->de[level - 1]; if (de_is_last(e)) goto pop_level; e = hdr_next_de(&n->index->ihdr, e); if (!e) return -EINVAL; fnd->de[level - 1] = e; } /* Just to avoid tree cycle. 
/*
 * indx_find_raw - Enumerate index entries in storage (unsorted) order.
 *
 * @indx:  index to enumerate.
 * @ni:    inode that owns the index.
 * @root:  index root.
 * @entry: in/out. NULL on the first call; on return the next usable entry,
 *         or NULL when nothing is left.
 * @off:   in/out position. Values below the MFT record size address the
 *         root; larger values are the record size plus a byte position in
 *         the index allocation.
 * @fnd:   finder that keeps the current root entry / index buffer.
 *
 * The root entries are walked first, then every index buffer that
 * indx_used_bit() reports, one after another.
 *
 * Return: 0 on success (also when the enumeration is finished),
 * negative errno otherwise.
 */
int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni,
		  const struct INDEX_ROOT *root, struct NTFS_DE **entry,
		  size_t *off, struct ntfs_fnd *fnd)
{
	int err;
	struct indx_node *n = NULL;
	struct NTFS_DE *e = NULL;
	struct NTFS_DE *e2;
	size_t bit;
	CLST next_used_vbn;
	CLST next_vbn;
	u32 record_size = ni->mi.sbi->record_size;

	/* Use non sorted algorithm. */
	if (!*entry) {
		/* This is the first call. */
		e = hdr_first_de(&root->ihdr);
		if (!e)
			return 0;
		fnd_clear(fnd);
		fnd->root_de = e;

		/* The first call with setup of initial element. */
		if (*off >= record_size) {
			/* Position lies in the allocation: derive its vbn. */
			next_vbn = (((*off - record_size) >> indx->index_bits))
				   << indx->idx2vbn_bits;
			/* Jump inside cycle 'for'. */
			goto next;
		}

		/* Start enumeration from root. */
		*off = 0;
	} else if (!fnd->root_de)
		return -EINVAL;

	for (;;) {
		/* Check if current entry can be used. */
		if (e && le16_to_cpu(e->size) > sizeof(struct NTFS_DE))
			goto ok;

		if (!fnd->level) {
			/* Continue to enumerate root. */
			if (!de_is_last(fnd->root_de)) {
				e = hdr_next_de(&root->ihdr, fnd->root_de);
				if (!e)
					return -EINVAL;
				fnd->root_de = e;
				continue;
			}

			/* Start to enumerate indexes from 0. */
			next_vbn = 0;
		} else {
			/* Continue to enumerate indexes. */
			e2 = fnd->de[fnd->level - 1];

			n = fnd->nodes[fnd->level - 1];

			if (!de_is_last(e2)) {
				e = hdr_next_de(&n->index->ihdr, e2);
				if (!e)
					return -EINVAL;
				fnd->de[fnd->level - 1] = e;
				continue;
			}

			/* Continue with next index. */
			next_vbn = le64_to_cpu(n->index->vbn) +
				   root->index_block_clst;
		}

next:
		/* Release current index. */
		if (n) {
			fnd_pop(fnd);
			put_indx_node(n);
			n = NULL;
		}

		/* Skip all free indexes. */
		bit = next_vbn >> indx->idx2vbn_bits;
		err = indx_used_bit(indx, ni, &bit);
		/*
		 * NOTE(review): only -ENOENT is tested; any other error from
		 * indx_used_bit() is not returned here - confirm intended.
		 */
		if (err == -ENOENT || bit == MINUS_ONE_T) {
			/* No used indexes. */
			*entry = NULL;
			return 0;
		}

		next_used_vbn = bit << indx->idx2vbn_bits;

		/* Read buffer into memory. */
		err = indx_read(indx, ni, next_used_vbn, &n);
		if (err)
			return err;

		/* Push before the NULL test so the finder owns the node. */
		e = hdr_first_de(&n->index->ihdr);
		fnd_push(fnd, n, e);
		if (!e)
			return -EINVAL;
	}

ok:
	/* Return offset to restore enumerator if necessary. */
	if (!n) {
		/* 'e' points in root, */
		*off = PtrOffset(&root->ihdr, e);
	} else {
		/* 'e' points in index, */
		*off = (le64_to_cpu(n->index->vbn) << indx->vbn2vbo_bits) +
		       record_size + PtrOffset(&n->index->ihdr, e);
	}

	*entry = e;
	return 0;
}
/*
 * indx_create_allocate - Create "Allocation + Bitmap" attributes.
 *
 * @indx: index that receives the new allocation run.
 * @ni:   inode that owns the index.
 * @vbn:  out; virtual block number of the first index buffer (always 0).
 *
 * Allocates clusters for one index buffer, inserts a non-resident
 * allocation attribute and a resident bitmap attribute named after the
 * index, and on success hands the run over to indx->alloc_run.
 *
 * Return: 0 on success, negative errno otherwise. On failure the clusters
 * are returned and the local run is released.
 */
static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
				CLST *vbn)
{
	int err;
	struct ntfs_sb_info *sbi = ni->mi.sbi;
	struct ATTRIB *bitmap;
	struct ATTRIB *alloc;
	u32 data_size = 1u << indx->index_bits;
	u32 alloc_size = ntfs_up_cluster(sbi, data_size);
	CLST len = alloc_size >> sbi->cluster_bits;
	const struct INDEX_NAMES *in = &s_index_names[indx->type];
	CLST alen;
	struct runs_tree run;

	run_init(&run);

	err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF,
				     &alen, 0, NULL, NULL);
	if (err)
		goto out;

	err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len,
				    &run, 0, len, 0, &alloc, NULL, NULL);
	if (err)
		goto out1;

	alloc->nres.valid_size = alloc->nres.data_size = cpu_to_le64(data_size);

	err = ni_insert_resident(ni, ntfs3_bitmap_size(1), ATTR_BITMAP,
				 in->name, in->name_len, &bitmap, NULL, NULL);
	if (err)
		goto out2;

	if (in->name == I30_NAME) {
		i_size_write(&ni->vfs_inode, data_size);
		inode_set_bytes(&ni->vfs_inode, alloc_size);
	}

	/* Ownership of the run's memory moves to the index. */
	memcpy(&indx->alloc_run, &run, sizeof(run));

	*vbn = 0;

	return 0;

out2:
	mi_remove_attr(NULL, &ni->mi, alloc);

out1:
	run_deallocate(sbi, &run, false);
	/*
	 * The local run was never handed to the index on this path, so
	 * release its in-memory mapping as well.
	 * NOTE(review): assumes run_deallocate() only returns the clusters
	 * and leaves the mapping allocated - confirm.
	 */
	run_close(&run);

out:
	return err;
}
/*
 * indx_insert_into_root - Attempt to insert an entry into the index root.
 *
 * @indx:    index to modify.
 * @ni:      inode that owns the index.
 * @new_de:  entry to insert.
 * @root_de: insert position hint passed to hdr_insert_de(), may be NULL.
 * @ctx:     opaque context for the compare callback.
 * @fnd:     finder; reset and repositioned on the inserted entry.
 * @undo:    True if we undoing previous remove.
 *
 * If necessary, it will twiddle the index b-tree: when the resident root
 * cannot grow, its entries are moved into a new index buffer and the root
 * is rewritten as a single node entry pointing at that buffer.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni,
				 const struct NTFS_DE *new_de,
				 struct NTFS_DE *root_de, const void *ctx,
				 struct ntfs_fnd *fnd, bool undo)
{
	int err = 0;
	struct NTFS_DE *e, *e0, *re;
	struct mft_inode *mi;
	struct ATTRIB *attr;
	struct INDEX_HDR *hdr;
	struct indx_node *n;
	CLST new_vbn;
	__le64 *sub_vbn, t_vbn;
	u16 new_de_size;
	u32 hdr_used, hdr_total, asize, to_move;
	u32 root_size, new_root_size;
	struct ntfs_sb_info *sbi;
	int ds_root;
	struct INDEX_ROOT *root, *a_root;

	/* Get the record this root placed in. */
	root = indx_get_root(indx, ni, &attr, &mi);
	if (!root)
		return -EINVAL;

	/*
	 * Try easy case:
	 * hdr_insert_de will succeed if there's
	 * room the root for the new entry.
	 */
	hdr = &root->ihdr;
	sbi = ni->mi.sbi;
	new_de_size = le16_to_cpu(new_de->size);
	hdr_used = le32_to_cpu(hdr->used);
	hdr_total = le32_to_cpu(hdr->total);
	asize = le32_to_cpu(attr->size);
	root_size = le32_to_cpu(attr->res.data_size);

	/* Bytes the root attribute must grow by to hold the new entry. */
	ds_root = new_de_size + hdr_used - hdr_total;

	/* If 'undo' is set then reduce requirements. */
	if ((undo || asize + ds_root < sbi->max_bytes_per_attr) &&
	    mi_resize_attr(mi, attr, ds_root)) {
		hdr->total = cpu_to_le32(hdr_total + ds_root);
		e = hdr_insert_de(indx, hdr, new_de, root_de, ctx);
		/* A failed insert is only warned about; 0 is still returned. */
		WARN_ON(!e);
		fnd_clear(fnd);
		fnd->root_de = e;

		return 0;
	}

	/* Make a copy of root attribute to restore if error. */
	a_root = kmemdup(attr, asize, GFP_NOFS);
	if (!a_root)
		return -ENOMEM;

	/*
	 * Copy all the non-end entries from
	 * the index root to the new buffer.
	 */
	to_move = 0;
	e0 = hdr_first_de(hdr);

	/* Calculate the size to copy. */
	for (e = e0;; e = hdr_next_de(hdr, e)) {
		if (!e) {
			err = -EINVAL;
			goto out_free_root;
		}

		if (de_is_last(e))
			break;
		to_move += le16_to_cpu(e->size);
	}

	if (!to_move) {
		re = NULL;
	} else {
		re = kmemdup(e0, to_move, GFP_NOFS);
		if (!re) {
			err = -ENOMEM;
			goto out_free_root;
		}
	}

	/* 'e' is the root's end entry here; keep its sub-node, if any. */
	sub_vbn = NULL;
	if (de_has_vcn(e)) {
		t_vbn = de_get_vbn_le(e);
		sub_vbn = &t_vbn;
	}

	/* The new root holds exactly one end entry with a sub-node. */
	new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE) +
			sizeof(u64);
	ds_root = new_root_size - root_size;

	if (ds_root > 0 && asize + ds_root > sbi->max_bytes_per_attr) {
		/* Make root external. */
		err = -EOPNOTSUPP;
		goto out_free_re;
	}

	/* NOTE(review): the result of this resize is not checked. */
	if (ds_root)
		mi_resize_attr(mi, attr, ds_root);

	/* Fill first entry (vcn will be set later). */
	e = (struct NTFS_DE *)(root + 1);
	memset(e, 0, sizeof(struct NTFS_DE));
	e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64));
	e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST;

	hdr->flags = NTFS_INDEX_HDR_HAS_SUBNODES;
	hdr->used = hdr->total =
		cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr));

	fnd->root_de = hdr_first_de(hdr);
	mi->dirty = true;

	/* Create alloc and bitmap attributes (if not). */
	err = run_is_empty(&indx->alloc_run) ?
		      indx_create_allocate(indx, ni, &new_vbn) :
		      indx_add_allocate(indx, ni, &new_vbn);

	/* Layout of record may be changed, so rescan root. */
	root = indx_get_root(indx, ni, &attr, &mi);
	if (!root) {
		/* Bug? */
		ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
		err = -EINVAL;
		goto out_free_re;
	}

	if (err) {
		/* Restore root. */
		if (mi_resize_attr(mi, attr, -ds_root)) {
			memcpy(attr, a_root, asize);
		} else {
			/* Bug? */
			ntfs_set_state(sbi, NTFS_DIRTY_ERROR);
		}
		goto out_free_re;
	}

	/* Store the new buffer's vbn right after the root's end entry. */
	e = (struct NTFS_DE *)(root + 1);
	*(__le64 *)(e + 1) = cpu_to_le64(new_vbn);
	mi->dirty = true;

	/* Now we can create/format the new buffer and copy the entries into. */
	n = indx_new(indx, ni, new_vbn, sub_vbn);
	if (IS_ERR(n)) {
		err = PTR_ERR(n);
		goto out_free_re;
	}

	hdr = &n->index->ihdr;
	/*
	 * NOTE(review): hdr_used is sampled before hdr_insert_head() below;
	 * if that helper advances hdr->used, the space check further down
	 * does not account for the moved entries - confirm.
	 */
	hdr_used = le32_to_cpu(hdr->used);
	hdr_total = le32_to_cpu(hdr->total);

	/* Copy root entries into new buffer. */
	hdr_insert_head(hdr, re, to_move);

	/* Update bitmap attribute. */
	indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);

	/* Check if we can insert new entry new index buffer. */
	if (hdr_used + new_de_size > hdr_total) {
		/*
		 * This occurs if MFT record is the same or bigger than index
		 * buffer. Move all root new index and have no space to add
		 * new entry classic case when MFT record is 1K and index
		 * buffer 4K the problem should not occurs.
		 */
		kfree(re);
		indx_write(indx, ni, n, 0);

		put_indx_node(n);
		fnd_clear(fnd);
		err = indx_insert_entry(indx, ni, new_de, ctx, fnd, undo);
		/* 're' is already freed, so skip out_free_re. */
		goto out_free_root;
	}

	/*
	 * Now root is a parent for new index buffer.
	 * Insert NewEntry a new buffer.
	 */
	e = hdr_insert_de(indx, hdr, new_de, NULL, ctx);
	if (!e) {
		err = -EINVAL;
		goto out_put_n;
	}
	fnd_push(fnd, n, e);

	/* Just write updates index into disk. */
	indx_write(indx, ni, n, 0);

	/* The finder now holds the node; do not release it below. */
	n = NULL;

out_put_n:
	put_indx_node(n);
out_free_re:
	kfree(re);
out_free_root:
	kfree(a_root);
	return err;
}
/*
 * indx_insert_into_buffer
 *
 * Attempt to insert an entry into an Index Allocation Buffer.
 * If necessary, it will split the buffer.
 *
 * @indx:   index to modify.
 * @ni:     inode that owns the index.
 * @root:   index root; only passed on to the recursive call.
 * @new_de: entry to insert.
 * @ctx:    opaque context for the compare callback.
 * @level:  position in @fnd of the buffer to insert into.
 * @fnd:    finder holding the path of nodes.
 *
 * Return: 0 on success, negative errno otherwise.
 */
static int
indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni,
			struct INDEX_ROOT *root, const struct NTFS_DE *new_de,
			const void *ctx, int level, struct ntfs_fnd *fnd)
{
	int err;
	const struct NTFS_DE *sp;
	struct NTFS_DE *e, *de_t, *up_e;
	struct indx_node *n2;
	struct indx_node *n1 = fnd->nodes[level];
	struct INDEX_HDR *hdr1 = &n1->index->ihdr;
	struct INDEX_HDR *hdr2;
	u32 to_copy, used, used1;
	CLST new_vbn;
	__le64 t_vbn, *sub_vbn;
	u16 sp_size;
	void *hdr1_saved = NULL;

	/* Try the most easy case. */
	/* The cached position is only used for the deepest finder level. */
	e = fnd->level - 1 == level ? fnd->de[level] : NULL;
	e = hdr_insert_de(indx, hdr1, new_de, e, ctx);
	fnd->de[level] = e;
	if (e) {
		/* Just write updated index into disk. */
		indx_write(indx, ni, n1, 0);
		return 0;
	}

	/*
	 * No space to insert into buffer. Split it.
	 * To split we:
	 *  - Save split point ('cause index buffers will be changed)
	 * - Allocate NewBuffer and copy all entries <= sp into new buffer
	 * - Remove all entries (sp including) from TargetBuffer
	 * - Insert NewEntry into left or right buffer (depending on sp <=>
	 *     NewEntry)
	 * - Insert sp into parent buffer (or root)
	 * - Make sp a parent for new buffer
	 */
	sp = hdr_find_split(hdr1);
	if (!sp)
		return -EINVAL;

	/* Copy the split entry, with room for a sub-node pointer. */
	sp_size = le16_to_cpu(sp->size);
	up_e = kmalloc(sp_size + sizeof(u64), GFP_NOFS);
	if (!up_e)
		return -ENOMEM;
	memcpy(up_e, sp, sp_size);

	/* Snapshot of the used part of hdr1, restored if promotion fails. */
	used1 = le32_to_cpu(hdr1->used);
	hdr1_saved = kmemdup(hdr1, used1, GFP_NOFS);
	if (!hdr1_saved) {
		err = -ENOMEM;
		goto out;
	}

	if (!hdr1->flags) {
		/* No header flags set: grow the copy to hold a sub-node. */
		up_e->flags |= NTFS_IE_HAS_SUBNODES;
		up_e->size = cpu_to_le16(sp_size + sizeof(u64));
		sub_vbn = NULL;
	} else {
		/* The split entry's sub-node goes to the new buffer's end. */
		t_vbn = de_get_vbn_le(up_e);
		sub_vbn = &t_vbn;
	}

	/* Allocate on disk a new index allocation buffer. */
	err = indx_add_allocate(indx, ni, &new_vbn);
	if (err)
		goto out;

	/* Allocate and format memory a new index buffer. */
	n2 = indx_new(indx, ni, new_vbn, sub_vbn);
	if (IS_ERR(n2)) {
		err = PTR_ERR(n2);
		goto out;
	}

	hdr2 = &n2->index->ihdr;

	/* Make sp a parent for new buffer. */
	de_set_vbn(up_e, new_vbn);

	/* Copy all the entries <= sp into the new buffer. */
	de_t = hdr_first_de(hdr1);
	to_copy = PtrOffset(de_t, sp);
	hdr_insert_head(hdr2, de_t, to_copy);

	/* Remove all entries (sp including) from hdr1. */
	used = used1 - to_copy - sp_size;
	memmove(de_t, Add2Ptr(sp, sp_size), used - le32_to_cpu(hdr1->de_off));
	hdr1->used = cpu_to_le32(used);

	/*
	 * Insert new entry into left or right buffer
	 * (depending on sp <=> new_de).
	 */
	/* NOTE(review): the result of this insert is not checked. */
	hdr_insert_de(indx,
		      (*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size),
				   up_e + 1, le16_to_cpu(up_e->key_size),
				   ctx) < 0 ?
			      hdr2 :
			      hdr1,
		      new_de, NULL, ctx);

	indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits);

	indx_write(indx, ni, n1, 0);
	indx_write(indx, ni, n2, 0);

	put_indx_node(n2);

	/*
	 * We've finished splitting everybody, so we are ready to
	 * insert the promoted entry into the parent.
	 */
	if (!level) {
		/* Insert in root. */
		err = indx_insert_into_root(indx, ni, up_e, NULL, ctx, fnd, 0);
	} else {
		/*
		 * The target buffer's parent is another index buffer.
		 * TODO: Remove recursion.
		 */
		err = indx_insert_into_buffer(indx, ni, root, up_e, ctx,
					      level - 1, fnd);
	}

	if (err) {
		/*
		 * Undo critical operations.
		 */
		indx_mark_free(indx, ni, new_vbn >> indx->idx2vbn_bits);
		memcpy(hdr1, hdr1_saved, used1);
		indx_write(indx, ni, n1, 0);
	}

out:
	kfree(up_e);
	kfree(hdr1_saved);

	return err;
}
/*
 * indx_find_buffer - Locate a buffer from the tree.
 *
 * @indx: index to search.
 * @ni:   inode that owns the index.
 * @root: index root; scanned when @n is NULL.
 * @vbn:  little-endian vbn of the buffer whose parent is wanted.
 * @n:    node to scan, or NULL to start from the root.
 *
 * Return: the node that holds an entry pointing at @vbn. This is @n
 * itself, so NULL when the match is in the root. NULL is also returned
 * when nothing matches, and ERR_PTR() on a walk or read error.
 */
static struct indx_node *indx_find_buffer(struct ntfs_index *indx,
					  struct ntfs_inode *ni,
					  const struct INDEX_ROOT *root,
					  __le64 vbn, struct indx_node *n)
{
	int err;
	const struct NTFS_DE *e;
	struct indx_node *r;
	const struct INDEX_HDR *hdr = n ? &n->index->ihdr : &root->ihdr;

	/* Step 1: Scan one level. */
	for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) {
		if (!e)
			return ERR_PTR(-EINVAL);

		if (de_has_vcn(e) && vbn == de_get_vbn_le(e))
			return n;

		if (de_is_last(e))
			break;
	}

	/* Step2: Do recursion. */
	e = Add2Ptr(hdr, le32_to_cpu(hdr->de_off));
	for (;;) {
		if (de_has_vcn_ex(e)) {
			/* 'n' is passed by address, so a non-NULL node is reused. */
			err = indx_read(indx, ni, de_get_vbn(e), &n);
			if (err)
				return ERR_PTR(err);

			/* Any non-NULL result (including ERR_PTR) ends the scan. */
			r = indx_find_buffer(indx, ni, root, vbn, n);
			if (r)
				return r;
		}

		if (de_is_last(e))
			break;

		e = Add2Ptr(e, le16_to_cpu(e->size));
	}

	return NULL;
}
*/ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, size_t bit) { int err = 0; u64 bpb, new_data; size_t nbits; struct ATTRIB *b; struct ATTR_LIST_ENTRY *le = NULL; const struct INDEX_NAMES *in = &s_index_names[indx->type]; b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, NULL, NULL); if (!b) return -ENOENT; if (!b->non_res) { unsigned long pos; const unsigned long *bm = resident_data(b); nbits = (size_t)le32_to_cpu(b->res.data_size) * 8; if (bit >= nbits) return 0; pos = find_next_bit_le(bm, nbits, bit); if (pos < nbits) return 0; } else { size_t used = MINUS_ONE_T; nbits = le64_to_cpu(b->nres.data_size) * 8; if (bit >= nbits) return 0; err = scan_nres_bitmap(ni, b, indx, bit, &scan_for_used, &used); if (err) return err; if (used != MINUS_ONE_T) return 0; } new_data = (u64)bit << indx->index_bits; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, new_data, &new_data, false, NULL); if (err) return err; if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, new_data); bpb = ntfs3_bitmap_size(bit); if (bpb * 8 == nbits) return 0; err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, bpb, &bpb, false, NULL); return err; } static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni, const struct NTFS_DE *e, bool trim) { int err; struct indx_node *n = NULL; struct INDEX_HDR *hdr; CLST vbn = de_get_vbn(e); size_t i; err = indx_read(indx, ni, vbn, &n); if (err) return err; hdr = &n->index->ihdr; /* First, recurse into the children, if any. */ if (hdr_has_subnode(hdr)) { for (e = hdr_first_de(hdr); e; e = hdr_next_de(hdr, e)) { indx_free_children(indx, ni, e, false); if (de_is_last(e)) break; } } put_indx_node(n); i = vbn >> indx->idx2vbn_bits; /* * We've gotten rid of the children; add this buffer to the free list. 
/*
 * indx_get_entry_to_replace
 *
 * Find a replacement entry for a deleted entry.
 * Always returns a node entry:
 * NTFS_IE_HAS_SUBNODES is set the flags and the size includes the sub_vcn.
 *
 * @indx:          index to search.
 * @ni:            inode that owns the index.
 * @de_next:       entry whose sub-tree is searched for a replacement.
 * @de_to_replace: out; kmalloc'ed copy of the replacement entry (the caller
 *                 frees it), or NULL if every buffer on the path is empty.
 * @fnd:           finder that receives the nodes read on the way down.
 *
 * Return: 0 on success (also when no replacement exists),
 * negative errno otherwise.
 */
static int indx_get_entry_to_replace(struct ntfs_index *indx,
				     struct ntfs_inode *ni,
				     const struct NTFS_DE *de_next,
				     struct NTFS_DE **de_to_replace,
				     struct ntfs_fnd *fnd)
{
	int err;
	int level = -1;
	CLST vbn;
	struct NTFS_DE *e, *te, *re;
	struct indx_node *n;
	struct INDEX_BUFFER *ib;

	*de_to_replace = NULL;

	/* Find first leaf entry down from de_next. */
	vbn = de_get_vbn(de_next);
	for (;;) {
		n = NULL;
		err = indx_read(indx, ni, vbn, &n);
		if (err)
			goto out;

		/* Push before the NULL test so the finder owns the node. */
		e = hdr_first_de(&n->index->ihdr);
		fnd_push(fnd, n, e);
		if (!e) {
			err = -EINVAL;
			goto out;
		}

		if (!de_is_last(e)) {
			/*
			 * This buffer is non-empty, so its first entry
			 * could be used as the replacement entry.
			 */
			level = fnd->level - 1;
		}

		if (!de_has_vcn(e))
			break;

		/* This buffer is a node. Continue to go down. */
		vbn = de_get_vbn(e);
	}

	/* Nothing usable on the whole path: err is 0, result stays NULL. */
	if (level == -1)
		goto out;

	/* 'level' is the deepest non-empty buffer seen on the way down. */
	n = fnd->nodes[level];
	te = hdr_first_de(&n->index->ihdr);
	if (!te) {
		err = -EINVAL;
		goto out;
	}
	/* Copy the candidate entry into the replacement entry buffer. */
	re = kmalloc(le16_to_cpu(te->size) + sizeof(u64), GFP_NOFS);
	if (!re) {
		err = -ENOMEM;
		goto out;
	}

	*de_to_replace = re;
	memcpy(re, te, le16_to_cpu(te->size));

	if (!de_has_vcn(re)) {
		/*
		 * The replacement entry we found doesn't have a sub_vcn.
		 * increase its size to hold one.
		 */
		le16_add_cpu(&re->size, sizeof(u64));
		re->flags |= NTFS_IE_HAS_SUBNODES;
	} else {
		/*
		 * The replacement entry we found was a node entry, which
		 * means that all its child buffers are empty. Return them
		 * to the free pool.
		 */
		indx_free_children(indx, ni, te, true);
	}

	/*
	 * Expunge the replacement entry from its former location,
	 * and then write that buffer.
	 */
	ib = n->index;
	e = hdr_delete_de(&ib->ihdr, te);

	fnd->de[level] = e;
	indx_write(indx, ni, n, 0);

	/* Only here is the finder left populated for the caller. */
	if (ib_is_leaf(ib) && ib_is_empty(ib)) {
		/* An empty leaf. */
		return 0;
	}

out:
	fnd_clear(fnd);
	return err;
}
*/ next = de_get_next(e); err = indx_get_entry_to_replace(indx, ni, next, &re, fnd2); if (err) goto out; if (re) { de_set_vbn_le(re, de_get_vbn_le(e)); hdr_delete_de(hdr, e); err = level ? indx_insert_into_buffer(indx, ni, root, re, ctx, fnd->level - 1, fnd) : indx_insert_into_root(indx, ni, re, e, ctx, fnd, 0); kfree(re); if (err) goto out; } else { /* * There is no replacement for the current entry. * This means that the subtree rooted at its node * is empty, and can be deleted, which turn means * that the node can just inherit the deleted * entry sub_vcn. */ indx_free_children(indx, ni, next, true); de_set_vbn_le(next, de_get_vbn_le(e)); hdr_delete_de(hdr, e); if (level) { indx_write(indx, ni, n, 0); } else { hdr->total = hdr->used; /* Shrink resident root attribute. */ mi_resize_attr(mi, attr, 0 - e_size); } } } /* Delete a branch of tree. */ if (!fnd2 || !fnd2->level) goto out; /* Reinit root 'cause it can be changed. */ root = indx_get_root(indx, ni, &attr, &mi); if (!root) { err = -EINVAL; goto out; } n2d = NULL; sub_vbn = fnd2->nodes[0]->index->vbn; level2 = 0; level = fnd->level; hdr = level ? &fnd->nodes[level - 1]->index->ihdr : &root->ihdr; /* Scan current level. */ for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) { if (!e) { err = -EINVAL; goto out; } if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e)) break; if (de_is_last(e)) { e = NULL; break; } } if (!e) { /* Do slow search from root. */ struct indx_node *in; fnd_clear(fnd); in = indx_find_buffer(indx, ni, root, sub_vbn, NULL); if (IS_ERR(in)) { err = PTR_ERR(in); goto out; } if (in) fnd_push(fnd, in, NULL); } /* Merge fnd2 -> fnd. 
*/ for (level = 0; level < fnd2->level; level++) { fnd_push(fnd, fnd2->nodes[level], fnd2->de[level]); fnd2->nodes[level] = NULL; } fnd2->level = 0; hdr = NULL; for (level = fnd->level; level; level--) { struct indx_node *in = fnd->nodes[level - 1]; ib = in->index; if (ib_is_empty(ib)) { sub_vbn = ib->vbn; } else { hdr = &ib->ihdr; n2d = in; level2 = level; break; } } if (!hdr) hdr = &root->ihdr; e = hdr_first_de(hdr); if (!e) { err = -EINVAL; goto out; } if (hdr != &root->ihdr || !de_is_last(e)) { prev = NULL; while (!de_is_last(e)) { if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e)) break; prev = e; e = hdr_next_de(hdr, e); if (!e) { err = -EINVAL; goto out; } } if (sub_vbn != de_get_vbn_le(e)) { /* * Didn't find the parent entry, although this buffer * is the parent trail. Something is corrupt. */ err = -EINVAL; goto out; } if (de_is_last(e)) { /* * Since we can't remove the end entry, we'll remove * its predecessor instead. This means we have to * transfer the predecessor's sub_vcn to the end entry. * Note: This index block is not empty, so the * predecessor must exist. */ if (!prev) { err = -EINVAL; goto out; } if (de_has_vcn(prev)) { de_set_vbn_le(e, de_get_vbn_le(prev)); } else if (de_has_vcn(e)) { le16_sub_cpu(&e->size, sizeof(u64)); e->flags &= ~NTFS_IE_HAS_SUBNODES; le32_sub_cpu(&hdr->used, sizeof(u64)); } e = prev; } /* * Copy the current entry into a temporary buffer (stripping * off its down-pointer, if any) and delete it from the current * buffer or root, as appropriate. */ e_size = le16_to_cpu(e->size); me = kmemdup(e, e_size, GFP_NOFS); if (!me) { err = -ENOMEM; goto out; } if (de_has_vcn(me)) { me->flags &= ~NTFS_IE_HAS_SUBNODES; le16_sub_cpu(&me->size, sizeof(u64)); } hdr_delete_de(hdr, e); if (hdr == &root->ihdr) { level = 0; hdr->total = hdr->used; /* Shrink resident root attribute. */ mi_resize_attr(mi, attr, 0 - e_size); } else { indx_write(indx, ni, n2d, 0); level = level2; } /* Mark unused buffers as free. 
*/ trim_bit = -1; for (; level < fnd->level; level++) { ib = fnd->nodes[level]->index; if (ib_is_empty(ib)) { size_t k = le64_to_cpu(ib->vbn) >> indx->idx2vbn_bits; indx_mark_free(indx, ni, k); if (k < trim_bit) trim_bit = k; } } fnd_clear(fnd); /*fnd->root_de = NULL;*/ /* * Re-insert the entry into the tree. * Find the spot the tree where we want to insert the new entry. */ err = indx_insert_entry(indx, ni, me, ctx, fnd, 0); kfree(me); if (err) goto out; if (trim_bit != -1) indx_shrink(indx, ni, trim_bit); } else { /* * This tree needs to be collapsed down to an empty root. * Recreate the index root as an empty leaf and free all * the bits the index allocation bitmap. */ fnd_clear(fnd); fnd_clear(fnd2); in = &s_index_names[indx->type]; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, 0, NULL, false, NULL); if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, 0); err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len, false, NULL); run_close(&indx->alloc_run); err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, 0, NULL, false, NULL); err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len, false, NULL); run_close(&indx->bitmap_run); root = indx_get_root(indx, ni, &attr, &mi); if (!root) { err = -EINVAL; goto out; } root_size = le32_to_cpu(attr->res.data_size); new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE); if (new_root_size != root_size && !mi_resize_attr(mi, attr, new_root_size - root_size)) { err = -EINVAL; goto out; } /* Fill first entry. 
*/
		e = (struct NTFS_DE *)(root + 1);
		e->ref.low = 0;
		e->ref.high = 0;
		e->ref.seq = 0;
		e->size = cpu_to_le16(sizeof(struct NTFS_DE));
		e->flags = NTFS_IE_LAST; // 0x02
		e->key_size = 0;
		e->res = 0;

		hdr = &root->ihdr;
		hdr->flags = 0;
		hdr->used = hdr->total = cpu_to_le32(
			new_root_size - offsetof(struct INDEX_ROOT, ihdr));
		mi->dirty = true;
	}

out:
	fnd_put(fnd2);
out1:
	fnd_put(fnd);
out2:
	return err;
}

/*
 * indx_update_dup - Update duplicated information in directory entry.
 * @ni:    inode whose directory index (ni->dir) is searched and updated.
 * @sbi:   passed through to indx_find() as its context argument.
 * @fname: file name used as the lookup key; the key length handed to
 *         indx_find() is fname_full_size(fname).
 * @dup:   info from MFT record that should be stored in the entry.
 * @sync:  forwarded to indx_write() when the entry is in an index node;
 *         for an entry in the root, non-zero selects mi_write(mi, 1) and
 *         zero only marks the VFS inode dirty.
 *
 * The entry located by indx_find() is expected to be an exact match: a NULL
 * entry or a non-zero 'diff' is reported as -EINVAL. If the stored dup info
 * already equals @dup nothing is written.
 *
 * Return: 0 on success (including the "nothing to update" case), -ENOMEM if
 * fnd_get() fails, -EINVAL if the index root or a matching entry cannot be
 * obtained, otherwise the error returned by indx_find(), indx_write() or
 * mi_write().
 */
int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi,
		    const struct ATTR_FILE_NAME *fname,
		    const struct NTFS_DUP_INFO *dup, int sync)
{
	int err, diff;
	struct NTFS_DE *e = NULL;
	struct ATTR_FILE_NAME *e_fname;
	struct ntfs_fnd *fnd;
	struct INDEX_ROOT *root;
	struct mft_inode *mi;
	struct ntfs_index *indx = &ni->dir;

	/* Search-path context; released with fnd_put() on every exit below. */
	fnd = fnd_get();
	if (!fnd)
		return -ENOMEM;

	/* 'mi' receives the MFT record that holds the index root. */
	root = indx_get_root(indx, ni, NULL, &mi);
	if (!root) {
		err = -EINVAL;
		goto out;
	}

	/* Find entry in directory. */
	err = indx_find(indx, ni, root, fname, fname_full_size(fname), sbi,
			&diff, &e, fnd);
	if (err)
		goto out;

	if (!e) {
		err = -EINVAL;
		goto out;
	}

	/* NOTE(review): non-zero 'diff' presumably means "no exact match" - confirm against indx_find(). */
	if (diff) {
		err = -EINVAL;
		goto out;
	}

	/* The key stored right after the entry header is treated as a file name attribute. */
	e_fname = (struct ATTR_FILE_NAME *)(e + 1);

	if (!memcmp(&e_fname->dup, dup, sizeof(*dup))) {
		/*
		 * Nothing to update in index! Try to avoid this call.
		 * 'err' is still 0 from indx_find(), so this returns success.
		 */
		goto out;
	}

	memcpy(&e_fname->dup, dup, sizeof(*dup));

	if (fnd->level) {
		/* Directory entry in index: write back the deepest node on the search path. */
		err = indx_write(indx, ni, fnd->nodes[fnd->level - 1], sync);
	} else {
		/* Directory entry in directory MFT record. */
		mi->dirty = true;
		if (sync)
			err = mi_write(mi, 1);
		else
			mark_inode_dirty(&ni->vfs_inode);
	}

out:
	fnd_put(fnd);
	return err;
}
3166 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 /* SPDX-License-Identifier: GPL-2.0+ */ /* * RCU-based infrastructure for lightweight reader-writer locking * * Copyright (c) 2015, Red Hat, Inc. * * Author: Oleg Nesterov <oleg@redhat.com> */ #ifndef _LINUX_RCU_SYNC_H_ #define _LINUX_RCU_SYNC_H_ #include <linux/wait.h> #include <linux/rcupdate.h> /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; int gp_count; wait_queue_head_t gp_wait; struct rcu_head cb_head; }; /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization * * Returns true if readers are permitted to use their fastpaths. Must be * invoked within some flavor of RCU read-side critical section. */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), "suspicious rcu_sync_is_idle() usage"); return !READ_ONCE(rsp->gp_state); /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); extern void rcu_sync_dtor(struct rcu_sync *); #define __RCU_SYNC_INITIALIZER(name) { \ .gp_state = 0, \ .gp_count = 0, \ .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ } #define DEFINE_RCU_SYNC(name) \ struct rcu_sync name = __RCU_SYNC_INITIALIZER(name) #endif /* _LINUX_RCU_SYNC_H_ */
19 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _SCSI_DISK_H #define _SCSI_DISK_H /* * More than enough for everybody ;) The huge number of majors * is a leftover from 16bit dev_t days, we don't really need that * much numberspace. */ #define SD_MAJORS 16 /* * Time out in seconds for disks and Magneto-opticals (which are slower). 
*/ #define SD_TIMEOUT (30 * HZ) #define SD_MOD_TIMEOUT (75 * HZ) /* * Flush timeout is a multiplier over the standard device timeout which is * user modifiable via sysfs but initially set to SD_TIMEOUT */ #define SD_FLUSH_TIMEOUT_MULTIPLIER 2 #define SD_WRITE_SAME_TIMEOUT (120 * HZ) /* * Number of allowed retries */ #define SD_MAX_RETRIES 5 #define SD_PASSTHROUGH_RETRIES 1 #define SD_MAX_MEDIUM_TIMEOUTS 2 /* * Size of the initial data buffer for mode and read capacity data */ #define SD_BUF_SIZE 512 /* * Number of sectors at the end of the device to avoid multi-sector * accesses to in the case of last_sector_bug */ #define SD_LAST_BUGGY_SECTORS 8 enum { SD_EXT_CDB_SIZE = 32, /* Extended CDB size */ SD_MEMPOOL_SIZE = 2, /* CDB pool size */ }; enum { SD_DEF_XFER_BLOCKS = 0xffff, SD_MAX_XFER_BLOCKS = 0xffffffff, SD_MAX_WS10_BLOCKS = 0xffff, SD_MAX_WS16_BLOCKS = 0x7fffff, }; enum { SD_LBP_FULL = 0, /* Full logical block provisioning */ SD_LBP_UNMAP, /* Use UNMAP command */ SD_LBP_WS16, /* Use WRITE SAME(16) with UNMAP bit */ SD_LBP_WS10, /* Use WRITE SAME(10) with UNMAP bit */ SD_LBP_ZERO, /* Use WRITE SAME(10) with zero payload */ SD_LBP_DISABLE, /* Discard disabled due to failed cmd */ }; enum { SD_ZERO_WRITE = 0, /* Use WRITE(10/16) command */ SD_ZERO_WS, /* Use WRITE SAME(10/16) command */ SD_ZERO_WS16_UNMAP, /* Use WRITE SAME(16) with UNMAP */ SD_ZERO_WS10_UNMAP, /* Use WRITE SAME(10) with UNMAP */ }; /** * struct zoned_disk_info - Specific properties of a ZBC SCSI device. * @nr_zones: number of zones. * @zone_blocks: number of logical blocks per zone. * * This data structure holds the ZBC SCSI device properties that are retrieved * twice: a first time before the gendisk capacity is known and a second time * after the gendisk capacity is known. */ struct zoned_disk_info { u32 nr_zones; u32 zone_blocks; }; struct scsi_disk { struct scsi_device *device; /* * disk_dev is used to show attributes in /sys/class/scsi_disk/, * but otherwise not really needed. 
Do not use for refcounting. */ struct device disk_dev; struct gendisk *disk; struct opal_dev *opal_dev; #ifdef CONFIG_BLK_DEV_ZONED /* Updated during revalidation before the gendisk capacity is known. */ struct zoned_disk_info early_zone_info; /* Updated during revalidation after the gendisk capacity is known. */ struct zoned_disk_info zone_info; u32 zones_optimal_open; u32 zones_optimal_nonseq; u32 zones_max_open; /* * Either zero or a power of two. If not zero it means that the offset * between zone starting LBAs is constant. */ u32 zone_starting_lba_gran; #endif atomic_t openers; sector_t capacity; /* size in logical blocks */ int max_retries; u32 min_xfer_blocks; u32 max_xfer_blocks; u32 opt_xfer_blocks; u32 max_ws_blocks; u32 max_unmap_blocks; u32 unmap_granularity; u32 unmap_alignment; u32 max_atomic; u32 atomic_alignment; u32 atomic_granularity; u32 max_atomic_with_boundary; u32 max_atomic_boundary; u32 index; unsigned int physical_block_size; unsigned int max_medium_access_timeouts; unsigned int medium_access_timed_out; /* number of permanent streams */ u16 permanent_stream_count; u8 media_present; u8 write_prot; u8 protection_type;/* Data Integrity Field */ u8 provisioning_mode; u8 zeroing_mode; u8 nr_actuators; /* Number of actuators */ bool suspended; /* Disk is suspended (stopped) */ unsigned ATO : 1; /* state of disk ATO bit */ unsigned cache_override : 1; /* temp override of WCE,RCD */ unsigned WCE : 1; /* state of disk WCE bit */ unsigned RCD : 1; /* state of disk RCD bit, unused */ unsigned DPOFUA : 1; /* state of disk DPOFUA bit */ unsigned first_scan : 1; unsigned lbpme : 1; unsigned lbprz : 1; unsigned lbpu : 1; unsigned lbpws : 1; unsigned lbpws10 : 1; unsigned lbpvpd : 1; unsigned ws10 : 1; unsigned ws16 : 1; unsigned rc_basis: 2; unsigned zoned: 2; unsigned urswrz : 1; unsigned security : 1; unsigned ignore_medium_access_errors : 1; unsigned rscs : 1; /* reduced stream control support */ unsigned use_atomic_write_boundary : 1; }; #define 
to_scsi_disk(obj) container_of(obj, struct scsi_disk, disk_dev) static inline struct scsi_disk *scsi_disk(struct gendisk *disk) { return disk->private_data; } #define sd_printk(prefix, sdsk, fmt, a...) \ (sdsk)->disk ? \ sdev_prefix_printk(prefix, (sdsk)->device, \ (sdsk)->disk->disk_name, fmt, ##a) : \ sdev_printk(prefix, (sdsk)->device, fmt, ##a) #define sd_first_printk(prefix, sdsk, fmt, a...) \ do { \ if ((sdsk)->first_scan) \ sd_printk(prefix, sdsk, fmt, ##a); \ } while (0) static inline int scsi_medium_access_command(struct scsi_cmnd *scmd) { switch (scmd->cmnd[0]) { case READ_6: case READ_10: case READ_12: case READ_16: case SYNCHRONIZE_CACHE: case VERIFY: case VERIFY_12: case VERIFY_16: case WRITE_6: case WRITE_10: case WRITE_12: case WRITE_16: case WRITE_SAME: case WRITE_SAME_16: case UNMAP: return 1; case VARIABLE_LENGTH_CMD: switch (scmd->cmnd[9]) { case READ_32: case VERIFY_32: case WRITE_32: case WRITE_SAME_32: return 1; } } return 0; } static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blocks) { return blocks << (ilog2(sdev->sector_size) - 9); } static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks) { return blocks * sdev->sector_size; } static inline sector_t bytes_to_logical(struct scsi_device *sdev, unsigned int bytes) { return bytes >> ilog2(sdev->sector_size); } static inline sector_t sectors_to_logical(struct scsi_device *sdev, sector_t sector) { return sector >> (ilog2(sdev->sector_size) - 9); } void sd_dif_config_host(struct scsi_disk *sdkp, struct queue_limits *lim); #ifdef CONFIG_BLK_DEV_ZONED int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, u8 buf[SD_BUF_SIZE]); int sd_zbc_revalidate_zones(struct scsi_disk *sdkp); blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all); unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); int sd_zbc_report_zones(struct gendisk *disk, 
sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); #else /* CONFIG_BLK_DEV_ZONED */ static inline int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim, u8 buf[SD_BUF_SIZE]) { return 0; } static inline int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) { return 0; } static inline blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd, unsigned char op, bool all) { return BLK_STS_TARGET; } static inline unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr) { return good_bytes; } #define sd_zbc_report_zones NULL #endif /* CONFIG_BLK_DEV_ZONED */ void sd_print_sense_hdr(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr); void sd_print_result(const struct scsi_disk *sdkp, const char *msg, int result); #endif /* _SCSI_DISK_H */
6 5 3 2 1 1 1 5676 5441 363 2021 4887 5894 98 13 129 7 171 5 4 1 1 6201 5399 5402 1 1 1 1 242 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 
504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 
1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 
1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 
1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 
2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 
2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 
3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 
3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 
3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 
4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 
4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 // SPDX-License-Identifier: GPL-2.0 // Generated by scripts/atomic/gen-atomic-fallback.sh // DO NOT MODIFY THIS FILE DIRECTLY #ifndef _LINUX_ATOMIC_FALLBACK_H #define _LINUX_ATOMIC_FALLBACK_H #include <linux/compiler.h> #if defined(arch_xchg) #define raw_xchg arch_xchg #elif defined(arch_xchg_relaxed) #define raw_xchg(...) \ __atomic_op_fence(arch_xchg, __VA_ARGS__) #else extern void raw_xchg_not_implemented(void); #define raw_xchg(...) raw_xchg_not_implemented() #endif #if defined(arch_xchg_acquire) #define raw_xchg_acquire arch_xchg_acquire #elif defined(arch_xchg_relaxed) #define raw_xchg_acquire(...) \ __atomic_op_acquire(arch_xchg, __VA_ARGS__) #elif defined(arch_xchg) #define raw_xchg_acquire arch_xchg #else extern void raw_xchg_acquire_not_implemented(void); #define raw_xchg_acquire(...) raw_xchg_acquire_not_implemented() #endif #if defined(arch_xchg_release) #define raw_xchg_release arch_xchg_release #elif defined(arch_xchg_relaxed) #define raw_xchg_release(...) \ __atomic_op_release(arch_xchg, __VA_ARGS__) #elif defined(arch_xchg) #define raw_xchg_release arch_xchg #else extern void raw_xchg_release_not_implemented(void); #define raw_xchg_release(...) raw_xchg_release_not_implemented() #endif #if defined(arch_xchg_relaxed) #define raw_xchg_relaxed arch_xchg_relaxed #elif defined(arch_xchg) #define raw_xchg_relaxed arch_xchg #else extern void raw_xchg_relaxed_not_implemented(void); #define raw_xchg_relaxed(...) 
raw_xchg_relaxed_not_implemented() #endif #if defined(arch_cmpxchg) #define raw_cmpxchg arch_cmpxchg #elif defined(arch_cmpxchg_relaxed) #define raw_cmpxchg(...) \ __atomic_op_fence(arch_cmpxchg, __VA_ARGS__) #else extern void raw_cmpxchg_not_implemented(void); #define raw_cmpxchg(...) raw_cmpxchg_not_implemented() #endif #if defined(arch_cmpxchg_acquire) #define raw_cmpxchg_acquire arch_cmpxchg_acquire #elif defined(arch_cmpxchg_relaxed) #define raw_cmpxchg_acquire(...) \ __atomic_op_acquire(arch_cmpxchg, __VA_ARGS__) #elif defined(arch_cmpxchg) #define raw_cmpxchg_acquire arch_cmpxchg #else extern void raw_cmpxchg_acquire_not_implemented(void); #define raw_cmpxchg_acquire(...) raw_cmpxchg_acquire_not_implemented() #endif #if defined(arch_cmpxchg_release) #define raw_cmpxchg_release arch_cmpxchg_release #elif defined(arch_cmpxchg_relaxed) #define raw_cmpxchg_release(...) \ __atomic_op_release(arch_cmpxchg, __VA_ARGS__) #elif defined(arch_cmpxchg) #define raw_cmpxchg_release arch_cmpxchg #else extern void raw_cmpxchg_release_not_implemented(void); #define raw_cmpxchg_release(...) raw_cmpxchg_release_not_implemented() #endif #if defined(arch_cmpxchg_relaxed) #define raw_cmpxchg_relaxed arch_cmpxchg_relaxed #elif defined(arch_cmpxchg) #define raw_cmpxchg_relaxed arch_cmpxchg #else extern void raw_cmpxchg_relaxed_not_implemented(void); #define raw_cmpxchg_relaxed(...) raw_cmpxchg_relaxed_not_implemented() #endif #if defined(arch_cmpxchg64) #define raw_cmpxchg64 arch_cmpxchg64 #elif defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64(...) \ __atomic_op_fence(arch_cmpxchg64, __VA_ARGS__) #else extern void raw_cmpxchg64_not_implemented(void); #define raw_cmpxchg64(...) raw_cmpxchg64_not_implemented() #endif #if defined(arch_cmpxchg64_acquire) #define raw_cmpxchg64_acquire arch_cmpxchg64_acquire #elif defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64_acquire(...) 
\ __atomic_op_acquire(arch_cmpxchg64, __VA_ARGS__) #elif defined(arch_cmpxchg64) #define raw_cmpxchg64_acquire arch_cmpxchg64 #else extern void raw_cmpxchg64_acquire_not_implemented(void); #define raw_cmpxchg64_acquire(...) raw_cmpxchg64_acquire_not_implemented() #endif #if defined(arch_cmpxchg64_release) #define raw_cmpxchg64_release arch_cmpxchg64_release #elif defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64_release(...) \ __atomic_op_release(arch_cmpxchg64, __VA_ARGS__) #elif defined(arch_cmpxchg64) #define raw_cmpxchg64_release arch_cmpxchg64 #else extern void raw_cmpxchg64_release_not_implemented(void); #define raw_cmpxchg64_release(...) raw_cmpxchg64_release_not_implemented() #endif #if defined(arch_cmpxchg64_relaxed) #define raw_cmpxchg64_relaxed arch_cmpxchg64_relaxed #elif defined(arch_cmpxchg64) #define raw_cmpxchg64_relaxed arch_cmpxchg64 #else extern void raw_cmpxchg64_relaxed_not_implemented(void); #define raw_cmpxchg64_relaxed(...) raw_cmpxchg64_relaxed_not_implemented() #endif #if defined(arch_cmpxchg128) #define raw_cmpxchg128 arch_cmpxchg128 #elif defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128(...) \ __atomic_op_fence(arch_cmpxchg128, __VA_ARGS__) #else extern void raw_cmpxchg128_not_implemented(void); #define raw_cmpxchg128(...) raw_cmpxchg128_not_implemented() #endif #if defined(arch_cmpxchg128_acquire) #define raw_cmpxchg128_acquire arch_cmpxchg128_acquire #elif defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128_acquire(...) \ __atomic_op_acquire(arch_cmpxchg128, __VA_ARGS__) #elif defined(arch_cmpxchg128) #define raw_cmpxchg128_acquire arch_cmpxchg128 #else extern void raw_cmpxchg128_acquire_not_implemented(void); #define raw_cmpxchg128_acquire(...) raw_cmpxchg128_acquire_not_implemented() #endif #if defined(arch_cmpxchg128_release) #define raw_cmpxchg128_release arch_cmpxchg128_release #elif defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128_release(...) 
\ __atomic_op_release(arch_cmpxchg128, __VA_ARGS__) #elif defined(arch_cmpxchg128) #define raw_cmpxchg128_release arch_cmpxchg128 #else extern void raw_cmpxchg128_release_not_implemented(void); #define raw_cmpxchg128_release(...) raw_cmpxchg128_release_not_implemented() #endif #if defined(arch_cmpxchg128_relaxed) #define raw_cmpxchg128_relaxed arch_cmpxchg128_relaxed #elif defined(arch_cmpxchg128) #define raw_cmpxchg128_relaxed arch_cmpxchg128 #else extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_cmpxchg128_relaxed(...) raw_cmpxchg128_relaxed_not_implemented() #endif #if defined(arch_try_cmpxchg) #define raw_try_cmpxchg arch_try_cmpxchg #elif defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg(...) \ __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__) #else #define raw_try_cmpxchg(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg_acquire) #define raw_try_cmpxchg_acquire arch_try_cmpxchg_acquire #elif defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg_acquire(...) \ __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__) #elif defined(arch_try_cmpxchg) #define raw_try_cmpxchg_acquire arch_try_cmpxchg #else #define raw_try_cmpxchg_acquire(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_acquire((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg_release) #define raw_try_cmpxchg_release arch_try_cmpxchg_release #elif defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg_release(...) 
\ __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__) #elif defined(arch_try_cmpxchg) #define raw_try_cmpxchg_release arch_try_cmpxchg #else #define raw_try_cmpxchg_release(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_release((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg_relaxed) #define raw_try_cmpxchg_relaxed arch_try_cmpxchg_relaxed #elif defined(arch_try_cmpxchg) #define raw_try_cmpxchg_relaxed arch_try_cmpxchg #else #define raw_try_cmpxchg_relaxed(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_relaxed((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64 arch_try_cmpxchg64 #elif defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64(...) \ __atomic_op_fence(arch_try_cmpxchg64, __VA_ARGS__) #else #define raw_try_cmpxchg64(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64_acquire) #define raw_try_cmpxchg64_acquire arch_try_cmpxchg64_acquire #elif defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64_acquire(...) 
\ __atomic_op_acquire(arch_try_cmpxchg64, __VA_ARGS__) #elif defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64_acquire arch_try_cmpxchg64 #else #define raw_try_cmpxchg64_acquire(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_acquire((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64_release) #define raw_try_cmpxchg64_release arch_try_cmpxchg64_release #elif defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64_release(...) \ __atomic_op_release(arch_try_cmpxchg64, __VA_ARGS__) #elif defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64_release arch_try_cmpxchg64 #else #define raw_try_cmpxchg64_release(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_release((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg64_relaxed) #define raw_try_cmpxchg64_relaxed arch_try_cmpxchg64_relaxed #elif defined(arch_try_cmpxchg64) #define raw_try_cmpxchg64_relaxed arch_try_cmpxchg64 #else #define raw_try_cmpxchg64_relaxed(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_relaxed((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128 arch_try_cmpxchg128 #elif defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128(...) 
\ __atomic_op_fence(arch_try_cmpxchg128, __VA_ARGS__) #else #define raw_try_cmpxchg128(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128_acquire) #define raw_try_cmpxchg128_acquire arch_try_cmpxchg128_acquire #elif defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128_acquire(...) \ __atomic_op_acquire(arch_try_cmpxchg128, __VA_ARGS__) #elif defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128_acquire arch_try_cmpxchg128 #else #define raw_try_cmpxchg128_acquire(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_acquire((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128_release) #define raw_try_cmpxchg128_release arch_try_cmpxchg128_release #elif defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128_release(...) 
\ __atomic_op_release(arch_try_cmpxchg128, __VA_ARGS__) #elif defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128_release arch_try_cmpxchg128 #else #define raw_try_cmpxchg128_release(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_release((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #if defined(arch_try_cmpxchg128_relaxed) #define raw_try_cmpxchg128_relaxed arch_try_cmpxchg128_relaxed #elif defined(arch_try_cmpxchg128) #define raw_try_cmpxchg128_relaxed arch_try_cmpxchg128 #else #define raw_try_cmpxchg128_relaxed(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_relaxed((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_cmpxchg_local arch_cmpxchg_local #ifdef arch_try_cmpxchg_local #define raw_try_cmpxchg_local arch_try_cmpxchg_local #else #define raw_try_cmpxchg_local(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg_local((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_cmpxchg64_local arch_cmpxchg64_local #ifdef arch_try_cmpxchg64_local #define raw_try_cmpxchg64_local arch_try_cmpxchg64_local #else #define raw_try_cmpxchg64_local(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg64_local((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif #define raw_cmpxchg128_local arch_cmpxchg128_local #ifdef arch_try_cmpxchg128_local #define raw_try_cmpxchg128_local arch_try_cmpxchg128_local #else #define raw_try_cmpxchg128_local(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_cmpxchg128_local((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ 
likely(___r == ___o); \ }) #endif #define raw_sync_cmpxchg arch_sync_cmpxchg #ifdef arch_sync_try_cmpxchg #define raw_sync_try_cmpxchg arch_sync_try_cmpxchg #else #define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ ({ \ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ if (unlikely(___r != ___o)) \ *___op = ___r; \ likely(___r == ___o); \ }) #endif /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t * * Atomically loads the value of @v with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_read() elsewhere. * * Return: The value loaded from @v. */ static __always_inline int raw_atomic_read(const atomic_t *v) { return arch_atomic_read(v); } /** * raw_atomic_read_acquire() - atomic load with acquire ordering * @v: pointer to atomic_t * * Atomically loads the value of @v with acquire ordering. * * Safe to use in noinstr code; prefer atomic_read_acquire() elsewhere. * * Return: The value loaded from @v. */ static __always_inline int raw_atomic_read_acquire(const atomic_t *v) { #if defined(arch_atomic_read_acquire) return arch_atomic_read_acquire(v); #else int ret; if (__native_word(atomic_t)) { ret = smp_load_acquire(&(v)->counter); } else { ret = raw_atomic_read(v); __atomic_acquire_fence(); } return ret; #endif } /** * raw_atomic_set() - atomic set with relaxed ordering * @v: pointer to atomic_t * @i: int value to assign * * Atomically sets @v to @i with relaxed ordering. * * Safe to use in noinstr code; prefer atomic_set() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic_set(atomic_t *v, int i) { arch_atomic_set(v, i); } /** * raw_atomic_set_release() - atomic set with release ordering * @v: pointer to atomic_t * @i: int value to assign * * Atomically sets @v to @i with release ordering. * * Safe to use in noinstr code; prefer atomic_set_release() elsewhere. * * Return: Nothing. 
*/
static __always_inline void
raw_atomic_set_release(atomic_t *v, int i)
{
#if defined(arch_atomic_set_release)
	arch_atomic_set_release(v, i);
#else
	/*
	 * No arch op: when __native_word(atomic_t) holds, store straight to
	 * the counter with smp_store_release(); otherwise issue the release
	 * fence first and then do raw_atomic_set().
	 */
	if (__native_word(atomic_t)) {
		smp_store_release(&(v)->counter, i);
	} else {
		__atomic_release_fence();
		raw_atomic_set(v, i);
	}
#endif
}

/**
 * raw_atomic_add() - atomic add with relaxed ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_add(int i, atomic_t *v)
{
	/* Unconditional: there is no fallback path for this op. */
	arch_atomic_add(i, v);
}

/**
 * raw_atomic_add_return() - atomic add with full ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_return() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_add_return(int i, atomic_t *v)
{
#if defined(arch_atomic_add_return)
	return arch_atomic_add_return(i, v);
#elif defined(arch_atomic_add_return_relaxed)
	/* Only the relaxed arch op exists: wrap it in the pre/post full fences. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_add_return_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* Neither arch form is defined: stop the build. */
#error "Unable to define raw_atomic_add_return"
#endif
}

/**
 * raw_atomic_add_return_acquire() - atomic add with acquire ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_return_acquire() elsewhere.
 *
 * Return: The updated value of @v.
*/
static __always_inline int
raw_atomic_add_return_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_add_return_acquire)
	return arch_atomic_add_return_acquire(i, v);
#elif defined(arch_atomic_add_return_relaxed)
	/* Relaxed arch op first, acquire fence after it. */
	int ret = arch_atomic_add_return_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_add_return)
	/* No _acquire or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_add_return(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_add_return_acquire"
#endif
}

/**
 * raw_atomic_add_return_release() - atomic add with release ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_return_release() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_add_return_release(int i, atomic_t *v)
{
#if defined(arch_atomic_add_return_release)
	return arch_atomic_add_return_release(i, v);
#elif defined(arch_atomic_add_return_relaxed)
	/* Release fence first, then the relaxed arch op. */
	__atomic_release_fence();
	return arch_atomic_add_return_relaxed(i, v);
#elif defined(arch_atomic_add_return)
	/* No _release or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_add_return(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_add_return_release"
#endif
}

/**
 * raw_atomic_add_return_relaxed() - atomic add with relaxed ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_return_relaxed() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_add_return_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_add_return_relaxed)
	return arch_atomic_add_return_relaxed(i, v);
#elif defined(arch_atomic_add_return)
	/* No _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_add_return(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_add_return_relaxed"
#endif
}

/**
 * raw_atomic_fetch_add() - atomic add with full ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with full ordering.
*
 * Safe to use in noinstr code; prefer atomic_fetch_add() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_add(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_add)
	return arch_atomic_fetch_add(i, v);
#elif defined(arch_atomic_fetch_add_relaxed)
	/* Only the relaxed arch op exists: wrap it in the pre/post full fences. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_add_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* Neither arch form is defined: stop the build. */
#error "Unable to define raw_atomic_fetch_add"
#endif
}

/**
 * raw_atomic_fetch_add_acquire() - atomic add with acquire ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_add_acquire() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_add_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_add_acquire)
	return arch_atomic_fetch_add_acquire(i, v);
#elif defined(arch_atomic_fetch_add_relaxed)
	/* Relaxed arch op first, acquire fence after it. */
	int ret = arch_atomic_fetch_add_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_add)
	/* No _acquire or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_add(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_fetch_add_acquire"
#endif
}

/**
 * raw_atomic_fetch_add_release() - atomic add with release ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_add_release() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_add_release(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_add_release)
	return arch_atomic_fetch_add_release(i, v);
#elif defined(arch_atomic_fetch_add_relaxed)
	/* Release fence first, then the relaxed arch op. */
	__atomic_release_fence();
	return arch_atomic_fetch_add_relaxed(i, v);
#elif defined(arch_atomic_fetch_add)
	/* No _release or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_add(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_fetch_add_release"
#endif
}

/**
 * raw_atomic_fetch_add_relaxed() - atomic add with relaxed ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_add_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_add_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_add_relaxed)
	return arch_atomic_fetch_add_relaxed(i, v);
#elif defined(arch_atomic_fetch_add)
	/* No _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_add(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_fetch_add_relaxed"
#endif
}

/**
 * raw_atomic_sub() - atomic subtract with relaxed ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_sub() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_sub(int i, atomic_t *v)
{
	/* Unconditional: there is no fallback path for this op. */
	arch_atomic_sub(i, v);
}

/**
 * raw_atomic_sub_return() - atomic subtract with full ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_sub_return() elsewhere.
 *
 * Return: The updated value of @v.
*/
static __always_inline int
raw_atomic_sub_return(int i, atomic_t *v)
{
#if defined(arch_atomic_sub_return)
	return arch_atomic_sub_return(i, v);
#elif defined(arch_atomic_sub_return_relaxed)
	/* Only the relaxed arch op exists: wrap it in the pre/post full fences. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_sub_return_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* Neither arch form is defined: stop the build. */
#error "Unable to define raw_atomic_sub_return"
#endif
}

/**
 * raw_atomic_sub_return_acquire() - atomic subtract with acquire ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_sub_return_acquire() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_sub_return_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_sub_return_acquire)
	return arch_atomic_sub_return_acquire(i, v);
#elif defined(arch_atomic_sub_return_relaxed)
	/* Relaxed arch op first, acquire fence after it. */
	int ret = arch_atomic_sub_return_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_sub_return)
	/* No _acquire or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_sub_return(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_sub_return_acquire"
#endif
}

/**
 * raw_atomic_sub_return_release() - atomic subtract with release ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_sub_return_release() elsewhere.
 *
 * Return: The updated value of @v.
*/
static __always_inline int
raw_atomic_sub_return_release(int i, atomic_t *v)
{
#if defined(arch_atomic_sub_return_release)
	return arch_atomic_sub_return_release(i, v);
#elif defined(arch_atomic_sub_return_relaxed)
	/* Release fence first, then the relaxed arch op. */
	__atomic_release_fence();
	return arch_atomic_sub_return_relaxed(i, v);
#elif defined(arch_atomic_sub_return)
	/* No _release or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_sub_return(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_sub_return_release"
#endif
}

/**
 * raw_atomic_sub_return_relaxed() - atomic subtract with relaxed ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_sub_return_relaxed() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_sub_return_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_sub_return_relaxed)
	return arch_atomic_sub_return_relaxed(i, v);
#elif defined(arch_atomic_sub_return)
	/* No _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_sub_return(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_sub_return_relaxed"
#endif
}

/**
 * raw_atomic_fetch_sub() - atomic subtract with full ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_sub() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_sub(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_sub)
	return arch_atomic_fetch_sub(i, v);
#elif defined(arch_atomic_fetch_sub_relaxed)
	/* Only the relaxed arch op exists: wrap it in the pre/post full fences. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_sub_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* Neither arch form is defined: stop the build. */
#error "Unable to define raw_atomic_fetch_sub"
#endif
}

/**
 * raw_atomic_fetch_sub_acquire() - atomic subtract with acquire ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_sub_acquire() elsewhere.
*
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_sub_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_sub_acquire)
	return arch_atomic_fetch_sub_acquire(i, v);
#elif defined(arch_atomic_fetch_sub_relaxed)
	/* Relaxed arch op first, acquire fence after it. */
	int ret = arch_atomic_fetch_sub_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_sub)
	/* No _acquire or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_sub(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_fetch_sub_acquire"
#endif
}

/**
 * raw_atomic_fetch_sub_release() - atomic subtract with release ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_sub_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_sub_release(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_sub_release)
	return arch_atomic_fetch_sub_release(i, v);
#elif defined(arch_atomic_fetch_sub_relaxed)
	/* Release fence first, then the relaxed arch op. */
	__atomic_release_fence();
	return arch_atomic_fetch_sub_relaxed(i, v);
#elif defined(arch_atomic_fetch_sub)
	/* No _release or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_sub(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_fetch_sub_release"
#endif
}

/**
 * raw_atomic_fetch_sub_relaxed() - atomic subtract with relaxed ordering
 * @i: int value to subtract
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_sub_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_sub_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_sub_relaxed)
	return arch_atomic_fetch_sub_relaxed(i, v);
#elif defined(arch_atomic_fetch_sub)
	/* No _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_sub(i, v);
#else
	/* No arch form to build on: stop the build. */
#error "Unable to define raw_atomic_fetch_sub_relaxed"
#endif
}

/**
 * raw_atomic_inc() - atomic increment with relaxed ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with relaxed ordering.
*
 * Safe to use in noinstr code; prefer atomic_inc() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_inc(atomic_t *v)
{
#if defined(arch_atomic_inc)
	arch_atomic_inc(v);
#else
	/* No arch increment op: express it as an add of 1. */
	raw_atomic_add(1, v);
#endif
}

/**
 * raw_atomic_inc_return() - atomic increment with full ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_inc_return() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_inc_return(atomic_t *v)
{
#if defined(arch_atomic_inc_return)
	return arch_atomic_inc_return(v);
#elif defined(arch_atomic_inc_return_relaxed)
	/* Only the relaxed arch op exists: wrap it in the pre/post full fences. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_inc_return_relaxed(v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_add_return(1, v);
#endif
}

/**
 * raw_atomic_inc_return_acquire() - atomic increment with acquire ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_inc_return_acquire() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_inc_return_acquire(atomic_t *v)
{
#if defined(arch_atomic_inc_return_acquire)
	return arch_atomic_inc_return_acquire(v);
#elif defined(arch_atomic_inc_return_relaxed)
	/* Relaxed arch op first, acquire fence after it. */
	int ret = arch_atomic_inc_return_relaxed(v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_inc_return)
	/* No _acquire or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_inc_return(v);
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_add_return_acquire(1, v);
#endif
}

/**
 * raw_atomic_inc_return_release() - atomic increment with release ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_inc_return_release() elsewhere.
 *
 * Return: The updated value of @v.
*/
static __always_inline int
raw_atomic_inc_return_release(atomic_t *v)
{
#if defined(arch_atomic_inc_return_release)
	return arch_atomic_inc_return_release(v);
#elif defined(arch_atomic_inc_return_relaxed)
	/* Release fence first, then the relaxed arch op. */
	__atomic_release_fence();
	return arch_atomic_inc_return_relaxed(v);
#elif defined(arch_atomic_inc_return)
	/* No _release or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_inc_return(v);
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_add_return_release(1, v);
#endif
}

/**
 * raw_atomic_inc_return_relaxed() - atomic increment with relaxed ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_inc_return_relaxed() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_inc_return_relaxed(atomic_t *v)
{
#if defined(arch_atomic_inc_return_relaxed)
	return arch_atomic_inc_return_relaxed(v);
#elif defined(arch_atomic_inc_return)
	/* No _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_inc_return(v);
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_add_return_relaxed(1, v);
#endif
}

/**
 * raw_atomic_fetch_inc() - atomic increment with full ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_inc() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_inc(atomic_t *v)
{
#if defined(arch_atomic_fetch_inc)
	return arch_atomic_fetch_inc(v);
#elif defined(arch_atomic_fetch_inc_relaxed)
	/* Only the relaxed arch op exists: wrap it in the pre/post full fences. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_inc_relaxed(v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_fetch_add(1, v);
#endif
}

/**
 * raw_atomic_fetch_inc_acquire() - atomic increment with acquire ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_inc_acquire() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_inc_acquire(atomic_t *v)
{
#if defined(arch_atomic_fetch_inc_acquire)
	return arch_atomic_fetch_inc_acquire(v);
#elif defined(arch_atomic_fetch_inc_relaxed)
	/* Relaxed arch op first, acquire fence after it. */
	int ret = arch_atomic_fetch_inc_relaxed(v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_inc)
	/* No _acquire or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_inc(v);
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_fetch_add_acquire(1, v);
#endif
}

/**
 * raw_atomic_fetch_inc_release() - atomic increment with release ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_inc_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_inc_release(atomic_t *v)
{
#if defined(arch_atomic_fetch_inc_release)
	return arch_atomic_fetch_inc_release(v);
#elif defined(arch_atomic_fetch_inc_relaxed)
	/* Release fence first, then the relaxed arch op. */
	__atomic_release_fence();
	return arch_atomic_fetch_inc_relaxed(v);
#elif defined(arch_atomic_fetch_inc)
	/* No _release or _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_inc(v);
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_fetch_add_release(1, v);
#endif
}

/**
 * raw_atomic_fetch_inc_relaxed() - atomic increment with relaxed ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + 1) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_inc_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_inc_relaxed(atomic_t *v)
{
#if defined(arch_atomic_fetch_inc_relaxed)
	return arch_atomic_fetch_inc_relaxed(v);
#elif defined(arch_atomic_fetch_inc)
	/* No _relaxed arch op: use the unsuffixed one. */
	return arch_atomic_fetch_inc(v);
#else
	/* No arch increment op: express it as an add of 1. */
	return raw_atomic_fetch_add_relaxed(1, v);
#endif
}

/**
 * raw_atomic_dec() - atomic decrement with relaxed ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_dec() elsewhere.
 *
 * Return: Nothing.
*/
static __always_inline void
raw_atomic_dec(atomic_t *v)
{
#if defined(arch_atomic_dec)
	arch_atomic_dec(v);
#else
	/* No arch_atomic_dec: subtract 1 through raw_atomic_sub(). */
	raw_atomic_sub(1, v);
#endif
}

/**
 * raw_atomic_dec_return() - atomic decrement with full ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_dec_return() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_dec_return(atomic_t *v)
{
#if defined(arch_atomic_dec_return)
	return arch_atomic_dec_return(v);
#elif defined(arch_atomic_dec_return_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_dec_return_relaxed(v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch dec_return variant: delegate to sub_return with an operand of 1. */
	return raw_atomic_sub_return(1, v);
#endif
}

/**
 * raw_atomic_dec_return_acquire() - atomic decrement with acquire ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_dec_return_acquire() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_dec_return_acquire(atomic_t *v)
{
#if defined(arch_atomic_dec_return_acquire)
	return arch_atomic_dec_return_acquire(v);
#elif defined(arch_atomic_dec_return_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_dec_return_relaxed(v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_dec_return)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_dec_return(v);
#else
	/* No arch dec_return variant: delegate to sub_return with an operand of 1. */
	return raw_atomic_sub_return_acquire(1, v);
#endif
}

/**
 * raw_atomic_dec_return_release() - atomic decrement with release ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_dec_return_release() elsewhere.
 *
 * Return: The updated value of @v.
*/
static __always_inline int
raw_atomic_dec_return_release(atomic_t *v)
{
#if defined(arch_atomic_dec_return_release)
	return arch_atomic_dec_return_release(v);
#elif defined(arch_atomic_dec_return_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_dec_return_relaxed(v);
#elif defined(arch_atomic_dec_return)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_dec_return(v);
#else
	/* No arch dec_return variant: delegate to sub_return with an operand of 1. */
	return raw_atomic_sub_return_release(1, v);
#endif
}

/**
 * raw_atomic_dec_return_relaxed() - atomic decrement with relaxed ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_dec_return_relaxed() elsewhere.
 *
 * Return: The updated value of @v.
 */
static __always_inline int
raw_atomic_dec_return_relaxed(atomic_t *v)
{
#if defined(arch_atomic_dec_return_relaxed)
	return arch_atomic_dec_return_relaxed(v);
#elif defined(arch_atomic_dec_return)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_dec_return(v);
#else
	/* No arch dec_return variant: delegate to sub_return with an operand of 1. */
	return raw_atomic_sub_return_relaxed(1, v);
#endif
}

/**
 * raw_atomic_fetch_dec() - atomic decrement with full ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_dec() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_dec(atomic_t *v)
{
#if defined(arch_atomic_fetch_dec)
	return arch_atomic_fetch_dec(v);
#elif defined(arch_atomic_fetch_dec_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_dec_relaxed(v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch fetch_dec variant: delegate to fetch_sub with an operand of 1. */
	return raw_atomic_fetch_sub(1, v);
#endif
}

/**
 * raw_atomic_fetch_dec_acquire() - atomic decrement with acquire ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_dec_acquire() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_dec_acquire(atomic_t *v)
{
#if defined(arch_atomic_fetch_dec_acquire)
	return arch_atomic_fetch_dec_acquire(v);
#elif defined(arch_atomic_fetch_dec_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_fetch_dec_relaxed(v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_dec)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_dec(v);
#else
	/* No arch fetch_dec variant: delegate to fetch_sub with an operand of 1. */
	return raw_atomic_fetch_sub_acquire(1, v);
#endif
}

/**
 * raw_atomic_fetch_dec_release() - atomic decrement with release ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_dec_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_dec_release(atomic_t *v)
{
#if defined(arch_atomic_fetch_dec_release)
	return arch_atomic_fetch_dec_release(v);
#elif defined(arch_atomic_fetch_dec_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_fetch_dec_relaxed(v);
#elif defined(arch_atomic_fetch_dec)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_dec(v);
#else
	/* No arch fetch_dec variant: delegate to fetch_sub with an operand of 1. */
	return raw_atomic_fetch_sub_release(1, v);
#endif
}

/**
 * raw_atomic_fetch_dec_relaxed() - atomic decrement with relaxed ordering
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v - 1) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_dec_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_dec_relaxed(atomic_t *v)
{
#if defined(arch_atomic_fetch_dec_relaxed)
	return arch_atomic_fetch_dec_relaxed(v);
#elif defined(arch_atomic_fetch_dec)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_dec(v);
#else
	/* No arch fetch_dec variant: delegate to fetch_sub with an operand of 1. */
	return raw_atomic_fetch_sub_relaxed(1, v);
#endif
}

/**
 * raw_atomic_and() - atomic bitwise AND with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_and() elsewhere.
 *
 * Return: Nothing.
*/
static __always_inline void
raw_atomic_and(int i, atomic_t *v)
{
	/* Called unconditionally: this wrapper has no fallback path. */
	arch_atomic_and(i, v);
}

/**
 * raw_atomic_fetch_and() - atomic bitwise AND with full ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_and() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_and(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_and)
	return arch_atomic_fetch_and(i, v);
#elif defined(arch_atomic_fetch_and_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_and_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_and"
#endif
}

/**
 * raw_atomic_fetch_and_acquire() - atomic bitwise AND with acquire ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_and_acquire() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_and_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_and_acquire)
	return arch_atomic_fetch_and_acquire(i, v);
#elif defined(arch_atomic_fetch_and_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_fetch_and_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_and)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_and(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_and_acquire"
#endif
}

/**
 * raw_atomic_fetch_and_release() - atomic bitwise AND with release ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_and_release() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_and_release(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_and_release)
	return arch_atomic_fetch_and_release(i, v);
#elif defined(arch_atomic_fetch_and_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_fetch_and_relaxed(i, v);
#elif defined(arch_atomic_fetch_and)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_and(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_and_release"
#endif
}

/**
 * raw_atomic_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_and_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_and_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_and_relaxed)
	return arch_atomic_fetch_and_relaxed(i, v);
#elif defined(arch_atomic_fetch_and)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_and(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_and_relaxed"
#endif
}

/**
 * raw_atomic_andnot() - atomic bitwise AND NOT with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & ~@i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_andnot() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_andnot(int i, atomic_t *v)
{
#if defined(arch_atomic_andnot)
	arch_atomic_andnot(i, v);
#else
	/* No arch andnot: AND with the bitwise complement of @i instead. */
	raw_atomic_and(~i, v);
#endif
}

/**
 * raw_atomic_fetch_andnot() - atomic bitwise AND NOT with full ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & ~@i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_andnot() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_andnot(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_andnot)
	return arch_atomic_fetch_andnot(i, v);
#elif defined(arch_atomic_fetch_andnot_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_andnot_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch fetch_andnot variant: fetch_and with the complement of @i. */
	return raw_atomic_fetch_and(~i, v);
#endif
}

/**
 * raw_atomic_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & ~@i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_andnot_acquire() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_andnot_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_andnot_acquire)
	return arch_atomic_fetch_andnot_acquire(i, v);
#elif defined(arch_atomic_fetch_andnot_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_fetch_andnot_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_andnot)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_andnot(i, v);
#else
	/* No arch fetch_andnot variant: fetch_and with the complement of @i. */
	return raw_atomic_fetch_and_acquire(~i, v);
#endif
}

/**
 * raw_atomic_fetch_andnot_release() - atomic bitwise AND NOT with release ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & ~@i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_andnot_release() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_andnot_release(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_andnot_release)
	return arch_atomic_fetch_andnot_release(i, v);
#elif defined(arch_atomic_fetch_andnot_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_fetch_andnot_relaxed(i, v);
#elif defined(arch_atomic_fetch_andnot)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_andnot(i, v);
#else
	/* No arch fetch_andnot variant: fetch_and with the complement of @i. */
	return raw_atomic_fetch_and_release(~i, v);
#endif
}

/**
 * raw_atomic_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v & ~@i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_andnot_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_andnot_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_andnot_relaxed)
	return arch_atomic_fetch_andnot_relaxed(i, v);
#elif defined(arch_atomic_fetch_andnot)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_andnot(i, v);
#else
	/* No arch fetch_andnot variant: fetch_and with the complement of @i. */
	return raw_atomic_fetch_and_relaxed(~i, v);
#endif
}

/**
 * raw_atomic_or() - atomic bitwise OR with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v | @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_or() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_or(int i, atomic_t *v)
{
	/* Called unconditionally: this wrapper has no fallback path. */
	arch_atomic_or(i, v);
}

/**
 * raw_atomic_fetch_or() - atomic bitwise OR with full ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v | @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_or() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_or(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_or)
	return arch_atomic_fetch_or(i, v);
#elif defined(arch_atomic_fetch_or_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_or_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_or"
#endif
}

/**
 * raw_atomic_fetch_or_acquire() - atomic bitwise OR with acquire ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v | @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_or_acquire() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_or_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_or_acquire)
	return arch_atomic_fetch_or_acquire(i, v);
#elif defined(arch_atomic_fetch_or_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_fetch_or_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_or)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_or(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_or_acquire"
#endif
}

/**
 * raw_atomic_fetch_or_release() - atomic bitwise OR with release ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v | @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_or_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_or_release(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_or_release)
	return arch_atomic_fetch_or_release(i, v);
#elif defined(arch_atomic_fetch_or_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_fetch_or_relaxed(i, v);
#elif defined(arch_atomic_fetch_or)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_or(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_or_release"
#endif
}

/**
 * raw_atomic_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v | @i) with relaxed ordering.
*
 * Safe to use in noinstr code; prefer atomic_fetch_or_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_or_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_or_relaxed)
	return arch_atomic_fetch_or_relaxed(i, v);
#elif defined(arch_atomic_fetch_or)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_or(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_or_relaxed"
#endif
}

/**
 * raw_atomic_xor() - atomic bitwise XOR with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v ^ @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_xor() elsewhere.
 *
 * Return: Nothing.
 */
static __always_inline void
raw_atomic_xor(int i, atomic_t *v)
{
	/* Called unconditionally: this wrapper has no fallback path. */
	arch_atomic_xor(i, v);
}

/**
 * raw_atomic_fetch_xor() - atomic bitwise XOR with full ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v ^ @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_xor() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_xor(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_xor)
	return arch_atomic_fetch_xor(i, v);
#elif defined(arch_atomic_fetch_xor_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_fetch_xor_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_xor"
#endif
}

/**
 * raw_atomic_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v ^ @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_xor_acquire() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_fetch_xor_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_xor_acquire)
	return arch_atomic_fetch_xor_acquire(i, v);
#elif defined(arch_atomic_fetch_xor_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_fetch_xor_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_fetch_xor)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_xor(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_xor_acquire"
#endif
}

/**
 * raw_atomic_fetch_xor_release() - atomic bitwise XOR with release ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v ^ @i) with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_xor_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_xor_release(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_xor_release)
	return arch_atomic_fetch_xor_release(i, v);
#elif defined(arch_atomic_fetch_xor_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_fetch_xor_relaxed(i, v);
#elif defined(arch_atomic_fetch_xor)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_xor(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_xor_release"
#endif
}

/**
 * raw_atomic_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering
 * @i: int value
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v ^ @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_xor_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_fetch_xor_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_fetch_xor_relaxed)
	return arch_atomic_fetch_xor_relaxed(i, v);
#elif defined(arch_atomic_fetch_xor)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_fetch_xor(i, v);
#else
	/* No generic fallback: the build fails unless the arch supplies a variant. */
#error "Unable to define raw_atomic_fetch_xor_relaxed"
#endif
}

/**
 * raw_atomic_xchg() - atomic exchange with full ordering
 * @v: pointer to atomic_t
 * @new: int value to assign
 *
 * Atomically updates @v to @new with full ordering.
*
 * Safe to use in noinstr code; prefer atomic_xchg() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_xchg(atomic_t *v, int new)
{
#if defined(arch_atomic_xchg)
	return arch_atomic_xchg(v, new);
#elif defined(arch_atomic_xchg_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_xchg_relaxed(v, new);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch atomic xchg variant: apply raw_xchg() to the counter field. */
	return raw_xchg(&v->counter, new);
#endif
}

/**
 * raw_atomic_xchg_acquire() - atomic exchange with acquire ordering
 * @v: pointer to atomic_t
 * @new: int value to assign
 *
 * Atomically updates @v to @new with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_xchg_acquire() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_xchg_acquire(atomic_t *v, int new)
{
#if defined(arch_atomic_xchg_acquire)
	return arch_atomic_xchg_acquire(v, new);
#elif defined(arch_atomic_xchg_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_xchg_relaxed(v, new);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_xchg)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_xchg(v, new);
#else
	/* No arch atomic xchg variant: apply raw_xchg_acquire() to the counter field. */
	return raw_xchg_acquire(&v->counter, new);
#endif
}

/**
 * raw_atomic_xchg_release() - atomic exchange with release ordering
 * @v: pointer to atomic_t
 * @new: int value to assign
 *
 * Atomically updates @v to @new with release ordering.
 *
 * Safe to use in noinstr code; prefer atomic_xchg_release() elsewhere.
 *
 * Return: The original value of @v.
*/
static __always_inline int
raw_atomic_xchg_release(atomic_t *v, int new)
{
#if defined(arch_atomic_xchg_release)
	return arch_atomic_xchg_release(v, new);
#elif defined(arch_atomic_xchg_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_xchg_relaxed(v, new);
#elif defined(arch_atomic_xchg)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_xchg(v, new);
#else
	/* No arch atomic xchg variant: apply raw_xchg_release() to the counter field. */
	return raw_xchg_release(&v->counter, new);
#endif
}

/**
 * raw_atomic_xchg_relaxed() - atomic exchange with relaxed ordering
 * @v: pointer to atomic_t
 * @new: int value to assign
 *
 * Atomically updates @v to @new with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_xchg_relaxed() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_xchg_relaxed(atomic_t *v, int new)
{
#if defined(arch_atomic_xchg_relaxed)
	return arch_atomic_xchg_relaxed(v, new);
#elif defined(arch_atomic_xchg)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_xchg(v, new);
#else
	/* No arch atomic xchg variant: apply raw_xchg_relaxed() to the counter field. */
	return raw_xchg_relaxed(&v->counter, new);
#endif
}

/**
 * raw_atomic_cmpxchg() - atomic compare and exchange with full ordering
 * @v: pointer to atomic_t
 * @old: int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_cmpxchg() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_cmpxchg(atomic_t *v, int old, int new)
{
#if defined(arch_atomic_cmpxchg)
	return arch_atomic_cmpxchg(v, old, new);
#elif defined(arch_atomic_cmpxchg_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	int ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_cmpxchg_relaxed(v, old, new);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch atomic cmpxchg variant: apply raw_cmpxchg() to the counter field. */
	return raw_cmpxchg(&v->counter, old, new);
#endif
}

/**
 * raw_atomic_cmpxchg_acquire() - atomic compare and exchange with acquire ordering
 * @v: pointer to atomic_t
 * @old: int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with acquire ordering.
* Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_cmpxchg_acquire() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_cmpxchg_acquire(atomic_t *v, int old, int new)
{
#if defined(arch_atomic_cmpxchg_acquire)
	return arch_atomic_cmpxchg_acquire(v, old, new);
#elif defined(arch_atomic_cmpxchg_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	int ret = arch_atomic_cmpxchg_relaxed(v, old, new);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_cmpxchg)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_cmpxchg(v, old, new);
#else
	/* No arch atomic cmpxchg variant: apply raw_cmpxchg_acquire() to the counter field. */
	return raw_cmpxchg_acquire(&v->counter, old, new);
#endif
}

/**
 * raw_atomic_cmpxchg_release() - atomic compare and exchange with release ordering
 * @v: pointer to atomic_t
 * @old: int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with release ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_cmpxchg_release() elsewhere.
 *
 * Return: The original value of @v.
 */
static __always_inline int
raw_atomic_cmpxchg_release(atomic_t *v, int old, int new)
{
#if defined(arch_atomic_cmpxchg_release)
	return arch_atomic_cmpxchg_release(v, old, new);
#elif defined(arch_atomic_cmpxchg_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_cmpxchg_relaxed(v, old, new);
#elif defined(arch_atomic_cmpxchg)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_cmpxchg(v, old, new);
#else
	/* No arch atomic cmpxchg variant: apply raw_cmpxchg_release() to the counter field. */
	return raw_cmpxchg_release(&v->counter, old, new);
#endif
}

/**
 * raw_atomic_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering
 * @v: pointer to atomic_t
 * @old: int value to compare with
 * @new: int value to assign
 *
 * If (@v == @old), atomically updates @v to @new with relaxed ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_cmpxchg_relaxed() elsewhere.
 *
 * Return: The original value of @v.
*/ static __always_inline int raw_atomic_cmpxchg_relaxed(atomic_t *v, int old, int new) { #if defined(arch_atomic_cmpxchg_relaxed) return arch_atomic_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic_cmpxchg) return arch_atomic_cmpxchg(v, old, new); #else return raw_cmpxchg_relaxed(&v->counter, old, new); #endif } /** * raw_atomic_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic_t * @old: pointer to int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg() elsewhere. * * Return: @true if the exchange occured, @false otherwise. */ static __always_inline bool raw_atomic_try_cmpxchg(atomic_t *v, int *old, int new) { #if defined(arch_atomic_try_cmpxchg) return arch_atomic_try_cmpxchg(v, old, new); #elif defined(arch_atomic_try_cmpxchg_relaxed) bool ret; __atomic_pre_full_fence(); ret = arch_atomic_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; #else int r, o = *old; r = raw_atomic_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic_t * @old: pointer to int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_acquire() elsewhere. * * Return: @true if the exchange occured, @false otherwise. 
*/ static __always_inline bool raw_atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { #if defined(arch_atomic_try_cmpxchg_acquire) return arch_atomic_try_cmpxchg_acquire(v, old, new); #elif defined(arch_atomic_try_cmpxchg_relaxed) bool ret = arch_atomic_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic_try_cmpxchg) return arch_atomic_try_cmpxchg(v, old, new); #else int r, o = *old; r = raw_atomic_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic_try_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic_t * @old: pointer to int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_try_cmpxchg_release() elsewhere. * * Return: @true if the exchange occured, @false otherwise. */ static __always_inline bool raw_atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { #if defined(arch_atomic_try_cmpxchg_release) return arch_atomic_try_cmpxchg_release(v, old, new); #elif defined(arch_atomic_try_cmpxchg_relaxed) __atomic_release_fence(); return arch_atomic_try_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic_try_cmpxchg) return arch_atomic_try_cmpxchg(v, old, new); #else int r, o = *old; r = raw_atomic_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic_t * @old: pointer to int value to compare with * @new: int value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. 
* * Safe to use in noinstr code; prefer atomic_try_cmpxchg_relaxed() elsewhere. * * Return: @true if the exchange occured, @false otherwise. */ static __always_inline bool raw_atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { #if defined(arch_atomic_try_cmpxchg_relaxed) return arch_atomic_try_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic_try_cmpxchg) return arch_atomic_try_cmpxchg(v, old, new); #else int r, o = *old; r = raw_atomic_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic_sub_and_test() - atomic subtract and test if zero with full ordering * @i: int value to subtract * @v: pointer to atomic_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic_sub_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic_sub_and_test(int i, atomic_t *v) { #if defined(arch_atomic_sub_and_test) return arch_atomic_sub_and_test(i, v); #else return raw_atomic_sub_return(i, v) == 0; #endif } /** * raw_atomic_dec_and_test() - atomic decrement and test if zero with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_dec_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic_dec_and_test(atomic_t *v) { #if defined(arch_atomic_dec_and_test) return arch_atomic_dec_and_test(v); #else return raw_atomic_dec_return(v) == 0; #endif } /** * raw_atomic_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic_inc_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. 
*/
static __always_inline bool
raw_atomic_inc_and_test(atomic_t *v)
{
#if defined(arch_atomic_inc_and_test)
	return arch_atomic_inc_and_test(v);
#else
	/* No arch op: zero-test the value produced by inc_return. */
	return raw_atomic_inc_return(v) == 0;
#endif
}

/**
 * raw_atomic_add_negative() - atomic add and test if negative with full ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with full ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_negative() elsewhere.
 *
 * Return: @true if the resulting value of @v is negative, @false otherwise.
 */
static __always_inline bool
raw_atomic_add_negative(int i, atomic_t *v)
{
#if defined(arch_atomic_add_negative)
	return arch_atomic_add_negative(i, v);
#elif defined(arch_atomic_add_negative_relaxed)
	/* Bracket the relaxed op with the pre/post full-fence helpers. */
	bool ret;
	__atomic_pre_full_fence();
	ret = arch_atomic_add_negative_relaxed(i, v);
	__atomic_post_full_fence();
	return ret;
#else
	/* No arch add_negative variant: sign-test the value produced by add_return. */
	return raw_atomic_add_return(i, v) < 0;
#endif
}

/**
 * raw_atomic_add_negative_acquire() - atomic add and test if negative with acquire ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with acquire ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_negative_acquire() elsewhere.
 *
 * Return: @true if the resulting value of @v is negative, @false otherwise.
 */
static __always_inline bool
raw_atomic_add_negative_acquire(int i, atomic_t *v)
{
#if defined(arch_atomic_add_negative_acquire)
	return arch_atomic_add_negative_acquire(i, v);
#elif defined(arch_atomic_add_negative_relaxed)
	/* Perform the relaxed op, then issue the acquire fence. */
	bool ret = arch_atomic_add_negative_relaxed(i, v);
	__atomic_acquire_fence();
	return ret;
#elif defined(arch_atomic_add_negative)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_add_negative(i, v);
#else
	/* No arch add_negative variant: sign-test the value produced by add_return. */
	return raw_atomic_add_return_acquire(i, v) < 0;
#endif
}

/**
 * raw_atomic_add_negative_release() - atomic add and test if negative with release ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with release ordering.
*
 * Safe to use in noinstr code; prefer atomic_add_negative_release() elsewhere.
 *
 * Return: @true if the resulting value of @v is negative, @false otherwise.
 */
static __always_inline bool
raw_atomic_add_negative_release(int i, atomic_t *v)
{
#if defined(arch_atomic_add_negative_release)
	return arch_atomic_add_negative_release(i, v);
#elif defined(arch_atomic_add_negative_relaxed)
	/* Issue the release fence, then perform the relaxed op. */
	__atomic_release_fence();
	return arch_atomic_add_negative_relaxed(i, v);
#elif defined(arch_atomic_add_negative)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_add_negative(i, v);
#else
	/* No arch add_negative variant: sign-test the value produced by add_return. */
	return raw_atomic_add_return_release(i, v) < 0;
#endif
}

/**
 * raw_atomic_add_negative_relaxed() - atomic add and test if negative with relaxed ordering
 * @i: int value to add
 * @v: pointer to atomic_t
 *
 * Atomically updates @v to (@v + @i) with relaxed ordering.
 *
 * Safe to use in noinstr code; prefer atomic_add_negative_relaxed() elsewhere.
 *
 * Return: @true if the resulting value of @v is negative, @false otherwise.
 */
static __always_inline bool
raw_atomic_add_negative_relaxed(int i, atomic_t *v)
{
#if defined(arch_atomic_add_negative_relaxed)
	return arch_atomic_add_negative_relaxed(i, v);
#elif defined(arch_atomic_add_negative)
	/* Only the unsuffixed arch op is defined; use it as-is. */
	return arch_atomic_add_negative(i, v);
#else
	/* No arch add_negative variant: sign-test the value produced by add_return. */
	return raw_atomic_add_return_relaxed(i, v) < 0;
#endif
}

/**
 * raw_atomic_fetch_add_unless() - atomic add unless value with full ordering
 * @v: pointer to atomic_t
 * @a: int value to add
 * @u: int value to compare with
 *
 * If (@v != @u), atomically updates @v to (@v + @a) with full ordering.
 * Otherwise, @v is not modified and relaxed ordering is provided.
 *
 * Safe to use in noinstr code; prefer atomic_fetch_add_unless() elsewhere.
 *
 * Return: The original value of @v.
*/ static __always_inline int raw_atomic_fetch_add_unless(atomic_t *v, int a, int u) { #if defined(arch_atomic_fetch_add_unless) return arch_atomic_fetch_add_unless(v, a, u); #else int c = raw_atomic_read(v); do { if (unlikely(c == u)) break; } while (!raw_atomic_try_cmpxchg(v, &c, c + a)); return c; #endif } /** * raw_atomic_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic_t * @a: int value to add * @u: int value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_add_unless() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_add_unless(atomic_t *v, int a, int u) { #if defined(arch_atomic_add_unless) return arch_atomic_add_unless(v, a, u); #else return raw_atomic_fetch_add_unless(v, a, u) != u; #endif } /** * raw_atomic_inc_not_zero() - atomic increment unless zero with full ordering * @v: pointer to atomic_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_not_zero() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_inc_not_zero(atomic_t *v) { #if defined(arch_atomic_inc_not_zero) return arch_atomic_inc_not_zero(v); #else return raw_atomic_add_unless(v, 1, 0); #endif } /** * raw_atomic_inc_unless_negative() - atomic increment unless negative with full ordering * @v: pointer to atomic_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_inc_unless_negative() elsewhere. * * Return: @true if @v was updated, @false otherwise. 
*/ static __always_inline bool raw_atomic_inc_unless_negative(atomic_t *v) { #if defined(arch_atomic_inc_unless_negative) return arch_atomic_inc_unless_negative(v); #else int c = raw_atomic_read(v); do { if (unlikely(c < 0)) return false; } while (!raw_atomic_try_cmpxchg(v, &c, c + 1)); return true; #endif } /** * raw_atomic_dec_unless_positive() - atomic decrement unless positive with full ordering * @v: pointer to atomic_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_unless_positive() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic_dec_unless_positive(atomic_t *v) { #if defined(arch_atomic_dec_unless_positive) return arch_atomic_dec_unless_positive(v); #else int c = raw_atomic_read(v); do { if (unlikely(c > 0)) return false; } while (!raw_atomic_try_cmpxchg(v, &c, c - 1)); return true; #endif } /** * raw_atomic_dec_if_positive() - atomic decrement if positive with full ordering * @v: pointer to atomic_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic_dec_if_positive() elsewhere. * * Return: The old value of (@v - 1), regardless of whether @v was updated. */ static __always_inline int raw_atomic_dec_if_positive(atomic_t *v) { #if defined(arch_atomic_dec_if_positive) return arch_atomic_dec_if_positive(v); #else int dec, c = raw_atomic_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!raw_atomic_try_cmpxchg(v, &c, dec)); return dec; #endif } #ifdef CONFIG_GENERIC_ATOMIC64 #include <asm-generic/atomic64.h> #endif /** * raw_atomic64_read() - atomic load with relaxed ordering * @v: pointer to atomic64_t * * Atomically loads the value of @v with relaxed ordering. 
* * Safe to use in noinstr code; prefer atomic64_read() elsewhere. * * Return: The value loaded from @v. */ static __always_inline s64 raw_atomic64_read(const atomic64_t *v) { return arch_atomic64_read(v); } /** * raw_atomic64_read_acquire() - atomic load with acquire ordering * @v: pointer to atomic64_t * * Atomically loads the value of @v with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_read_acquire() elsewhere. * * Return: The value loaded from @v. */ static __always_inline s64 raw_atomic64_read_acquire(const atomic64_t *v) { #if defined(arch_atomic64_read_acquire) return arch_atomic64_read_acquire(v); #else s64 ret; if (__native_word(atomic64_t)) { ret = smp_load_acquire(&(v)->counter); } else { ret = raw_atomic64_read(v); __atomic_acquire_fence(); } return ret; #endif } /** * raw_atomic64_set() - atomic set with relaxed ordering * @v: pointer to atomic64_t * @i: s64 value to assign * * Atomically sets @v to @i with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_set() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_set(atomic64_t *v, s64 i) { arch_atomic64_set(v, i); } /** * raw_atomic64_set_release() - atomic set with release ordering * @v: pointer to atomic64_t * @i: s64 value to assign * * Atomically sets @v to @i with release ordering. * * Safe to use in noinstr code; prefer atomic64_set_release() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_set_release(atomic64_t *v, s64 i) { #if defined(arch_atomic64_set_release) arch_atomic64_set_release(v, i); #else if (__native_word(atomic64_t)) { smp_store_release(&(v)->counter, i); } else { __atomic_release_fence(); raw_atomic64_set(v, i); } #endif } /** * raw_atomic64_add() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_add() elsewhere. * * Return: Nothing. 
*/ static __always_inline void raw_atomic64_add(s64 i, atomic64_t *v) { arch_atomic64_add(i, v); } /** * raw_atomic64_add_return() - atomic add with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_add_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #elif defined(arch_atomic64_add_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_add_return_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_add_return" #endif } /** * raw_atomic64_add_return_acquire() - atomic add with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_add_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return_acquire) return arch_atomic64_add_return_acquire(i, v); #elif defined(arch_atomic64_add_return_relaxed) s64 ret = arch_atomic64_add_return_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #else #error "Unable to define raw_atomic64_add_return_acquire" #endif } /** * raw_atomic64_add_return_release() - atomic add with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_add_return_release() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_add_return_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return_release) return arch_atomic64_add_return_release(i, v); #elif defined(arch_atomic64_add_return_relaxed) __atomic_release_fence(); return arch_atomic64_add_return_relaxed(i, v); #elif defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #else #error "Unable to define raw_atomic64_add_return_release" #endif } /** * raw_atomic64_add_return_relaxed() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_add_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_add_return_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_return_relaxed) return arch_atomic64_add_return_relaxed(i, v); #elif defined(arch_atomic64_add_return) return arch_atomic64_add_return(i, v); #else #error "Unable to define raw_atomic64_add_return_relaxed" #endif } /** * raw_atomic64_fetch_add() - atomic add with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_add() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #elif defined(arch_atomic64_fetch_add_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_add" #endif } /** * raw_atomic64_fetch_add_acquire() - atomic add with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_add_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add_acquire) return arch_atomic64_fetch_add_acquire(i, v); #elif defined(arch_atomic64_fetch_add_relaxed) s64 ret = arch_atomic64_fetch_add_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #else #error "Unable to define raw_atomic64_fetch_add_acquire" #endif } /** * raw_atomic64_fetch_add_release() - atomic add with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_add_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_add_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add_release) return arch_atomic64_fetch_add_release(i, v); #elif defined(arch_atomic64_fetch_add_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_add_relaxed(i, v); #elif defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #else #error "Unable to define raw_atomic64_fetch_add_release" #endif } /** * raw_atomic64_fetch_add_relaxed() - atomic add with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_add_relaxed() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_add_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_add_relaxed) return arch_atomic64_fetch_add_relaxed(i, v); #elif defined(arch_atomic64_fetch_add) return arch_atomic64_fetch_add(i, v); #else #error "Unable to define raw_atomic64_fetch_add_relaxed" #endif } /** * raw_atomic64_sub() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_sub() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_sub(s64 i, atomic64_t *v) { arch_atomic64_sub(i, v); } /** * raw_atomic64_sub_return() - atomic subtract with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_sub_return(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #elif defined(arch_atomic64_sub_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_sub_return" #endif } /** * raw_atomic64_sub_return_acquire() - atomic subtract with acquire ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return_acquire() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_sub_return_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return_acquire) return arch_atomic64_sub_return_acquire(i, v); #elif defined(arch_atomic64_sub_return_relaxed) s64 ret = arch_atomic64_sub_return_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #else #error "Unable to define raw_atomic64_sub_return_acquire" #endif } /** * raw_atomic64_sub_return_release() - atomic subtract with release ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_sub_return_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return_release) return arch_atomic64_sub_return_release(i, v); #elif defined(arch_atomic64_sub_return_relaxed) __atomic_release_fence(); return arch_atomic64_sub_return_relaxed(i, v); #elif defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #else #error "Unable to define raw_atomic64_sub_return_release" #endif } /** * raw_atomic64_sub_return_relaxed() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_sub_return_relaxed() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_sub_return_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_return_relaxed) return arch_atomic64_sub_return_relaxed(i, v); #elif defined(arch_atomic64_sub_return) return arch_atomic64_sub_return(i, v); #else #error "Unable to define raw_atomic64_sub_return_relaxed" #endif } /** * raw_atomic64_fetch_sub() - atomic subtract with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_sub() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #elif defined(arch_atomic64_fetch_sub_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_sub" #endif } /** * raw_atomic64_fetch_sub_acquire() - atomic subtract with acquire ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_sub_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub_acquire) return arch_atomic64_fetch_sub_acquire(i, v); #elif defined(arch_atomic64_fetch_sub_relaxed) s64 ret = arch_atomic64_fetch_sub_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #else #error "Unable to define raw_atomic64_fetch_sub_acquire" #endif } /** * raw_atomic64_fetch_sub_release() - atomic subtract with release ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with release ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_sub_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub_release) return arch_atomic64_fetch_sub_release(i, v); #elif defined(arch_atomic64_fetch_sub_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_sub_relaxed(i, v); #elif defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #else #error "Unable to define raw_atomic64_fetch_sub_release" #endif } /** * raw_atomic64_fetch_sub_relaxed() - atomic subtract with relaxed ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_sub_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_sub_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_sub_relaxed) return arch_atomic64_fetch_sub_relaxed(i, v); #elif defined(arch_atomic64_fetch_sub) return arch_atomic64_fetch_sub(i, v); #else #error "Unable to define raw_atomic64_fetch_sub_relaxed" #endif } /** * raw_atomic64_inc() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_inc() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_inc(atomic64_t *v) { #if defined(arch_atomic64_inc) arch_atomic64_inc(v); #else raw_atomic64_add(1, v); #endif } /** * raw_atomic64_inc_return() - atomic increment with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_inc_return(atomic64_t *v) { #if defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #elif defined(arch_atomic64_inc_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_inc_return_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_add_return(1, v); #endif } /** * raw_atomic64_inc_return_acquire() - atomic increment with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_inc_return_acquire(atomic64_t *v) { #if defined(arch_atomic64_inc_return_acquire) return arch_atomic64_inc_return_acquire(v); #elif defined(arch_atomic64_inc_return_relaxed) s64 ret = arch_atomic64_inc_return_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #else return raw_atomic64_add_return_acquire(1, v); #endif } /** * raw_atomic64_inc_return_release() - atomic increment with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with release ordering. * * Safe to use in noinstr code; prefer atomic64_inc_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_inc_return_release(atomic64_t *v) { #if defined(arch_atomic64_inc_return_release) return arch_atomic64_inc_return_release(v); #elif defined(arch_atomic64_inc_return_relaxed) __atomic_release_fence(); return arch_atomic64_inc_return_relaxed(v); #elif defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #else return raw_atomic64_add_return_release(1, v); #endif } /** * raw_atomic64_inc_return_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. 
* * Safe to use in noinstr code; prefer atomic64_inc_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_inc_return_relaxed(atomic64_t *v) { #if defined(arch_atomic64_inc_return_relaxed) return arch_atomic64_inc_return_relaxed(v); #elif defined(arch_atomic64_inc_return) return arch_atomic64_inc_return(v); #else return raw_atomic64_add_return_relaxed(1, v); #endif } /** * raw_atomic64_fetch_inc() - atomic increment with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #elif defined(arch_atomic64_fetch_inc_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_fetch_add(1, v); #endif } /** * raw_atomic64_fetch_inc_acquire() - atomic increment with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc_acquire(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc_acquire) return arch_atomic64_fetch_inc_acquire(v); #elif defined(arch_atomic64_fetch_inc_relaxed) s64 ret = arch_atomic64_fetch_inc_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #else return raw_atomic64_fetch_add_acquire(1, v); #endif } /** * raw_atomic64_fetch_inc_release() - atomic increment with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with release ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_inc_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc_release(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc_release) return arch_atomic64_fetch_inc_release(v); #elif defined(arch_atomic64_fetch_inc_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_inc_relaxed(v); #elif defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #else return raw_atomic64_fetch_add_release(1, v); #endif } /** * raw_atomic64_fetch_inc_relaxed() - atomic increment with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_inc_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_inc_relaxed(atomic64_t *v) { #if defined(arch_atomic64_fetch_inc_relaxed) return arch_atomic64_fetch_inc_relaxed(v); #elif defined(arch_atomic64_fetch_inc) return arch_atomic64_fetch_inc(v); #else return raw_atomic64_fetch_add_relaxed(1, v); #endif } /** * raw_atomic64_dec() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_dec() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_dec(atomic64_t *v) { #if defined(arch_atomic64_dec) arch_atomic64_dec(v); #else raw_atomic64_sub(1, v); #endif } /** * raw_atomic64_dec_return() - atomic decrement with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return() elsewhere. * * Return: The updated value of @v. 
*/ static __always_inline s64 raw_atomic64_dec_return(atomic64_t *v) { #if defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #elif defined(arch_atomic64_dec_return_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_dec_return_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_sub_return(1, v); #endif } /** * raw_atomic64_dec_return_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return_acquire() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return_acquire(atomic64_t *v) { #if defined(arch_atomic64_dec_return_acquire) return arch_atomic64_dec_return_acquire(v); #elif defined(arch_atomic64_dec_return_relaxed) s64 ret = arch_atomic64_dec_return_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #else return raw_atomic64_sub_return_acquire(1, v); #endif } /** * raw_atomic64_dec_return_release() - atomic decrement with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with release ordering. * * Safe to use in noinstr code; prefer atomic64_dec_return_release() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return_release(atomic64_t *v) { #if defined(arch_atomic64_dec_return_release) return arch_atomic64_dec_return_release(v); #elif defined(arch_atomic64_dec_return_relaxed) __atomic_release_fence(); return arch_atomic64_dec_return_relaxed(v); #elif defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #else return raw_atomic64_sub_return_release(1, v); #endif } /** * raw_atomic64_dec_return_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. 
* * Safe to use in noinstr code; prefer atomic64_dec_return_relaxed() elsewhere. * * Return: The updated value of @v. */ static __always_inline s64 raw_atomic64_dec_return_relaxed(atomic64_t *v) { #if defined(arch_atomic64_dec_return_relaxed) return arch_atomic64_dec_return_relaxed(v); #elif defined(arch_atomic64_dec_return) return arch_atomic64_dec_return(v); #else return raw_atomic64_sub_return_relaxed(1, v); #endif } /** * raw_atomic64_fetch_dec() - atomic decrement with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_dec() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #elif defined(arch_atomic64_fetch_dec_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_fetch_sub(1, v); #endif } /** * raw_atomic64_fetch_dec_acquire() - atomic decrement with acquire ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_dec_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec_acquire(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec_acquire) return arch_atomic64_fetch_dec_acquire(v); #elif defined(arch_atomic64_fetch_dec_relaxed) s64 ret = arch_atomic64_fetch_dec_relaxed(v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #else return raw_atomic64_fetch_sub_acquire(1, v); #endif } /** * raw_atomic64_fetch_dec_release() - atomic decrement with release ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with release ordering. 
* * Safe to use in noinstr code; prefer atomic64_fetch_dec_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec_release(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec_release) return arch_atomic64_fetch_dec_release(v); #elif defined(arch_atomic64_fetch_dec_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_dec_relaxed(v); #elif defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #else return raw_atomic64_fetch_sub_release(1, v); #endif } /** * raw_atomic64_fetch_dec_relaxed() - atomic decrement with relaxed ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_dec_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_dec_relaxed(atomic64_t *v) { #if defined(arch_atomic64_fetch_dec_relaxed) return arch_atomic64_fetch_dec_relaxed(v); #elif defined(arch_atomic64_fetch_dec) return arch_atomic64_fetch_dec(v); #else return raw_atomic64_fetch_sub_relaxed(1, v); #endif } /** * raw_atomic64_and() - atomic bitwise AND with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_and() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_and(s64 i, atomic64_t *v) { arch_atomic64_and(i, v); } /** * raw_atomic64_fetch_and() - atomic bitwise AND with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_and(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #elif defined(arch_atomic64_fetch_and_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_and" #endif } /** * raw_atomic64_fetch_and_acquire() - atomic bitwise AND with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and_acquire) return arch_atomic64_fetch_and_acquire(i, v); #elif defined(arch_atomic64_fetch_and_relaxed) s64 ret = arch_atomic64_fetch_and_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #else #error "Unable to define raw_atomic64_fetch_and_acquire" #endif } /** * raw_atomic64_fetch_and_release() - atomic bitwise AND with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and_release() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_and_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and_release) return arch_atomic64_fetch_and_release(i, v); #elif defined(arch_atomic64_fetch_and_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_and_relaxed(i, v); #elif defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #else #error "Unable to define raw_atomic64_fetch_and_release" #endif } /** * raw_atomic64_fetch_and_relaxed() - atomic bitwise AND with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_and_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_and_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_and_relaxed) return arch_atomic64_fetch_and_relaxed(i, v); #elif defined(arch_atomic64_fetch_and) return arch_atomic64_fetch_and(i, v); #else #error "Unable to define raw_atomic64_fetch_and_relaxed" #endif } /** * raw_atomic64_andnot() - atomic bitwise AND NOT with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_andnot() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_andnot(s64 i, atomic64_t *v) { #if defined(arch_atomic64_andnot) arch_atomic64_andnot(i, v); #else raw_atomic64_and(~i, v); #endif } /** * raw_atomic64_fetch_andnot() - atomic bitwise AND NOT with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_andnot(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #elif defined(arch_atomic64_fetch_andnot_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_fetch_and(~i, v); #endif } /** * raw_atomic64_fetch_andnot_acquire() - atomic bitwise AND NOT with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot_acquire) return arch_atomic64_fetch_andnot_acquire(i, v); #elif defined(arch_atomic64_fetch_andnot_relaxed) s64 ret = arch_atomic64_fetch_andnot_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #else return raw_atomic64_fetch_and_acquire(~i, v); #endif } /** * raw_atomic64_fetch_andnot_release() - atomic bitwise AND NOT with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot_release() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot_release) return arch_atomic64_fetch_andnot_release(i, v); #elif defined(arch_atomic64_fetch_andnot_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_andnot_relaxed(i, v); #elif defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #else return raw_atomic64_fetch_and_release(~i, v); #endif } /** * raw_atomic64_fetch_andnot_relaxed() - atomic bitwise AND NOT with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v & ~@i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_andnot_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_andnot_relaxed) return arch_atomic64_fetch_andnot_relaxed(i, v); #elif defined(arch_atomic64_fetch_andnot) return arch_atomic64_fetch_andnot(i, v); #else return raw_atomic64_fetch_and_relaxed(~i, v); #endif } /** * raw_atomic64_or() - atomic bitwise OR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_or() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_or(s64 i, atomic64_t *v) { arch_atomic64_or(i, v); } /** * raw_atomic64_fetch_or() - atomic bitwise OR with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_or(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #elif defined(arch_atomic64_fetch_or_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_or" #endif } /** * raw_atomic64_fetch_or_acquire() - atomic bitwise OR with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or_acquire) return arch_atomic64_fetch_or_acquire(i, v); #elif defined(arch_atomic64_fetch_or_relaxed) s64 ret = arch_atomic64_fetch_or_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #else #error "Unable to define raw_atomic64_fetch_or_acquire" #endif } /** * raw_atomic64_fetch_or_release() - atomic bitwise OR with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or_release() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_or_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or_release) return arch_atomic64_fetch_or_release(i, v); #elif defined(arch_atomic64_fetch_or_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_or_relaxed(i, v); #elif defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #else #error "Unable to define raw_atomic64_fetch_or_release" #endif } /** * raw_atomic64_fetch_or_relaxed() - atomic bitwise OR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v | @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_or_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_or_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_or_relaxed) return arch_atomic64_fetch_or_relaxed(i, v); #elif defined(arch_atomic64_fetch_or) return arch_atomic64_fetch_or(i, v); #else #error "Unable to define raw_atomic64_fetch_or_relaxed" #endif } /** * raw_atomic64_xor() - atomic bitwise XOR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_xor() elsewhere. * * Return: Nothing. */ static __always_inline void raw_atomic64_xor(s64 i, atomic64_t *v) { arch_atomic64_xor(i, v); } /** * raw_atomic64_fetch_xor() - atomic bitwise XOR with full ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_xor(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #elif defined(arch_atomic64_fetch_xor_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_post_full_fence(); return ret; #else #error "Unable to define raw_atomic64_fetch_xor" #endif } /** * raw_atomic64_fetch_xor_acquire() - atomic bitwise XOR with acquire ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor_acquire) return arch_atomic64_fetch_xor_acquire(i, v); #elif defined(arch_atomic64_fetch_xor_relaxed) s64 ret = arch_atomic64_fetch_xor_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #else #error "Unable to define raw_atomic64_fetch_xor_acquire" #endif } /** * raw_atomic64_fetch_xor_release() - atomic bitwise XOR with release ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor_release() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_xor_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor_release) return arch_atomic64_fetch_xor_release(i, v); #elif defined(arch_atomic64_fetch_xor_relaxed) __atomic_release_fence(); return arch_atomic64_fetch_xor_relaxed(i, v); #elif defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #else #error "Unable to define raw_atomic64_fetch_xor_release" #endif } /** * raw_atomic64_fetch_xor_relaxed() - atomic bitwise XOR with relaxed ordering * @i: s64 value * @v: pointer to atomic64_t * * Atomically updates @v to (@v ^ @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_fetch_xor_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_fetch_xor_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_fetch_xor_relaxed) return arch_atomic64_fetch_xor_relaxed(i, v); #elif defined(arch_atomic64_fetch_xor) return arch_atomic64_fetch_xor(i, v); #else #error "Unable to define raw_atomic64_fetch_xor_relaxed" #endif } /** * raw_atomic64_xchg() - atomic exchange with full ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with full ordering. * * Safe to use in noinstr code; prefer atomic64_xchg() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #elif defined(arch_atomic64_xchg_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_xchg_relaxed(v, new); __atomic_post_full_fence(); return ret; #else return raw_xchg(&v->counter, new); #endif } /** * raw_atomic64_xchg_acquire() - atomic exchange with acquire ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_xchg_acquire() elsewhere. 
* * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg_acquire(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg_acquire) return arch_atomic64_xchg_acquire(v, new); #elif defined(arch_atomic64_xchg_relaxed) s64 ret = arch_atomic64_xchg_relaxed(v, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #else return raw_xchg_acquire(&v->counter, new); #endif } /** * raw_atomic64_xchg_release() - atomic exchange with release ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with release ordering. * * Safe to use in noinstr code; prefer atomic64_xchg_release() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg_release(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg_release) return arch_atomic64_xchg_release(v, new); #elif defined(arch_atomic64_xchg_relaxed) __atomic_release_fence(); return arch_atomic64_xchg_relaxed(v, new); #elif defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #else return raw_xchg_release(&v->counter, new); #endif } /** * raw_atomic64_xchg_relaxed() - atomic exchange with relaxed ordering * @v: pointer to atomic64_t * @new: s64 value to assign * * Atomically updates @v to @new with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_xchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_xchg_relaxed(atomic64_t *v, s64 new) { #if defined(arch_atomic64_xchg_relaxed) return arch_atomic64_xchg_relaxed(v, new); #elif defined(arch_atomic64_xchg) return arch_atomic64_xchg(v, new); #else return raw_xchg_relaxed(&v->counter, new); #endif } /** * raw_atomic64_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. 
* Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #elif defined(arch_atomic64_cmpxchg_relaxed) s64 ret; __atomic_pre_full_fence(); ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; #else return raw_cmpxchg(&v->counter, old, new); #endif } /** * raw_atomic64_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_acquire() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg_acquire) return arch_atomic64_cmpxchg_acquire(v, old, new); #elif defined(arch_atomic64_cmpxchg_relaxed) s64 ret = arch_atomic64_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #else return raw_cmpxchg_acquire(&v->counter, old, new); #endif } /** * raw_atomic64_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_release() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg_release) return arch_atomic64_cmpxchg_release(v, old, new); #elif defined(arch_atomic64_cmpxchg_relaxed) __atomic_release_fence(); return arch_atomic64_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #else return raw_cmpxchg_release(&v->counter, old, new); #endif } /** * raw_atomic64_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic64_t * @old: s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_cmpxchg_relaxed() elsewhere. * * Return: The original value of @v. */ static __always_inline s64 raw_atomic64_cmpxchg_relaxed(atomic64_t *v, s64 old, s64 new) { #if defined(arch_atomic64_cmpxchg_relaxed) return arch_atomic64_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_cmpxchg) return arch_atomic64_cmpxchg(v, old, new); #else return raw_cmpxchg_relaxed(&v->counter, old, new); #endif } /** * raw_atomic64_try_cmpxchg() - atomic compare and exchange with full ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with full ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg() elsewhere. * * Return: @true if the exchange occured, @false otherwise. 
*/ static __always_inline bool raw_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #elif defined(arch_atomic64_try_cmpxchg_relaxed) bool ret; __atomic_pre_full_fence(); ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_post_full_fence(); return ret; #else s64 r, o = *old; r = raw_atomic64_cmpxchg(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_try_cmpxchg_acquire() - atomic compare and exchange with acquire ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with acquire ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_acquire() elsewhere. * * Return: @true if the exchange occured, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg_acquire) return arch_atomic64_try_cmpxchg_acquire(v, old, new); #elif defined(arch_atomic64_try_cmpxchg_relaxed) bool ret = arch_atomic64_try_cmpxchg_relaxed(v, old, new); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #else s64 r, o = *old; r = raw_atomic64_cmpxchg_acquire(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_try_cmpxchg_release() - atomic compare and exchange with release ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with release ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. 
* * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_release() elsewhere. * * Return: @true if the exchange occured, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg_release) return arch_atomic64_try_cmpxchg_release(v, old, new); #elif defined(arch_atomic64_try_cmpxchg_relaxed) __atomic_release_fence(); return arch_atomic64_try_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #else s64 r, o = *old; r = raw_atomic64_cmpxchg_release(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_try_cmpxchg_relaxed() - atomic compare and exchange with relaxed ordering * @v: pointer to atomic64_t * @old: pointer to s64 value to compare with * @new: s64 value to assign * * If (@v == @old), atomically updates @v to @new with relaxed ordering. * Otherwise, @v is not modified, @old is updated to the current value of @v, * and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_try_cmpxchg_relaxed() elsewhere. * * Return: @true if the exchange occured, @false otherwise. */ static __always_inline bool raw_atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { #if defined(arch_atomic64_try_cmpxchg_relaxed) return arch_atomic64_try_cmpxchg_relaxed(v, old, new); #elif defined(arch_atomic64_try_cmpxchg) return arch_atomic64_try_cmpxchg(v, old, new); #else s64 r, o = *old; r = raw_atomic64_cmpxchg_relaxed(v, o, new); if (unlikely(r != o)) *old = r; return likely(r == o); #endif } /** * raw_atomic64_sub_and_test() - atomic subtract and test if zero with full ordering * @i: s64 value to subtract * @v: pointer to atomic64_t * * Atomically updates @v to (@v - @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_sub_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. 
*/ static __always_inline bool raw_atomic64_sub_and_test(s64 i, atomic64_t *v) { #if defined(arch_atomic64_sub_and_test) return arch_atomic64_sub_and_test(i, v); #else return raw_atomic64_sub_return(i, v) == 0; #endif } /** * raw_atomic64_dec_and_test() - atomic decrement and test if zero with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v - 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_dec_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic64_dec_and_test(atomic64_t *v) { #if defined(arch_atomic64_dec_and_test) return arch_atomic64_dec_and_test(v); #else return raw_atomic64_dec_return(v) == 0; #endif } /** * raw_atomic64_inc_and_test() - atomic increment and test if zero with full ordering * @v: pointer to atomic64_t * * Atomically updates @v to (@v + 1) with full ordering. * * Safe to use in noinstr code; prefer atomic64_inc_and_test() elsewhere. * * Return: @true if the resulting value of @v is zero, @false otherwise. */ static __always_inline bool raw_atomic64_inc_and_test(atomic64_t *v) { #if defined(arch_atomic64_inc_and_test) return arch_atomic64_inc_and_test(v); #else return raw_atomic64_inc_return(v) == 0; #endif } /** * raw_atomic64_add_negative() - atomic add and test if negative with full ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with full ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. 
*/ static __always_inline bool raw_atomic64_add_negative(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #elif defined(arch_atomic64_add_negative_relaxed) bool ret; __atomic_pre_full_fence(); ret = arch_atomic64_add_negative_relaxed(i, v); __atomic_post_full_fence(); return ret; #else return raw_atomic64_add_return(i, v) < 0; #endif } /** * raw_atomic64_add_negative_acquire() - atomic add and test if negative with acquire ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with acquire ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative_acquire() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic64_add_negative_acquire(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative_acquire) return arch_atomic64_add_negative_acquire(i, v); #elif defined(arch_atomic64_add_negative_relaxed) bool ret = arch_atomic64_add_negative_relaxed(i, v); __atomic_acquire_fence(); return ret; #elif defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #else return raw_atomic64_add_return_acquire(i, v) < 0; #endif } /** * raw_atomic64_add_negative_release() - atomic add and test if negative with release ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with release ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative_release() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. 
*/ static __always_inline bool raw_atomic64_add_negative_release(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative_release) return arch_atomic64_add_negative_release(i, v); #elif defined(arch_atomic64_add_negative_relaxed) __atomic_release_fence(); return arch_atomic64_add_negative_relaxed(i, v); #elif defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #else return raw_atomic64_add_return_release(i, v) < 0; #endif } /** * raw_atomic64_add_negative_relaxed() - atomic add and test if negative with relaxed ordering * @i: s64 value to add * @v: pointer to atomic64_t * * Atomically updates @v to (@v + @i) with relaxed ordering. * * Safe to use in noinstr code; prefer atomic64_add_negative_relaxed() elsewhere. * * Return: @true if the resulting value of @v is negative, @false otherwise. */ static __always_inline bool raw_atomic64_add_negative_relaxed(s64 i, atomic64_t *v) { #if defined(arch_atomic64_add_negative_relaxed) return arch_atomic64_add_negative_relaxed(i, v); #elif defined(arch_atomic64_add_negative) return arch_atomic64_add_negative(i, v); #else return raw_atomic64_add_return_relaxed(i, v) < 0; #endif } /** * raw_atomic64_fetch_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic64_t * @a: s64 value to add * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_fetch_add_unless() elsewhere. * * Return: The original value of @v. 
*/ static __always_inline s64 raw_atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { #if defined(arch_atomic64_fetch_add_unless) return arch_atomic64_fetch_add_unless(v, a, u); #else s64 c = raw_atomic64_read(v); do { if (unlikely(c == u)) break; } while (!raw_atomic64_try_cmpxchg(v, &c, c + a)); return c; #endif } /** * raw_atomic64_add_unless() - atomic add unless value with full ordering * @v: pointer to atomic64_t * @a: s64 value to add * @u: s64 value to compare with * * If (@v != @u), atomically updates @v to (@v + @a) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_add_unless() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { #if defined(arch_atomic64_add_unless) return arch_atomic64_add_unless(v, a, u); #else return raw_atomic64_fetch_add_unless(v, a, u) != u; #endif } /** * raw_atomic64_inc_not_zero() - atomic increment unless zero with full ordering * @v: pointer to atomic64_t * * If (@v != 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_not_zero() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic64_inc_not_zero(atomic64_t *v) { #if defined(arch_atomic64_inc_not_zero) return arch_atomic64_inc_not_zero(v); #else return raw_atomic64_add_unless(v, 1, 0); #endif } /** * raw_atomic64_inc_unless_negative() - atomic increment unless negative with full ordering * @v: pointer to atomic64_t * * If (@v >= 0), atomically updates @v to (@v + 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_inc_unless_negative() elsewhere. * * Return: @true if @v was updated, @false otherwise. 
*/ static __always_inline bool raw_atomic64_inc_unless_negative(atomic64_t *v) { #if defined(arch_atomic64_inc_unless_negative) return arch_atomic64_inc_unless_negative(v); #else s64 c = raw_atomic64_read(v); do { if (unlikely(c < 0)) return false; } while (!raw_atomic64_try_cmpxchg(v, &c, c + 1)); return true; #endif } /** * raw_atomic64_dec_unless_positive() - atomic decrement unless positive with full ordering * @v: pointer to atomic64_t * * If (@v <= 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_unless_positive() elsewhere. * * Return: @true if @v was updated, @false otherwise. */ static __always_inline bool raw_atomic64_dec_unless_positive(atomic64_t *v) { #if defined(arch_atomic64_dec_unless_positive) return arch_atomic64_dec_unless_positive(v); #else s64 c = raw_atomic64_read(v); do { if (unlikely(c > 0)) return false; } while (!raw_atomic64_try_cmpxchg(v, &c, c - 1)); return true; #endif } /** * raw_atomic64_dec_if_positive() - atomic decrement if positive with full ordering * @v: pointer to atomic64_t * * If (@v > 0), atomically updates @v to (@v - 1) with full ordering. * Otherwise, @v is not modified and relaxed ordering is provided. * * Safe to use in noinstr code; prefer atomic64_dec_if_positive() elsewhere. * * Return: The old value of (@v - 1), regardless of whether @v was updated. */ static __always_inline s64 raw_atomic64_dec_if_positive(atomic64_t *v) { #if defined(arch_atomic64_dec_if_positive) return arch_atomic64_dec_if_positive(v); #else s64 dec, c = raw_atomic64_read(v); do { dec = c - 1; if (unlikely(dec < 0)) break; } while (!raw_atomic64_try_cmpxchg(v, &c, dec)); return dec; #endif } #endif /* _LINUX_ATOMIC_FALLBACK_H */ // b565db590afeeff0d7c9485ccbca5bb6e155749f
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __NET_GUE_H
#define __NET_GUE_H

/* Definitions for the GUE header, standard and private flags, lengths
 * of optional fields are below.
 *
 * Diagram of GUE header:
 *
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |Ver|C|  Hlen   | Proto/ctype   |        Standard flags       |P|
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                                                               |
 * ~                       Fields (optional)                       ~
 * |                                                               |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |            Private flags (optional, P bit is set)             |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |                                                               |
 * ~                   Private fields (optional)                   ~
 * |                                                               |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 *
 * C bit indicates control message when set, data message when unset.
 * For a control message, proto/ctype is interpreted as a type of
 * control message. For data messages, proto/ctype is the IP protocol
 * of the next header.
 *
 * P bit indicates private flags field is present. The private flags
 * may refer to options placed after this field.
 */

#include <asm/byteorder.h>
#include <linux/types.h>

/* First 32-bit word of a GUE header; options (if any) follow directly. */
struct guehdr {
	union {
		struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
			__u8	hlen:5,
				control:1,
				version:2;
#elif defined (__BIG_ENDIAN_BITFIELD)
			__u8	version:2,
				control:1,
				hlen:5;
#else
#error  "Please fix <asm/byteorder.h>"
#endif
			__u8	proto_ctype;
			__be16	flags;
		};
		__be32	word;
	};
};

/* Standard flags in GUE header */

#define GUE_FLAG_PRIV	htons(1<<0)	/* Private flags are in options */
#define GUE_LEN_PRIV	4

#define GUE_FLAGS_ALL	(GUE_FLAG_PRIV)

/* Private flags in the private option extension */

#define GUE_PFLAG_REMCSUM	htonl(1U << 31)
#define GUE_PLEN_REMCSUM	4

#define GUE_PFLAGS_ALL	(GUE_PFLAG_REMCSUM)

/* Functions to compute options length corresponding to flags.
 * If we ever have a lot of flags this can be potentially be
 * converted to a more optimized algorithm (table lookup
 * for instance).
 */

/* Length in bytes of the optional fields selected by the standard @flags
 * (network byte order). Only GUE_FLAG_PRIV is known; it accounts for the
 * 32-bit private flags word.
 */
static inline size_t guehdr_flags_len(__be16 flags)
{
	return ((flags & GUE_FLAG_PRIV) ? GUE_LEN_PRIV : 0);
}

/* Length in bytes of the private fields selected by the private @flags
 * (network byte order).
 *
 * The remote checksum offload flag carries a GUE_PLEN_REMCSUM byte field
 * after the private flags word, so it must be counted here; otherwise
 * validate_gue_flags() cannot notice a header that sets the flag without
 * leaving room for the field.
 */
static inline size_t guehdr_priv_flags_len(__be32 flags)
{
	return ((flags & GUE_PFLAG_REMCSUM) ? GUE_PLEN_REMCSUM : 0);
}

/* Validate standard and private flags. Returns non-zero (meaning invalid)
 * if there is an unknown standard or private flags, or the options length for
 * the flags exceeds the options length specific in hlen of the GUE header.
 *
 * @guehdr: header to check; the options are read from the bytes that
 *          immediately follow it, so the caller must have at least @optlen
 *          bytes accessible there.
 * @optlen: number of option bytes announced by the header.
 */
static inline int validate_gue_flags(struct guehdr *guehdr, size_t optlen)
{
	__be16 flags = guehdr->flags;
	size_t len;

	if (flags & ~GUE_FLAGS_ALL)
		return 1;

	len = guehdr_flags_len(flags);
	if (len > optlen)
		return 1;

	if (flags & GUE_FLAG_PRIV) {
		/* Private flags are last four bytes accounted in
		 * guehdr_flags_len
		 */
		__be32 pflags = *(__be32 *)((void *)&guehdr[1] +
					    len - GUE_LEN_PRIV);

		if (pflags & ~GUE_PFLAGS_ALL)
			return 1;

		/* The private fields must fit in the announced options too. */
		len += guehdr_priv_flags_len(pflags);
		if (len > optlen)
			return 1;
	}

	return 0;
}

#endif
1 6 5 1 1 1 2 1 1 8 1 1 1 1 1 3 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 // SPDX-License-Identifier: GPL-2.0-only /* * File: datagram.c * * Datagram (ISI) Phonet sockets * * Copyright (C) 2008 Nokia Corporation. * * Authors: Sakari Ailus <sakari.ailus@nokia.com> * Rémi Denis-Courmont */ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/socket.h> #include <asm/ioctls.h> #include <net/sock.h> #include <linux/phonet.h> #include <linux/export.h> #include <net/phonet/phonet.h> static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); /* associated socket ceases to exist */ static void pn_sock_close(struct sock *sk, long timeout) { sk_common_release(sk); } static int pn_ioctl(struct sock *sk, int cmd, int *karg) { struct sk_buff *skb; switch (cmd) { case SIOCINQ: spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); *karg = skb ? skb->len : 0; spin_unlock_bh(&sk->sk_receive_queue.lock); return 0; case SIOCPNADDRESOURCE: case SIOCPNDELRESOURCE: { u32 res = *karg; if (res >= 256) return -EINVAL; if (cmd == SIOCPNADDRESOURCE) return pn_sock_bind_res(sk, res); else return pn_sock_unbind_res(sk, res); } } return -ENOIOCTLCMD; } /* Destroy socket. All references are gone. 
*/ static void pn_destruct(struct sock *sk) { skb_queue_purge(&sk->sk_receive_queue); } static int pn_init(struct sock *sk) { sk->sk_destruct = pn_destruct; return 0; } static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_pn *, target, msg->msg_name); struct sk_buff *skb; int err; if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) return -EOPNOTSUPP; if (target == NULL) return -EDESTADDRREQ; if (msg->msg_namelen < sizeof(struct sockaddr_pn)) return -EINVAL; if (target->spn_family != AF_PHONET) return -EAFNOSUPPORT; skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) return err; skb_reserve(skb, MAX_PHONET_HEADER); err = memcpy_from_msg((void *)skb_put(skb, len), msg, len); if (err < 0) { kfree_skb(skb); return err; } /* * Fill in the Phonet header and * finally pass the packet forwards. */ err = pn_skb_send(sk, skb, target); /* If ok, return len. */ return (err >= 0) ? len : err; } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; int rval = -EOPNOTSUPP; int copylen; if (flags & ~(MSG_PEEK|MSG_TRUNC|MSG_DONTWAIT|MSG_NOSIGNAL| MSG_CMSG_COMPAT)) goto out_nofree; skb = skb_recv_datagram(sk, flags, &rval); if (skb == NULL) goto out_nofree; pn_skb_get_src_sockaddr(skb, &sa); copylen = skb->len; if (len < copylen) { msg->msg_flags |= MSG_TRUNC; copylen = len; } rval = skb_copy_datagram_msg(skb, 0, msg, copylen); if (rval) { rval = -EFAULT; goto out; } rval = (flags & MSG_TRUNC) ? skb->len : copylen; if (msg->msg_name != NULL) { __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); *addr_len = sizeof(sa); } out: skb_free_datagram(sk, skb); out_nofree: return rval; } /* Queue an skb for a sock. 
*/ static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) { int err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); return err ? NET_RX_DROP : NET_RX_SUCCESS; } /* Module registration */ static struct proto pn_proto = { .close = pn_sock_close, .ioctl = pn_ioctl, .init = pn_init, .sendmsg = pn_sendmsg, .recvmsg = pn_recvmsg, .backlog_rcv = pn_backlog_rcv, .hash = pn_sock_hash, .unhash = pn_sock_unhash, .get_port = pn_sock_get_port, .obj_size = sizeof(struct pn_sock), .owner = THIS_MODULE, .name = "PHONET", }; static const struct phonet_protocol pn_dgram_proto = { .ops = &phonet_dgram_ops, .prot = &pn_proto, .sock_type = SOCK_DGRAM, }; int __init isi_register(void) { return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); } void __exit isi_unregister(void) { phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); }
24 67 81 27 39 71 14 17 164 161 15 181 182 181 182 181 179 179 179 177 182 182 30 31 119 6 77 10 83 2 6 15 118 103 50 82 2 4 4 87 101 1329 1331 1328 1333 1339 1340 1310 87 82 1083 461 6 6 6 6 5 1 1 1 1 36 36 10 32 90 5 5 5 5 90 10 98 7 97 2 97 97 8 5 8 8 7 5 6 9 9 98 7 77 78 76 77 73 67 68 17 17 26 129 27 144 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 
449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 
949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_RMAP_H
#define _LINUX_RMAP_H
/*
 * Declarations for Reverse Mapping functions in mm/rmap.c
 */

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/rwsem.h>
#include <linux/memcontrol.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/memremap.h>
#include <linux/bit_spinlock.h>

/*
 * The anon_vma heads a list of private "related" vmas, to scan if
 * an anonymous page pointing to this anon_vma needs to be unmapped:
 * the vmas on the list will be related by forking, or by splitting.
 *
 * Since vmas come and go as they are split and merged (particularly
 * in mprotect), the mapping field of an anonymous page cannot point
 * directly to a vma: instead it points to an anon_vma, on whose list
 * the related vmas can be easily linked or unlinked.
 *
 * After unlinking the last vma on the list, we must garbage collect
 * the anon_vma object itself: we're guaranteed no page can be
 * pointing to this anon_vma once its vma list is empty.
 */
struct anon_vma {
	struct anon_vma *root;		/* Root of this anon_vma tree */
	struct rw_semaphore rwsem;	/* W: modification, R: walking the list */
	/*
	 * The refcount is taken on an anon_vma when there is no
	 * guarantee that the vma of page tables will exist for
	 * the duration of the operation. A caller that takes
	 * the reference is responsible for cleaning up the
	 * anon_vma if they are the last user on release.
	 */
	atomic_t refcount;

	/*
	 * Count of child anon_vmas. Equal to the count of all anon_vmas that
	 * have ->parent pointing to this one, including itself.
	 *
	 * This counter is used for deciding whether to reuse an anon_vma
	 * instead of forking a new one. See comments in function
	 * anon_vma_clone.
	 */
	unsigned long num_children;
	/* Count of VMAs whose ->anon_vma pointer points to this object. */
	unsigned long num_active_vmas;

	struct anon_vma *parent;	/* Parent of this anon_vma */

	/*
	 * NOTE: the LSB of the rb_root.rb_node is set by
	 * mm_take_all_locks() _after_ taking the above lock. So the
	 * rb_root must only be read/written after taking the above lock
	 * to be sure to see a valid next pointer. The LSB bit itself
	 * is serialized by a system wide lock only visible to
	 * mm_take_all_locks() (mm_all_locks_mutex).
	 */

	/* Interval tree of private "related" vmas */
	struct rb_root_cached rb_root;
};

/*
 * The copy-on-write semantics of fork mean that an anon_vma
 * can become associated with multiple processes. Furthermore,
 * each child process will have its own anon_vma, where new
 * pages for that process are instantiated.
 *
 * This structure allows us to find the anon_vmas associated
 * with a VMA, or the VMAs associated with an anon_vma.
 * The "same_vma" list contains the anon_vma_chains linking
 * all the anon_vmas associated with this VMA.
 * The "rb" field indexes on an interval tree the anon_vma_chains
 * which link all the VMAs associated with this anon_vma.
*/
struct anon_vma_chain {
	struct vm_area_struct *vma;
	struct anon_vma *anon_vma;
	struct list_head same_vma;   /* locked by mmap_lock & page_table_lock */
	struct rb_node rb;			/* locked by anon_vma->rwsem */
	unsigned long rb_subtree_last;
#ifdef CONFIG_DEBUG_VM_RB
	unsigned long cached_vma_start, cached_vma_last;
#endif
};

/*
 * TTU_* flags. Each value is a distinct bit so that several flags can be
 * OR-ed together.
 */
enum ttu_flags {
	TTU_SPLIT_HUGE_PMD	= 0x4,	/* split huge PMD if any */
	TTU_IGNORE_MLOCK	= 0x8,	/* ignore mlock */
	TTU_SYNC		= 0x10,	/* avoid racy checks with PVMW_SYNC */
	TTU_HWPOISON		= 0x20,	/* do convert pte to hwpoison entry */
	TTU_BATCH_FLUSH		= 0x40,	/* Batch TLB flushes where possible
					 * and caller guarantees they will
					 * do a final flush if necessary */
	TTU_RMAP_LOCKED		= 0x80,	/* do not grab rmap lock:
					 * caller holds it */
};

#ifdef CONFIG_MMU
/* Take a reference on @anon_vma. */
static inline void get_anon_vma(struct anon_vma *anon_vma)
{
	atomic_inc(&anon_vma->refcount);
}

void __put_anon_vma(struct anon_vma *anon_vma);

/*
 * Drop a reference on @anon_vma; when the count reaches zero the object is
 * handed to __put_anon_vma().
 */
static inline void put_anon_vma(struct anon_vma *anon_vma)
{
	if (atomic_dec_and_test(&anon_vma->refcount))
		__put_anon_vma(anon_vma);
}

/*
 * Locking helpers. Note that all of them operate on the rwsem of the
 * anon_vma's root, not on the anon_vma's own rwsem. The trylock variants
 * return the result of the underlying rwsem trylock call.
 */
static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
{
	down_write(&anon_vma->root->rwsem);
}

static inline int anon_vma_trylock_write(struct anon_vma *anon_vma)
{
	return down_write_trylock(&anon_vma->root->rwsem);
}

static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
{
	up_write(&anon_vma->root->rwsem);
}

static inline void anon_vma_lock_read(struct anon_vma *anon_vma)
{
	down_read(&anon_vma->root->rwsem);
}

static inline int anon_vma_trylock_read(struct anon_vma *anon_vma)
{
	return down_read_trylock(&anon_vma->root->rwsem);
}

static inline void anon_vma_unlock_read(struct anon_vma *anon_vma)
{
	up_read(&anon_vma->root->rwsem);
}


/*
 * anon_vma helper functions.
*/
void anon_vma_init(void);	/* create anon_vma_cachep */
int  __anon_vma_prepare(struct vm_area_struct *);
void unlink_anon_vmas(struct vm_area_struct *);
int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);

/*
 * Fast path: nothing to do when the VMA already has an anon_vma; otherwise
 * defer to __anon_vma_prepare() and return its result.
 */
static inline int anon_vma_prepare(struct vm_area_struct *vma)
{
	if (likely(vma->anon_vma))
		return 0;

	return __anon_vma_prepare(vma);
}

/*
 * Unlink the anon_vmas of @next. Both VMAs are expected to share the same
 * anon_vma (checked with VM_BUG_ON_VMA).
 */
static inline void anon_vma_merge(struct vm_area_struct *vma,
				  struct vm_area_struct *next)
{
	VM_BUG_ON_VMA(vma->anon_vma != next->anon_vma, vma);
	unlink_anon_vmas(next);
}

struct anon_vma *folio_get_anon_vma(const struct folio *folio);

#ifdef CONFIG_MM_ID
/* Take the bit spinlock (FOLIO_MM_IDS_LOCK_BITNUM in folio->_mm_ids). */
static __always_inline void folio_lock_large_mapcount(struct folio *folio)
{
	bit_spin_lock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids);
}

/* Release the bit spinlock taken by folio_lock_large_mapcount(). */
static __always_inline void folio_unlock_large_mapcount(struct folio *folio)
{
	__bit_spin_unlock(FOLIO_MM_IDS_LOCK_BITNUM, &folio->_mm_ids);
}

/* Return the MM ID stored in slot @idx (0 or 1), masked with MM_ID_MASK. */
static inline unsigned int folio_mm_id(const struct folio *folio, int idx)
{
	VM_WARN_ON_ONCE(idx != 0 && idx != 1);
	return folio->_mm_id[idx] & MM_ID_MASK;
}

/*
 * Store @id in slot @idx (0 or 1). Only the bits covered by MM_ID_MASK are
 * cleared beforehand; the remaining bits of the slot are preserved.
 */
static inline void folio_set_mm_id(struct folio *folio, int idx, mm_id_t id)
{
	VM_WARN_ON_ONCE(idx != 0 && idx != 1);
	folio->_mm_id[idx] &= ~MM_ID_MASK;
	folio->_mm_id[idx] |= id;
}

/*
 * Debug-only consistency checks (VM_WARN_ON_ONCE) on the arguments and on
 * the per-MM slot state before the large mapcount of @folio is changed by
 * @diff on behalf of @mm_id.
 */
static inline void __folio_large_mapcount_sanity_checks(const struct folio *folio,
		int diff, mm_id_t mm_id)
{
	VM_WARN_ON_ONCE(!folio_test_large(folio) || folio_test_hugetlb(folio));
	VM_WARN_ON_ONCE(diff <= 0);
	VM_WARN_ON_ONCE(mm_id < MM_ID_MIN || mm_id > MM_ID_MAX);

	/*
	 * Make sure we can detect at least one complete PTE mapping of the
	 * folio in a single MM as "exclusively mapped". This is primarily
	 * a check on 32bit, where we currently reduce the size of the per-MM
	 * mapcount to a short.
	 */
	VM_WARN_ON_ONCE(diff > folio_large_nr_pages(folio));
	VM_WARN_ON_ONCE(folio_large_nr_pages(folio) - 1 > MM_ID_MAPCOUNT_MAX);

	/* A free slot (MM_ID_DUMMY) must hold -1; a used slot must be >= 0. */
	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) == MM_ID_DUMMY &&
			folio->_mm_id_mapcount[0] != -1);
	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY &&
			folio->_mm_id_mapcount[0] < 0);
	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) == MM_ID_DUMMY &&
			folio->_mm_id_mapcount[1] != -1);
	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY &&
			folio->_mm_id_mapcount[1] < 0);
	VM_WARN_ON_ONCE(!folio_mapped(folio) &&
			test_bit(FOLIO_MM_IDS_SHARED_BITNUM, &folio->_mm_ids));
}

/*
 * Initialize the large mapcount of @folio to @mapcount and record the MM of
 * @vma in slot 0. Both slots are expected to be free (MM_ID_DUMMY). Unlike
 * the add/sub helpers below, this does not take the large-mapcount lock.
 */
static __always_inline void folio_set_large_mapcount(struct folio *folio,
		int mapcount, struct vm_area_struct *vma)
{
	__folio_large_mapcount_sanity_checks(folio, mapcount, vma->vm_mm->mm_id);

	VM_WARN_ON_ONCE(folio_mm_id(folio, 0) != MM_ID_DUMMY);
	VM_WARN_ON_ONCE(folio_mm_id(folio, 1) != MM_ID_DUMMY);

	/* Note: mapcounts start at -1. */
	atomic_set(&folio->_large_mapcount, mapcount - 1);
	folio->_mm_id_mapcount[0] = mapcount - 1;
	folio_set_mm_id(folio, 0, vma->vm_mm->mm_id);
}

/*
 * Add @diff mappings made by the MM of @vma to the large mapcount of
 * @folio, updating the per-MM slots under the large-mapcount lock.
 *
 * Returns the new mapcount (the stored value plus one, since stored
 * mapcounts start at -1).
 */
static __always_inline int folio_add_return_large_mapcount(struct folio *folio,
		int diff, struct vm_area_struct *vma)
{
	const mm_id_t mm_id = vma->vm_mm->mm_id;
	int new_mapcount_val;

	folio_lock_large_mapcount(folio);
	__folio_large_mapcount_sanity_checks(folio, diff, mm_id);

	new_mapcount_val = atomic_read(&folio->_large_mapcount) + diff;
	atomic_set(&folio->_large_mapcount, new_mapcount_val);

	/*
	 * If a folio is mapped more than once into an MM on 32bit, we
	 * can in theory overflow the per-MM mapcount (although only for
	 * fairly large folios), turning it negative. In that case, just
	 * free up the slot and mark the folio "mapped shared", otherwise
	 * we might be in trouble when unmapping pages later.
	 */
	if (folio_mm_id(folio, 0) == mm_id) {
		folio->_mm_id_mapcount[0] += diff;
		if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[0] < 0)) {
			folio->_mm_id_mapcount[0] = -1;
			folio_set_mm_id(folio, 0, MM_ID_DUMMY);
			folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
		}
	} else if (folio_mm_id(folio, 1) == mm_id) {
		folio->_mm_id_mapcount[1] += diff;
		if (!IS_ENABLED(CONFIG_64BIT) && unlikely(folio->_mm_id_mapcount[1] < 0)) {
			folio->_mm_id_mapcount[1] = -1;
			folio_set_mm_id(folio, 1, MM_ID_DUMMY);
			folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
		}
	} else if (folio_mm_id(folio, 0) == MM_ID_DUMMY) {
		folio_set_mm_id(folio, 0, mm_id);
		folio->_mm_id_mapcount[0] = diff - 1;
		/* We might have other mappings already. */
		if (new_mapcount_val != diff - 1)
			folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
	} else if (folio_mm_id(folio, 1) == MM_ID_DUMMY) {
		folio_set_mm_id(folio, 1, mm_id);
		folio->_mm_id_mapcount[1] = diff - 1;
		/* Slot 0 certainly has mappings as well. */
		folio->_mm_ids |= FOLIO_MM_IDS_SHARED_BIT;
	}
	/* If neither slot matches or is free, only the total is tracked. */
	folio_unlock_large_mapcount(folio);
	return new_mapcount_val + 1;
}
#define folio_add_large_mapcount folio_add_return_large_mapcount

/*
 * Remove @diff mappings made by the MM of @vma from the large mapcount of
 * @folio, updating the per-MM slots under the large-mapcount lock.
 *
 * Returns the new mapcount (the stored value plus one).
 */
static __always_inline int folio_sub_return_large_mapcount(struct folio *folio,
		int diff, struct vm_area_struct *vma)
{
	const mm_id_t mm_id = vma->vm_mm->mm_id;
	int new_mapcount_val;

	folio_lock_large_mapcount(folio);
	__folio_large_mapcount_sanity_checks(folio, diff, mm_id);

	new_mapcount_val = atomic_read(&folio->_large_mapcount) - diff;
	atomic_set(&folio->_large_mapcount, new_mapcount_val);

	/*
	 * There are valid corner cases where we might underflow a per-MM
	 * mapcount (some mappings added when no slot was free, some mappings
	 * added once a slot was free), so we always set it to -1 once we go
	 * negative.
	 */
	if (folio_mm_id(folio, 0) == mm_id) {
		folio->_mm_id_mapcount[0] -= diff;
		if (folio->_mm_id_mapcount[0] >= 0)
			goto out;
		folio->_mm_id_mapcount[0] = -1;
		folio_set_mm_id(folio, 0, MM_ID_DUMMY);
	} else if (folio_mm_id(folio, 1) == mm_id) {
		folio->_mm_id_mapcount[1] -= diff;
		if (folio->_mm_id_mapcount[1] >= 0)
			goto out;
		folio->_mm_id_mapcount[1] = -1;
		folio_set_mm_id(folio, 1, MM_ID_DUMMY);
	}

	/*
	 * If one MM slot owns all mappings, the folio is mapped exclusively.
	 * Note that if the folio is now unmapped (new_mapcount_val == -1), both
	 * slots must be free (mapcount == -1), and we'll also mark it as
	 * exclusive.
	 */
	if (folio->_mm_id_mapcount[0] == new_mapcount_val ||
	    folio->_mm_id_mapcount[1] == new_mapcount_val)
		folio->_mm_ids &= ~FOLIO_MM_IDS_SHARED_BIT;
out:
	folio_unlock_large_mapcount(folio);
	return new_mapcount_val + 1;
}
#define folio_sub_large_mapcount folio_sub_return_large_mapcount
#else /* !CONFIG_MM_ID */
/*
 * See __folio_rmap_sanity_checks(), we might map large folios even without
 * CONFIG_TRANSPARENT_HUGEPAGE. We'll keep that working for now.
 */
static inline void folio_set_large_mapcount(struct folio *folio, int mapcount,
		struct vm_area_struct *vma)
{
	/* Note: mapcounts start at -1. */
	atomic_set(&folio->_large_mapcount, mapcount - 1);
}

static inline void folio_add_large_mapcount(struct folio *folio,
		int diff, struct vm_area_struct *vma)
{
	atomic_add(diff, &folio->_large_mapcount);
}

/* The *_return_* variants must not be used without CONFIG_MM_ID. */
static inline int folio_add_return_large_mapcount(struct folio *folio,
		int diff, struct vm_area_struct *vma)
{
	BUILD_BUG();
}

static inline void folio_sub_large_mapcount(struct folio *folio,
		int diff, struct vm_area_struct *vma)
{
	atomic_sub(diff, &folio->_large_mapcount);
}

static inline int folio_sub_return_large_mapcount(struct folio *folio,
		int diff, struct vm_area_struct *vma)
{
	BUILD_BUG();
}
#endif /* CONFIG_MM_ID */

/* Convenience wrappers that add or remove exactly one mapping. */
#define folio_inc_large_mapcount(folio, vma) \
	folio_add_large_mapcount(folio, 1, vma)
#define folio_inc_return_large_mapcount(folio, vma) \
	folio_add_return_large_mapcount(folio, 1, vma)
#define folio_dec_large_mapcount(folio, vma) \
	folio_sub_large_mapcount(folio, 1, vma)
#define folio_dec_return_large_mapcount(folio, vma) \
	folio_sub_return_large_mapcount(folio, 1, vma)

/* RMAP flags, currently only relevant for some anon rmap operations. */
typedef int __bitwise rmap_t;

/*
 * No special request: A mapped anonymous (sub)page is possibly shared between
 * processes.
 */
#define RMAP_NONE		((__force rmap_t)0)

/* The anonymous (sub)page is exclusive to a single process. */
#define RMAP_EXCLUSIVE		((__force rmap_t)BIT(0))

/*
 * Debug-only checks shared by the rmap add/dup/share helpers: @page up to
 * @page + @nr_pages - 1 must belong to @folio, and for PMD/PUD levels the
 * range must cover exactly one entire folio of the matching size. @level
 * must be one of the three handled constants, otherwise the build fails.
 */
static inline void __folio_rmap_sanity_checks(const struct folio *folio,
		const struct page *page, int nr_pages, enum pgtable_level level)
{
	/* hugetlb folios are handled separately. */
	VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio);

	/* When (un)mapping zeropages, we should never touch ref+mapcount. */
	VM_WARN_ON_FOLIO(is_zero_folio(folio), folio);

	/*
	 * TODO: we get driver-allocated folios that have nothing to do with
	 * the rmap using vm_insert_page(); therefore, we cannot assume that
	 * folio_test_large_rmappable() holds for large folios. We should
	 * handle any desired mapcount+stats accounting for these folios in
	 * VM_MIXEDMAP VMAs separately, and then sanity-check here that
	 * we really only get rmappable folios.
	 */

	VM_WARN_ON_ONCE(nr_pages <= 0);
	VM_WARN_ON_FOLIO(page_folio(page) != folio, folio);
	VM_WARN_ON_FOLIO(page_folio(page + nr_pages - 1) != folio, folio);

	switch (level) {
	case PGTABLE_LEVEL_PTE:
		break;
	case PGTABLE_LEVEL_PMD:
		/*
		 * We don't support folios larger than a single PMD yet. So
		 * when PGTABLE_LEVEL_PMD is set, we assume that we are creating
		 * a single "entire" mapping of the folio.
		 */
		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PMD_NR, folio);
		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PMD_NR, folio);
		break;
	case PGTABLE_LEVEL_PUD:
		/*
		 * Assume that we are creating a single "entire" mapping of the
		 * folio.
		 */
		VM_WARN_ON_FOLIO(folio_nr_pages(folio) != HPAGE_PUD_NR, folio);
		VM_WARN_ON_FOLIO(nr_pages != HPAGE_PUD_NR, folio);
		break;
	default:
		BUILD_BUG();
	}

	/*
	 * Anon folios must have an associated live anon_vma as long as they're
	 * mapped into userspace.
	 * Note that the atomic_read() mainly does two things:
	 *
	 * 1. In KASAN builds with CONFIG_SLUB_RCU_DEBUG, it causes KASAN to
	 *    check that the associated anon_vma has not yet been freed (subject
	 *    to KASAN's usual limitations). This check will pass if the
	 *    anon_vma's refcount has already dropped to 0 but an RCU grace
	 *    period hasn't passed since then.
	 * 2. If the anon_vma has not yet been freed, it checks that the
	 *    anon_vma still has a nonzero refcount (as opposed to being in the
	 *    middle of an RCU delay for getting freed).
	 */
	if (folio_test_anon(folio) && !folio_test_ksm(folio)) {
		unsigned long mapping = (unsigned long)folio->mapping;
		struct anon_vma *anon_vma;

		/* Strip the FOLIO_MAPPING_ANON tag to recover the pointer. */
		anon_vma = (void *)(mapping - FOLIO_MAPPING_ANON);
		VM_WARN_ON_FOLIO(atomic_read(&anon_vma->refcount) == 0, folio);
	}
}

/*
 * rmap interfaces called when adding or removing pte of page
 */
void folio_move_anon_rmap(struct folio *, struct vm_area_struct *);
void folio_add_anon_rmap_ptes(struct folio *, struct page *, int nr_pages,
		struct vm_area_struct *, unsigned long address, rmap_t flags);
#define folio_add_anon_rmap_pte(folio, page, vma, address, flags) \
	folio_add_anon_rmap_ptes(folio, page, 1, vma, address, flags)
void folio_add_anon_rmap_pmd(struct folio *, struct page *,
		struct vm_area_struct *, unsigned long address, rmap_t flags);
void folio_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
		unsigned long address, rmap_t flags);
void folio_add_file_rmap_ptes(struct folio *, struct page *, int nr_pages,
		struct vm_area_struct *);
#define folio_add_file_rmap_pte(folio, page, vma) \
	folio_add_file_rmap_ptes(folio, page, 1, vma)
void folio_add_file_rmap_pmd(struct folio *, struct page *,
		struct vm_area_struct *);
void folio_add_file_rmap_pud(struct folio *, struct page *,
		struct vm_area_struct *);
void folio_remove_rmap_ptes(struct folio *, struct page *, int nr_pages,
		struct vm_area_struct *);
#define folio_remove_rmap_pte(folio, page, vma) \
	folio_remove_rmap_ptes(folio, page, 1, vma)
void folio_remove_rmap_pmd(struct folio *, struct page *,
		struct vm_area_struct *);
void folio_remove_rmap_pud(struct folio *, struct page *,
		struct vm_area_struct *);

void hugetlb_add_anon_rmap(struct folio *, struct vm_area_struct *,
		unsigned long address, rmap_t flags);
void hugetlb_add_new_anon_rmap(struct folio *, struct vm_area_struct *,
		unsigned long address);

/* See folio_try_dup_anon_rmap_*() */
static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
		struct vm_area_struct *vma)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);

	/* An exclusive folio that needs COW for DMA cannot be duplicated. */
	if (PageAnonExclusive(&folio->page)) {
		if (unlikely(folio_needs_cow_for_dma(vma, folio)))
			return -EBUSY;
		ClearPageAnonExclusive(&folio->page);
	}
	atomic_inc(&folio->_entire_mapcount);
	atomic_inc(&folio->_large_mapcount);
	return 0;
}

/* See folio_try_share_anon_rmap_*() */
static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
	VM_WARN_ON_FOLIO(!PageAnonExclusive(&folio->page), folio);

	/* Paired with the memory barrier in try_grab_folio(). */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb();

	if (unlikely(folio_maybe_dma_pinned(folio)))
		return -EBUSY;
	ClearPageAnonExclusive(&folio->page);

	/*
	 * This is conceptually a smp_wmb() paired with the smp_rmb() in
	 * gup_must_unshare().
	 */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb__after_atomic();
	return 0;
}

/* Account one more mapping of a non-anonymous hugetlb folio. */
static inline void hugetlb_add_file_rmap(struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);

	atomic_inc(&folio->_entire_mapcount);
	atomic_inc(&folio->_large_mapcount);
}

/* Account the removal of one mapping of a hugetlb folio. */
static inline void hugetlb_remove_rmap(struct folio *folio)
{
	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);

	atomic_dec(&folio->_entire_mapcount);
	atomic_dec(&folio->_large_mapcount);
}

/*
 * Common implementation of the folio_dup_file_rmap_*() helpers. For
 * PGTABLE_LEVEL_PTE the mapcounts are raised per page (@nr_pages in
 * total); for PMD/PUD a single "entire" mapping is accounted.
 */
static __always_inline void __folio_dup_file_rmap(struct folio *folio,
		struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
		enum pgtable_level level)
{
	/* nr_pages is consumed by the loop below; keep the original count. */
	const int orig_nr_pages = nr_pages;

	__folio_rmap_sanity_checks(folio, page, nr_pages, level);

	switch (level) {
	case PGTABLE_LEVEL_PTE:
		if (!folio_test_large(folio)) {
			atomic_inc(&folio->_mapcount);
			break;
		}

		if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
			do {
				atomic_inc(&page->_mapcount);
			} while (page++, --nr_pages > 0);
		}
		folio_add_large_mapcount(folio, orig_nr_pages, dst_vma);
		break;
	case PGTABLE_LEVEL_PMD:
	case PGTABLE_LEVEL_PUD:
		atomic_inc(&folio->_entire_mapcount);
		folio_inc_large_mapcount(folio, dst_vma);
		break;
	default:
		BUILD_BUG();
	}
}

/**
 * folio_dup_file_rmap_ptes - duplicate PTE mappings of a page range of a folio
 * @folio:	The folio to duplicate the mappings of
 * @page:	The first page to duplicate the mappings of
 * @nr_pages:	The number of pages of which the mapping will be duplicated
 * @dst_vma:	The destination vm area
 *
 * The page range of the folio is defined by [page, page + nr_pages)
 *
 * The caller needs to hold the page table lock.
 */
static inline void folio_dup_file_rmap_ptes(struct folio *folio,
		struct page *page, int nr_pages, struct vm_area_struct *dst_vma)
{
	__folio_dup_file_rmap(folio, page, nr_pages, dst_vma, PGTABLE_LEVEL_PTE);
}

/* Single-page variant of folio_dup_file_rmap_ptes(). */
static __always_inline void folio_dup_file_rmap_pte(struct folio *folio,
		struct page *page, struct vm_area_struct *dst_vma)
{
	__folio_dup_file_rmap(folio, page, 1, dst_vma, PGTABLE_LEVEL_PTE);
}

/**
 * folio_dup_file_rmap_pmd - duplicate a PMD mapping of a page range of a folio
 * @folio:	The folio to duplicate the mapping of
 * @page:	The first page to duplicate the mapping of
 * @dst_vma:	The destination vm area
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock.
*/ static inline void folio_dup_file_rmap_pmd(struct folio *folio, struct page *page, struct vm_area_struct *dst_vma) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE __folio_dup_file_rmap(folio, page, HPAGE_PMD_NR, dst_vma, PGTABLE_LEVEL_PTE); #else WARN_ON_ONCE(true); #endif } static __always_inline int __folio_try_dup_anon_rmap(struct folio *folio, struct page *page, int nr_pages, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, enum pgtable_level level) { const int orig_nr_pages = nr_pages; bool maybe_pinned; int i; VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); __folio_rmap_sanity_checks(folio, page, nr_pages, level); /* * If this folio may have been pinned by the parent process, * don't allow to duplicate the mappings but instead require to e.g., * copy the subpage immediately for the child so that we'll always * guarantee the pinned folio won't be randomly replaced in the * future on write faults. */ maybe_pinned = likely(!folio_is_device_private(folio)) && unlikely(folio_needs_cow_for_dma(src_vma, folio)); /* * No need to check+clear for already shared PTEs/PMDs of the * folio. But if any page is PageAnonExclusive, we must fallback to * copying if the folio maybe pinned. 
*/ switch (level) { case PGTABLE_LEVEL_PTE: if (unlikely(maybe_pinned)) { for (i = 0; i < nr_pages; i++) if (PageAnonExclusive(page + i)) return -EBUSY; } if (!folio_test_large(folio)) { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); atomic_inc(&folio->_mapcount); break; } do { if (PageAnonExclusive(page)) ClearPageAnonExclusive(page); if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) atomic_inc(&page->_mapcount); } while (page++, --nr_pages > 0); folio_add_large_mapcount(folio, orig_nr_pages, dst_vma); break; case PGTABLE_LEVEL_PMD: case PGTABLE_LEVEL_PUD: if (PageAnonExclusive(page)) { if (unlikely(maybe_pinned)) return -EBUSY; ClearPageAnonExclusive(page); } atomic_inc(&folio->_entire_mapcount); folio_inc_large_mapcount(folio, dst_vma); break; default: BUILD_BUG(); } return 0; } /** * folio_try_dup_anon_rmap_ptes - try duplicating PTE mappings of a page range * of a folio * @folio: The folio to duplicate the mappings of * @page: The first page to duplicate the mappings of * @nr_pages: The number of pages of which the mapping will be duplicated * @dst_vma: The destination vm area * @src_vma: The vm area from which the mappings are duplicated * * The page range of the folio is defined by [page, page + nr_pages) * * The caller needs to hold the page table lock and the * vma->vma_mm->write_protect_seq. * * Duplicating the mappings can only fail if the folio may be pinned; device * private folios cannot get pinned and consequently this function cannot fail * for them. * * If duplicating the mappings succeeded, the duplicated PTEs have to be R/O in * the parent and the child. They must *not* be writable after this call * succeeded. * * Returns 0 if duplicating the mappings succeeded. Returns -EBUSY otherwise. 
*/
static inline int folio_try_dup_anon_rmap_ptes(struct folio *folio,
		struct page *page, int nr_pages, struct vm_area_struct *dst_vma,
		struct vm_area_struct *src_vma)
{
	return __folio_try_dup_anon_rmap(folio, page, nr_pages, dst_vma,
					 src_vma, PGTABLE_LEVEL_PTE);
}

/*
 * Single-page variant of folio_try_dup_anon_rmap_ptes(): duplicates exactly
 * one PTE mapping and returns the same 0 / -EBUSY result.
 */
static __always_inline int folio_try_dup_anon_rmap_pte(struct folio *folio,
		struct page *page, struct vm_area_struct *dst_vma,
		struct vm_area_struct *src_vma)
{
	return __folio_try_dup_anon_rmap(folio, page, 1, dst_vma, src_vma,
					 PGTABLE_LEVEL_PTE);
}

/**
 * folio_try_dup_anon_rmap_pmd - try duplicating a PMD mapping of a page range
 *				 of a folio
 * @folio:	The folio to duplicate the mapping of
 * @page:	The first page to duplicate the mapping of
 * @dst_vma:	The destination vm area
 * @src_vma:	The vm area from which the mapping is duplicated
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock and the
 * vma->vm_mm->write_protect_seq.
 *
 * Duplicating the mapping can only fail if the folio may be pinned; device
 * private folios cannot get pinned and consequently this function cannot fail
 * for them.
 *
 * If duplicating the mapping succeeds, the duplicated PMD has to be R/O in
 * the parent and the child. They must *not* be writable after this call
 * succeeded.
 *
 * Returns 0 if duplicating the mapping succeeded. Returns -EBUSY otherwise.
*/
static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio,
		struct page *page, struct vm_area_struct *dst_vma,
		struct vm_area_struct *src_vma)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return __folio_try_dup_anon_rmap(folio, page, HPAGE_PMD_NR, dst_vma,
					 src_vma, PGTABLE_LEVEL_PMD);
#else
	/* Without THP support this must never be reached; warn and fail. */
	WARN_ON_ONCE(true);
	return -EBUSY;
#endif
}

/*
 * Common implementation of the folio_try_share_anon_rmap_*() helpers.
 *
 * Clears PageAnonExclusive on @page. Returns 0 on success and -EBUSY when
 * the folio may be DMA-pinned, in which case the flag is left set.
 */
static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
		struct page *page, int nr_pages, enum pgtable_level level)
{
	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
	VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio);
	__folio_rmap_sanity_checks(folio, page, nr_pages, level);

	/* device private folios cannot get pinned via GUP. */
	if (unlikely(folio_is_device_private(folio))) {
		ClearPageAnonExclusive(page);
		return 0;
	}

	/*
	 * We have to make sure that when we clear PageAnonExclusive, that
	 * the page is not pinned and that concurrent GUP-fast won't succeed in
	 * concurrently pinning the page.
	 *
	 * Conceptually, PageAnonExclusive clearing consists of:
	 * (A1) Clear PTE
	 * (A2) Check if the page is pinned; back off if so.
	 * (A3) Clear PageAnonExclusive
	 * (A4) Restore PTE (optional, but certainly not writable)
	 *
	 * When clearing PageAnonExclusive, we cannot possibly map the page
	 * writable again, because anon pages that may be shared must never
	 * be writable. So in any case, if the PTE was writable it cannot
	 * be writable anymore afterwards and there would be a PTE change. Only
	 * if the PTE wasn't writable, there might not be a PTE change.
	 *
	 * Conceptually, GUP-fast pinning of an anon page consists of:
	 * (B1) Read the PTE
	 * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so.
	 * (B3) Pin the mapped page
	 * (B4) Check if the PTE changed by re-reading it; back off if so.
	 * (B5) If the original PTE is not writable, check if
	 *	PageAnonExclusive is not set; back off if so.
	 *
	 * If the PTE was writable, we only have to make sure that GUP-fast
	 * observes a PTE change and properly backs off.
	 *
	 * If the PTE was not writable, we have to make sure that GUP-fast either
	 * detects a (temporary) PTE change or that PageAnonExclusive is cleared
	 * and properly backs off.
	 *
	 * Consequently, when clearing PageAnonExclusive(), we have to make
	 * sure that (A1), (A2)/(A3) and (A4) happen in the right memory
	 * order. In GUP-fast pinning code, we have to make sure that (B3),(B4)
	 * and (B5) happen in the right memory order.
	 *
	 * We assume that there might not be a memory barrier after
	 * clearing/invalidating the PTE (A1) and before restoring the PTE (A4),
	 * so we use explicit ones here.
	 */

	/* Paired with the memory barrier in try_grab_folio(). */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb();

	if (unlikely(folio_maybe_dma_pinned(folio)))
		return -EBUSY;
	ClearPageAnonExclusive(page);

	/*
	 * This is conceptually a smp_wmb() paired with the smp_rmb() in
	 * gup_must_unshare().
	 */
	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
		smp_mb__after_atomic();
	return 0;
}

/**
 * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page
 *				   mapped by a PTE possibly shared to prepare
 *				   for KSM or temporary unmapping
 * @folio:	The folio to share a mapping of
 * @page:	The mapped exclusive page
 *
 * The caller needs to hold the page table lock and has to have the page table
 * entries cleared/invalidated.
 *
 * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during
 * fork() to duplicate mappings, but instead to prepare for KSM or temporarily
 * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte().
 *
 * Marking the mapped page shared can only fail if the folio maybe pinned;
 * device private folios cannot get pinned and consequently this function cannot
 * fail.
 *
 * Returns 0 if marking the mapped page possibly shared succeeded. Returns
 * -EBUSY otherwise.
*/
static inline int folio_try_share_anon_rmap_pte(struct folio *folio,
		struct page *page)
{
	/* One page, PTE level; the result of the common helper is returned as is. */
	return __folio_try_share_anon_rmap(folio, page, 1, PGTABLE_LEVEL_PTE);
}

/**
 * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page
 *				   range mapped by a PMD possibly shared to
 *				   prepare for temporary unmapping
 * @folio:	The folio to share the mapping of
 * @page:	The first page to share the mapping of
 *
 * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
 *
 * The caller needs to hold the page table lock and has to have the page table
 * entries cleared/invalidated.
 *
 * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during
 * fork() to duplicate a mapping, but instead to prepare for temporarily
 * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd().
 *
 * Marking the mapped pages shared can only fail if the folio may be pinned;
 * device private folios cannot get pinned and consequently this function cannot
 * fail for them.
 *
 * Returns 0 if marking the mapped pages possibly shared succeeded. Returns
 * -EBUSY otherwise.
*/
static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
		struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR,
					   PGTABLE_LEVEL_PMD);
#else
	/* Built without CONFIG_TRANSPARENT_HUGEPAGE: warn once, report failure. */
	WARN_ON_ONCE(true);
	return -EBUSY;
#endif
}

/*
 * Called from mm/vmscan.c to handle paging out
 */
int folio_referenced(struct folio *, int is_locked,
			struct mem_cgroup *memcg, vm_flags_t *vm_flags);

void try_to_migrate(struct folio *folio, enum ttu_flags flags);
void try_to_unmap(struct folio *, enum ttu_flags flags);

struct page *make_device_exclusive(struct mm_struct *mm, unsigned long addr,
		void *owner, struct folio **foliop);

/* Avoid racy checks */
#define PVMW_SYNC		(1 << 0)
/* Look for migration entries rather than present PTEs */
#define PVMW_MIGRATION		(1 << 1)

/*
 * State of one page_vma_mapped_walk() iteration.
 *
 * DEFINE_FOLIO_VMA_WALK() fills in @pfn, @nr_pages, @pgoff, @vma, @address and
 * @flags (PVMW_* bits); @pmd, @pte and @ptl start out zeroed by that
 * initializer. page_vma_mapped_walk_done() and page_vma_mapped_walk_restart()
 * treat a non-NULL @ptl as a held spinlock and a non-NULL @pte as a mapped
 * page table entry.
 */
struct page_vma_mapped_walk {
	unsigned long pfn;
	unsigned long nr_pages;
	pgoff_t pgoff;
	struct vm_area_struct *vma;
	unsigned long address;
	pmd_t *pmd;
	pte_t *pte;
	spinlock_t *ptl;
	unsigned int flags;
};

/*
 * Declare a struct page_vma_mapped_walk called @name that describes all of
 * @_folio (pfn, page count and pgoff are taken from the folio) within @_vma,
 * with the walk starting at @_address and using the PVMW_* bits in @_flags.
 */
#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)	\
	struct page_vma_mapped_walk name = {				\
		.pfn = folio_pfn(_folio),				\
		.nr_pages = folio_nr_pages(_folio),			\
		.pgoff = folio_pgoff(_folio),				\
		.vma = _vma,						\
		.address = _address,					\
		.flags = _flags,					\
	}

/*
 * Release what the walk still holds: unmap @pvmw->pte (skipped for hugetlb
 * VMAs) and drop @pvmw->ptl when it is set. The pointers themselves are not
 * reset here.
 */
static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
	/* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
	if (pvmw->pte && !is_vm_hugetlb_page(pvmw->vma))
		pte_unmap(pvmw->pte);
	if (pvmw->ptl)
		spin_unlock(pvmw->ptl);
}

/**
 * page_vma_mapped_walk_restart - Restart the page table walk.
 * @pvmw: Pointer to struct page_vma_mapped_walk.
 *
 * It restarts the page table walk when changes occur in the page
 * table, such as splitting a PMD. Ensures that the PTL held during
 * the previous walk is released and resets the state to allow for
 * a new walk starting at the current address stored in pvmw->address.
*/
static inline void
page_vma_mapped_walk_restart(struct page_vma_mapped_walk *pvmw)
{
	/* A walk being restarted is expected to have reached a pmd or a pte ... */
	WARN_ON_ONCE(!pvmw->pmd && !pvmw->pte);

	/* ... and to hold the PTL; a missing lock is only warned about. */
	if (likely(pvmw->ptl))
		spin_unlock(pvmw->ptl);
	else
		WARN_ON_ONCE(1);

	/* Forget the previous position; pvmw->address is left untouched. */
	pvmw->ptl = NULL;
	pvmw->pmd = NULL;
	pvmw->pte = NULL;
}

bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw);
unsigned long page_address_in_vma(const struct folio *folio,
		const struct page *, const struct vm_area_struct *);

/*
 * Cleans the PTEs of shared mappings.
 * (and since clean PTEs should also be readonly, write protects them too)
 *
 * Returns the number of cleaned PTEs.
 */
int folio_mkclean(struct folio *);

int mapping_wrprotect_range(struct address_space *mapping, pgoff_t pgoff,
		unsigned long pfn, unsigned long nr_pages);

int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
		      struct vm_area_struct *vma);

/* Bit flags; remove_migration_ptes() below takes an int @flags argument. */
enum rmp_flags {
	RMP_LOCKED		= 1 << 0,
	RMP_USE_SHARED_ZEROPAGE	= 1 << 1,
};

void remove_migration_ptes(struct folio *src, struct folio *dst, int flags);

/*
 * rmap_walk_control: To control rmap traversing for specific needs
 *
 * arg: passed to rmap_one() and invalid_vma()
 * try_lock: bail out if the rmap lock is contended
 * contended: indicate the rmap traversal bailed out due to lock contention
 * rmap_one: executed on each vma where page is mapped
 * done: for checking traversing termination condition
 * anon_lock: for taking the anon_vma lock in an optimized way instead of the
 *            default one
 * invalid_vma: for skipping vmas the walk is not interested in
 */
struct rmap_walk_control {
	void *arg;
	bool try_lock;
	bool contended;
	/*
	 * Return false if page table scanning in rmap_walk should be stopped.
	 * Otherwise, return true.
	 */
	bool (*rmap_one)(struct folio *folio, struct vm_area_struct *vma,
					unsigned long addr, void *arg);
	int (*done)(struct folio *folio);
	struct anon_vma *(*anon_lock)(const struct folio *folio,
				      struct rmap_walk_control *rwc);
	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};

void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc);
void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc);
struct anon_vma *folio_lock_anon_vma_read(const struct folio *folio,
					  struct rmap_walk_control *rwc);

#else	/* !CONFIG_MMU */

/* Without an MMU the anon_vma setup hooks expand to no-ops. */
#define anon_vma_init()		do {} while (0)
#define anon_vma_prepare(vma)	(0)

/* !CONFIG_MMU stub: reports no vm_flags and a reference count of zero. */
static inline int folio_referenced(struct folio *folio, int is_locked,
				  struct mem_cgroup *memcg,
				  vm_flags_t *vm_flags)
{
	*vm_flags = 0;
	return 0;
}

/* !CONFIG_MMU stub: does nothing. */
static inline void try_to_unmap(struct folio *folio, enum ttu_flags flags)
{
}

/* !CONFIG_MMU stub: always reports zero cleaned PTEs. */
static inline int folio_mkclean(struct folio *folio)
{
	return 0;
}
#endif	/* CONFIG_MMU */

#endif	/* _LINUX_RMAP_H */
3 3 5 5 2 3 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 /* * Route Plug-In * Copyright (c) 2000 by Abramo Bagnara <abramo@alsa-project.org> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" static void zero_areas(struct snd_pcm_plugin_channel *dvp, int ndsts, snd_pcm_uframes_t frames, snd_pcm_format_t format) { int dst = 0; for (; dst < ndsts; ++dst) { if (dvp->wanted) snd_pcm_area_silence(&dvp->area, 0, frames, format); dvp->enabled = 0; dvp++; } } static inline void copy_area(const struct snd_pcm_plugin_channel *src_channel, struct snd_pcm_plugin_channel *dst_channel, snd_pcm_uframes_t frames, snd_pcm_format_t format) { dst_channel->enabled = 1; snd_pcm_area_copy(&src_channel->area, 0, &dst_channel->area, 0, frames, format); } static snd_pcm_sframes_t route_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t 
frames) { int nsrcs, ndsts, dst; struct snd_pcm_plugin_channel *dvp; snd_pcm_format_t format; if (snd_BUG_ON(!plugin || !src_channels || !dst_channels)) return -ENXIO; if (frames == 0) return 0; if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; nsrcs = plugin->src_format.channels; ndsts = plugin->dst_format.channels; format = plugin->dst_format.format; dvp = dst_channels; if (nsrcs <= 1) { /* expand to all channels */ for (dst = 0; dst < ndsts; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; } return frames; } for (dst = 0; dst < ndsts && dst < nsrcs; ++dst) { copy_area(src_channels, dvp, frames, format); dvp++; src_channels++; } if (dst < ndsts) zero_areas(dvp, ndsts - dst, frames, format); return frames; } int snd_pcm_plugin_build_route(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { struct snd_pcm_plugin *plugin; int err; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->format != dst_format->format)) return -ENXIO; err = snd_pcm_plugin_build(plug, "route conversion", src_format, dst_format, 0, &plugin); if (err < 0) return err; plugin->transfer = route_transfer; *r_plugin = plugin; return 0; }
1 1 1 1 2 2 2 1 1 1 1 2 3 2 1 1 1 3 3 3 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Source Hashing scheduling module * * Authors: Wensong Zhang <wensong@gnuchina.org> * * Changes: */ /* * The sh algorithm is to select server by the hash key of source IP * address. 
The pseudo code is as follows: * * n <- servernode[src_ip]; * if (n is dead) OR * (n is overloaded) or (n.weight <= 0) then * return NULL; * * return n; * * Notes that servernode is a 256-bucket hash table that maps the hash * index derived from packet source IP address to the current server * array. If the sh scheduler is used in cache cluster, it is good to * combine it with cache_bypass feature. When the statically assigned * server is dead or overloaded, the load balancer can bypass the cache * server and send requests to the original server directly. * * The weight destination attribute can be used to control the * distribution of connections to the destinations in servernode. The * greater the weight, the more connections the destination * will receive. * */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/ip.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/ip_vs.h> #include <net/tcp.h> #include <linux/udp.h> #include <linux/sctp.h> /* * IPVS SH bucket */ struct ip_vs_sh_bucket { struct ip_vs_dest __rcu *dest; /* real server (cache) */ }; /* * for IPVS SH entry hash table */ #ifndef CONFIG_IP_VS_SH_TAB_BITS #define CONFIG_IP_VS_SH_TAB_BITS 8 #endif #define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS #define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS) #define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1) struct ip_vs_sh_state { struct rcu_head rcu_head; struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; }; /* Helper function to determine if server is unavailable */ static inline bool is_unavailable(struct ip_vs_dest *dest) { return atomic_read(&dest->weight) <= 0 || dest->flags & IP_VS_DEST_F_OVERLOAD; } /* * Returns hash value for IPVS SH entry */ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, __be16 port, unsigned int offset) { __be32 addr_fold = addr->ip; #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) addr_fold = 
addr->ip6[0]^addr->ip6[1]^ addr->ip6[2]^addr->ip6[3]; #endif return (offset + hash_32(ntohs(port) + ntohl(addr_fold), IP_VS_SH_TAB_BITS)) & IP_VS_SH_TAB_MASK; } /* * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); return (!dest || is_unavailable(dest)) ? NULL : dest; } /* As ip_vs_sh_get, but with fallback if selected server is unavailable * * The fallback strategy loops around the table starting from a "random" * point (in fact, it is chosen to be the original hash value to make the * algorithm deterministic) to find a new server. */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { unsigned int offset, roffset; unsigned int hash, ihash; struct ip_vs_dest *dest; /* first try the dest it's supposed to go to */ ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); dest = rcu_dereference(s->buckets[ihash].dest); if (!dest) return NULL; if (!is_unavailable(dest)) return dest; IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); /* if the original dest is unavailable, loop around the table * starting from ihash to find a new dest */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; if (!is_unavailable(dest)) return dest; IP_VS_DBG_BUF(6, "SH: selected unavailable " "server %s:%d (offset %d), reselecting", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), roffset); } return NULL; } /* * Assign all the hash buckets of the specified table 
with the service. */ static int ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) { int i; struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; int d_count; bool empty; b = &s->buckets[0]; p = &svc->destinations; empty = list_empty(p); d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { dest = rcu_dereference_protected(b->dest, 1); if (dest) ip_vs_dest_put(dest); if (empty) RCU_INIT_POINTER(b->dest, NULL); else { if (p == &svc->destinations) p = p->next; dest = list_entry(p, struct ip_vs_dest, n_list); ip_vs_dest_hold(dest); RCU_INIT_POINTER(b->dest, dest); IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", i, IP_VS_DBG_ADDR(dest->af, &dest->addr), atomic_read(&dest->weight)); /* Don't move to next dest until filling weight */ if (++d_count >= atomic_read(&dest->weight)) { p = p->next; d_count = 0; } } b++; } return 0; } /* * Flush all the hash buckets of the specified table. */ static void ip_vs_sh_flush(struct ip_vs_sh_state *s) { int i; struct ip_vs_sh_bucket *b; struct ip_vs_dest *dest; b = &s->buckets[0]; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { dest = rcu_dereference_protected(b->dest, 1); if (dest) { ip_vs_dest_put(dest); RCU_INIT_POINTER(b->dest, NULL); } b++; } } static int ip_vs_sh_init_svc(struct ip_vs_service *svc) { struct ip_vs_sh_state *s; /* allocate the SH table for this service */ s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); if (s == NULL) return -ENOMEM; svc->sched_data = s; IP_VS_DBG(6, "SH hash table (memory=%zdbytes) allocated for " "current service\n", sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); /* assign the hash buckets with current dests */ ip_vs_sh_reassign(s, svc); return 0; } static void ip_vs_sh_done_svc(struct ip_vs_service *svc) { struct ip_vs_sh_state *s = svc->sched_data; /* got to clean up hash buckets here */ ip_vs_sh_flush(s); /* release the table itself */ kfree_rcu(s, rcu_head); IP_VS_DBG(6, "SH hash table (memory=%zdbytes) released\n", sizeof(struct 
ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); } static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct ip_vs_sh_state *s = svc->sched_data; /* assign the hash buckets with the updated service */ ip_vs_sh_reassign(s, svc); return 0; } /* Helper function to get port number */ static inline __be16 ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) { __be16 _ports[2], *ports; /* At this point we know that we have a valid packet of some kind. * Because ICMP packets are only guaranteed to have the first 8 * bytes, let's just grab the ports. Fortunately they're in the * same position for all three of the protocols we care about. */ switch (iph->protocol) { case IPPROTO_TCP: case IPPROTO_UDP: case IPPROTO_SCTP: ports = skb_header_pointer(skb, iph->len, sizeof(_ports), &_ports); if (unlikely(!ports)) return 0; if (likely(!ip_vs_iph_inverse(iph))) return ports[0]; else return ports[1]; default: return 0; } } /* * Source Hashing scheduling */ static struct ip_vs_dest * ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest; struct ip_vs_sh_state *s; __be16 port = 0; const union nf_inet_addr *hash_addr; hash_addr = ip_vs_iph_inverse(iph) ? 
&iph->daddr : &iph->saddr; IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) port = ip_vs_sh_get_port(skb, iph); s = (struct ip_vs_sh_state *) svc->sched_data; if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); else dest = ip_vs_sh_get(svc, s, hash_addr, port); if (!dest) { ip_vs_scheduler_err(svc, "no destination available"); return NULL; } IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", IP_VS_DBG_ADDR(svc->af, hash_addr), IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); return dest; } /* * IPVS SH Scheduler structure */ static struct ip_vs_scheduler ip_vs_sh_scheduler = { .name = "sh", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .add_dest = ip_vs_sh_dest_changed, .del_dest = ip_vs_sh_dest_changed, .upd_dest = ip_vs_sh_dest_changed, .schedule = ip_vs_sh_schedule, }; static int __init ip_vs_sh_init(void) { return register_ip_vs_scheduler(&ip_vs_sh_scheduler); } static void __exit ip_vs_sh_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_sh_scheduler); synchronize_rcu(); } module_init(ip_vs_sh_init); module_exit(ip_vs_sh_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs source hashing scheduler");
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* X.509 certificate parser internal definitions * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/cleanup.h> #include <linux/time.h> #include <crypto/public_key.h> #include <keys/asymmetric-type.h> struct x509_certificate { struct x509_certificate *next; struct x509_certificate *signer; /* Certificate that signed this one */ struct public_key *pub; /* Public key details */ struct public_key_signature *sig; /* Signature parameters */ char *issuer; /* Name of certificate issuer */ char *subject; /* Name of certificate subject */ struct asymmetric_key_id *id; /* Issuer + Serial number */ struct asymmetric_key_id *skid; /* Subject + subjectKeyId (optional) */ time64_t valid_from; time64_t valid_to; const void *tbs; /* Signed data */ unsigned tbs_size; /* Size of signed data */ unsigned raw_sig_size; /* Size of signature */ const void *raw_sig; /* Signature data */ const void *raw_serial; /* Raw serial number in ASN.1 */ unsigned raw_serial_size; unsigned raw_issuer_size; const void *raw_issuer; /* Raw issuer name in ASN.1 */ const void *raw_subject; /* Raw subject name in ASN.1 */ unsigned raw_subject_size; unsigned raw_skid_size; const void *raw_skid; /* Raw subjectKeyId in ASN.1 */ unsigned index; bool seen; /* Infinite recursion prevention */ bool verified; bool self_signed; /* T if self-signed (check unsupported_sig too) */ bool unsupported_sig; /* T if signature uses unsupported crypto */ bool blacklisted; }; /* * x509_cert_parser.c */ extern void x509_free_certificate(struct x509_certificate *cert); DEFINE_FREE(x509_free_certificate, struct x509_certificate *, if (!IS_ERR(_T)) x509_free_certificate(_T)) extern struct x509_certificate *x509_cert_parse(const void *data, 
size_t datalen); extern int x509_decode_time(time64_t *_t, size_t hdrlen, unsigned char tag, const unsigned char *value, size_t vlen); /* * x509_public_key.c */ extern int x509_get_sig_params(struct x509_certificate *cert); extern int x509_check_for_self_signed(struct x509_certificate *cert);
2358 2382 2352 2351 2338 2358 2366 2396 2389 2397 2398 2415 11 2375 2366 71 292 105 292 2519 50 2491 2384 2408 2385 19 2383 2378 2350 2333 2334 2330 2332 2333 2329 2 2330 2308 2505 2520 2514 826 2360 2366 3 2307 35 35 2519 2371 2383 28 2363 9 12 2368 2356 15 2321 2321 60 2360 2368 2364 2360 2308 2305 2516 2521 29 814 22 2398 2397 3 3 76 2 2364 31 2 10 67 23 57 71 7 67 67 67 45 2368 28 45 2512 71 2503 2366 2490 2389 2513 2372 92 2363 92 2515 2519 2516 2366 2370 2366 2337 2350 901 2334 18 2351 2352 2345 2336 2336 2372 2373 2367 5 4 2366 4 1 4 2364 2363 2367 2497 2488 2312 2314 2310 2372 2363 2361 2351 165 2350 2346 74 4 74 2332 2331 2352 2358 2350 2351 2345 2767 2343 2296 2537 2263 2539 2490 2501 2354 2347 2355 2354 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 
347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 
1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 
1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 
2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/power/runtime.c - Helper functions for device runtime PM * * Copyright (c) 2009 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc. * Copyright (C) 2010 Alan Stern <stern@rowland.harvard.edu> */ #include <linux/sched/mm.h> #include <linux/ktime.h> #include <linux/hrtimer.h> #include <linux/export.h> #include <linux/pm_runtime.h> #include <linux/pm_wakeirq.h> #include <linux/rculist.h> #include <trace/events/rpm.h> #include "../base.h" #include "power.h" typedef int (*pm_callback_t)(struct device *); static inline pm_callback_t get_callback_ptr(const void *start, size_t offset) { return *(pm_callback_t *)(start + offset); } static pm_callback_t __rpm_get_driver_callback(struct device *dev, size_t cb_offset) { if (dev->driver && dev->driver->pm) return get_callback_ptr(dev->driver->pm, cb_offset); return NULL; } static pm_callback_t __rpm_get_callback(struct device *dev, size_t cb_offset) { const struct dev_pm_ops *ops; pm_callback_t cb = NULL; if (dev->pm_domain) ops = &dev->pm_domain->ops; else if (dev->type && dev->type->pm) ops = dev->type->pm; else if (dev->class && dev->class->pm) ops = dev->class->pm; else if (dev->bus && dev->bus->pm) ops = dev->bus->pm; else ops = NULL; if (ops) cb = get_callback_ptr(ops, cb_offset); if (!cb) cb = __rpm_get_driver_callback(dev, cb_offset); return cb; } #define RPM_GET_CALLBACK(dev, callback) \ __rpm_get_callback(dev, offsetof(struct dev_pm_ops, callback)) static int rpm_resume(struct device *dev, int rpmflags); static int rpm_suspend(struct device *dev, int rpmflags); /** * update_pm_runtime_accounting - Update the time accounting of power states * @dev: Device to update the accounting for * * In order to be able to have time accounting of the various power states * (as used by programs such as 
PowerTOP to show the effectiveness of runtime * PM), we need to track the time spent in each state. * update_pm_runtime_accounting must be called each time before the * runtime_status field is updated, to account the time in the old state * correctly. */ static void update_pm_runtime_accounting(struct device *dev) { u64 now, last, delta; if (dev->power.disable_depth > 0) return; last = dev->power.accounting_timestamp; now = ktime_get_mono_fast_ns(); dev->power.accounting_timestamp = now; /* * Because ktime_get_mono_fast_ns() is not monotonic during * timekeeping updates, ensure that 'now' is after the last saved * timesptamp. */ if (now < last) return; delta = now - last; if (dev->power.runtime_status == RPM_SUSPENDED) dev->power.suspended_time += delta; else dev->power.active_time += delta; } static void __update_runtime_status(struct device *dev, enum rpm_status status) { update_pm_runtime_accounting(dev); trace_rpm_status(dev, status); dev->power.runtime_status = status; } static u64 rpm_get_accounted_time(struct device *dev, bool suspended) { u64 time; unsigned long flags; spin_lock_irqsave(&dev->power.lock, flags); update_pm_runtime_accounting(dev); time = suspended ? dev->power.suspended_time : dev->power.active_time; spin_unlock_irqrestore(&dev->power.lock, flags); return time; } u64 pm_runtime_active_time(struct device *dev) { return rpm_get_accounted_time(dev, false); } u64 pm_runtime_suspended_time(struct device *dev) { return rpm_get_accounted_time(dev, true); } EXPORT_SYMBOL_GPL(pm_runtime_suspended_time); /** * pm_runtime_deactivate_timer - Deactivate given device's suspend timer. * @dev: Device to handle. */ static void pm_runtime_deactivate_timer(struct device *dev) { if (dev->power.timer_expires > 0) { hrtimer_try_to_cancel(&dev->power.suspend_timer); dev->power.timer_expires = 0; } } /** * pm_runtime_cancel_pending - Deactivate suspend timer and cancel requests. * @dev: Device to handle. 
*/ static void pm_runtime_cancel_pending(struct device *dev) { pm_runtime_deactivate_timer(dev); /* * In case there's a request pending, make sure its work function will * return without doing anything. */ dev->power.request = RPM_REQ_NONE; } /* * pm_runtime_autosuspend_expiration - Get a device's autosuspend-delay expiration time. * @dev: Device to handle. * * Compute the autosuspend-delay expiration time based on the device's * power.last_busy time. If the delay has already expired or is disabled * (negative) or the power.use_autosuspend flag isn't set, return 0. * Otherwise return the expiration time in nanoseconds (adjusted to be nonzero). * * This function may be called either with or without dev->power.lock held. * Either way it can be racy, since power.last_busy may be updated at any time. */ u64 pm_runtime_autosuspend_expiration(struct device *dev) { int autosuspend_delay; u64 expires; if (!dev->power.use_autosuspend) return 0; autosuspend_delay = READ_ONCE(dev->power.autosuspend_delay); if (autosuspend_delay < 0) return 0; expires = READ_ONCE(dev->power.last_busy); expires += (u64)autosuspend_delay * NSEC_PER_MSEC; if (expires > ktime_get_mono_fast_ns()) return expires; /* Expires in the future */ return 0; } EXPORT_SYMBOL_GPL(pm_runtime_autosuspend_expiration); static int dev_memalloc_noio(struct device *dev, void *data) { return dev->power.memalloc_noio; } /* * pm_runtime_set_memalloc_noio - Set a device's memalloc_noio flag. * @dev: Device to handle. * @enable: True for setting the flag and False for clearing the flag. * * Set the flag for all devices in the path from the device to the * root device in the device tree if @enable is true, otherwise clear * the flag for devices in the path whose siblings don't set the flag. 
*
 * The function should only be called by block device, or network
 * device driver for solving the deadlock problem during runtime
 * resume/suspend:
 *
 *     If memory allocation with GFP_KERNEL is called inside runtime
 *     resume/suspend callback of any one of its ancestors(or the
 *     block device itself), the deadlock may be triggered inside the
 *     memory allocation since it might not complete until the block
 *     device becomes active and the involved page I/O finishes. The
 *     situation is pointed out first by Alan Stern. Network device
 *     are involved in iSCSI kind of situation.
 *
 * The lock of dev_hotplug_mutex is held in the function for handling
 * hotplug race because pm_runtime_set_memalloc_noio() may be called
 * in async probe().
 *
 * The function should be called between device_add() and device_del()
 * on the affected device(block/network device).
 */
void pm_runtime_set_memalloc_noio(struct device *dev, bool enable)
{
	/* Function-local mutex serializing all callers of this function. */
	static DEFINE_MUTEX(dev_hotplug_mutex);

	mutex_lock(&dev_hotplug_mutex);
	/* Walk from @dev up through its ancestors via dev->parent. */
	for (;;) {
		bool enabled;

		/* hold power lock since bitfield is not SMP-safe. */
		spin_lock_irq(&dev->power.lock);
		enabled = dev->power.memalloc_noio;
		dev->power.memalloc_noio = enable;
		spin_unlock_irq(&dev->power.lock);

		/*
		 * not need to enable ancestors any more if the device
		 * has been enabled.
		 */
		if (enabled && enable)
			break;

		dev = dev->parent;

		/*
		 * clear flag of the parent device only if all the
		 * children don't set the flag because ancestor's
		 * flag was set by any one of the descendants.
		 *
		 * The walk also stops once there is no parent left.
		 */
		if (!dev || (!enable &&
		    device_for_each_child(dev, NULL, dev_memalloc_noio)))
			break;
	}
	mutex_unlock(&dev_hotplug_mutex);
}
EXPORT_SYMBOL_GPL(pm_runtime_set_memalloc_noio);

/**
 * rpm_check_suspend_allowed - Test whether a device may be suspended.
 * @dev: Device to test.
 *
 * Return: 0 if none of the checks below objects, 1 if the runtime PM status
 * is already RPM_SUSPENDED, or a negative error code: -EINVAL if
 * power.runtime_error is set, -EACCES if power.disable_depth is positive,
 * -EAGAIN if the usage counter is nonzero or a resume is pending or deferred,
 * -EBUSY if children are not ignored and power.child_count is nonzero,
 * -EPERM if __dev_pm_qos_resume_latency() returns 0.
*/
static int rpm_check_suspend_allowed(struct device *dev)
{
	int retval = 0;

	/* The checks are ordered; the first one that matches decides. */
	if (dev->power.runtime_error)
		retval = -EINVAL;
	else if (dev->power.disable_depth > 0)
		retval = -EACCES;
	else if (atomic_read(&dev->power.usage_count))
		retval = -EAGAIN;
	else if (!dev->power.ignore_children && atomic_read(&dev->power.child_count))
		retval = -EBUSY;

	/* Pending resume requests take precedence over suspends. */
	else if ((dev->power.deferred_resume &&
	    dev->power.runtime_status == RPM_SUSPENDING) ||
	    (dev->power.request_pending && dev->power.request == RPM_REQ_RESUME))
		retval = -EAGAIN;
	else if (__dev_pm_qos_resume_latency(dev) == 0)
		retval = -EPERM;
	else if (dev->power.runtime_status == RPM_SUSPENDED)
		retval = 1;

	return retval;
}

/*
 * Resume the suppliers of @dev and take a reference on each of them.
 *
 * For every supplier link of @dev that has DL_FLAG_PM_RUNTIME set, call
 * pm_runtime_get_sync() on the supplier and increment the link's rpm_active
 * refcount.  On an error other than -EACCES, the usage counter of the failing
 * supplier is dropped again and the error is returned; links handled before
 * the failure are left as they are (__rpm_callback() calls
 * rpm_put_suppliers() in that case).  Return 0 otherwise.
 */
static int rpm_get_suppliers(struct device *dev)
{
	struct device_link *link;

	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
				device_links_read_lock_held()) {
		int retval;

		if (!device_link_test(link, DL_FLAG_PM_RUNTIME))
			continue;

		retval = pm_runtime_get_sync(link->supplier);
		/* Ignore suppliers with disabled runtime PM. */
		if (retval < 0 && retval != -EACCES) {
			pm_runtime_put_noidle(link->supplier);
			return retval;
		}
		refcount_inc(&link->rpm_active);
	}
	return 0;
}

/**
 * pm_runtime_release_supplier - Drop references to device link's supplier.
 * @link: Target device link.
 *
 * Drop all runtime PM references associated with @link to its supplier device.
 */
void pm_runtime_release_supplier(struct device_link *link)
{
	struct device *supplier = link->supplier;

	/*
	 * The additional power.usage_count check is a safety net in case
	 * the rpm_active refcount becomes saturated, in which case
	 * refcount_dec_not_one() would return true forever, but it is not
	 * strictly necessary.
	 */
	while (refcount_dec_not_one(&link->rpm_active) &&
	       atomic_read(&supplier->power.usage_count) > 0)
		pm_runtime_put_noidle(supplier);
}

/*
 * Release the runtime PM references held on every supplier of @dev and, if
 * @try_to_suspend is set, additionally call pm_request_idle() on each
 * supplier.
 */
static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
{
	struct device_link *link;

	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
				device_links_read_lock_held()) {
		pm_runtime_release_supplier(link);
		if (try_to_suspend)
			pm_request_idle(link->supplier);
	}
}

/* Release the references on the suppliers of @dev and request their idle. */
static void rpm_put_suppliers(struct device *dev)
{
	__rpm_put_suppliers(dev, true);
}

/*
 * Call pm_request_idle() on every supplier of @dev while holding the device
 * links read lock.
 */
static void rpm_suspend_suppliers(struct device *dev)
{
	struct device_link *link;
	int idx = device_links_read_lock();

	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
				device_links_read_lock_held())
		pm_request_idle(link->supplier);

	device_links_read_unlock(idx);
}

/**
 * __rpm_callback - Run a given runtime PM callback for a given device.
 * @cb: Runtime PM callback to run.
 * @dev: Device to run the callback for.
 *
 * dev->power.lock is dropped before @cb runs and taken again before this
 * function returns.  For devices that are not irq_safe and have device links,
 * suppliers are resumed before a resume callback, and their references are
 * dropped after a successful suspend or a failed resume.
 *
 * Return: the error from rpm_get_suppliers() if resuming suppliers failed (in
 * which case @cb is not run), the value returned by @cb otherwise, or 0 if
 * @cb is NULL.
 */
static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
	__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
	int retval = 0, idx;
	bool use_links = dev->power.links_count > 0;

	if (dev->power.irq_safe) {
		spin_unlock(&dev->power.lock);
	} else {
		spin_unlock_irq(&dev->power.lock);

		/*
		 * Resume suppliers if necessary.
		 *
		 * The device's runtime PM status cannot change until this
		 * routine returns, so it is safe to read the status outside of
		 * the lock.
		 */
		if (use_links && dev->power.runtime_status == RPM_RESUMING) {
			idx = device_links_read_lock();

			retval = rpm_get_suppliers(dev);
			if (retval) {
				rpm_put_suppliers(dev);
				/*
				 * Skip the callback; the jump target releases
				 * the links lock taken above and retakes
				 * dev->power.lock.
				 */
				goto fail;
			}

			device_links_read_unlock(idx);
		}
	}

	if (cb)
		retval = cb(dev);

	if (dev->power.irq_safe) {
		spin_lock(&dev->power.lock);
	} else {
		/*
		 * If the device is suspending and the callback has returned
		 * success, drop the usage counters of the suppliers that have
		 * been reference counted on its resume.
		 *
		 * Do that if resume fails too.
		 */
		if (use_links &&
		    ((dev->power.runtime_status == RPM_SUSPENDING && !retval) ||
		    (dev->power.runtime_status == RPM_RESUMING && retval))) {
			idx = device_links_read_lock();

			__rpm_put_suppliers(dev, false);

			/* Also entered directly from the supplier-resume failure path. */
fail:
			device_links_read_unlock(idx);
		}

		spin_lock_irq(&dev->power.lock);
	}

	return retval;
}

/**
 * rpm_callback - Run a given runtime PM callback for a given device.
 * @cb: Runtime PM callback to run.
 * @dev: Device to run the callback for.
 *
 * Return: the result of __rpm_callback(), with -EACCES converted to -EAGAIN.
 * Unless the result is -EAGAIN or -EBUSY, it is also stored in
 * power.runtime_error (so a result of 0 stores 0 there).
 */
static int rpm_callback(int (*cb)(struct device *), struct device *dev)
{
	int retval;

	if (dev->power.memalloc_noio) {
		unsigned int noio_flag;

		/*
		 * Deadlock might be caused if memory allocation with
		 * GFP_KERNEL happens inside runtime_suspend and
		 * runtime_resume callbacks of one block device's
		 * ancestor or the block device itself. Network
		 * device might be thought as part of iSCSI block
		 * device, so network device and its ancestor should
		 * be marked as memalloc_noio too.
		 */
		noio_flag = memalloc_noio_save();
		retval = __rpm_callback(cb, dev);
		memalloc_noio_restore(noio_flag);
	} else {
		retval = __rpm_callback(cb, dev);
	}

	/*
	 * Since -EACCES means that runtime PM is disabled for the given device,
	 * it should not be returned by runtime PM callbacks.  If it is returned
	 * nevertheless, assume it to be a transient error and convert it to
	 * -EAGAIN.
	 */
	if (retval == -EACCES)
		retval = -EAGAIN;

	/* -EAGAIN and -EBUSY are not recorded in power.runtime_error. */
	if (retval != -EAGAIN && retval != -EBUSY)
		dev->power.runtime_error = retval;

	return retval;
}

/**
 * rpm_idle - Notify device bus type if the device can be suspended.
 * @dev: Device to notify the bus type about.
 * @rpmflags: Flag bits.
 *
 * Check if the device's runtime PM status allows it to be suspended.  If
 * another idle notification has been started earlier, return immediately.  If
 * the RPM_ASYNC flag is set then queue an idle-notification request; otherwise
 * run the ->runtime_idle() callback directly. If the ->runtime_idle callback
 * doesn't exist or if it returns 0, call rpm_suspend with the RPM_AUTO flag.
*
 * This function must be called under dev->power.lock with interrupts disabled.
 *
 * Return: 0 if an asynchronous idle-notification request was queued; a
 * nonzero value produced by the checks below or by the ->runtime_idle()
 * callback; otherwise the result of rpm_suspend() called with RPM_AUTO added
 * to @rpmflags.
 */
static int rpm_idle(struct device *dev, int rpmflags)
{
	int (*callback)(struct device *);
	int retval;

	trace_rpm_idle(dev, rpmflags);
	retval = rpm_check_suspend_allowed(dev);
	if (retval < 0)
		;	/* Conditions are wrong. */

	/* Idle notifications are allowed only in the RPM_ACTIVE state. */
	else if (dev->power.runtime_status != RPM_ACTIVE)
		retval = -EAGAIN;

	/*
	 * Any pending request other than an idle notification takes
	 * precedence over us, except that the timer may be running.
	 */
	else if (dev->power.request_pending &&
	    dev->power.request > RPM_REQ_IDLE)
		retval = -EAGAIN;

	/* Act as though RPM_NOWAIT is always set. */
	else if (dev->power.idle_notification)
		retval = -EINPROGRESS;

	if (retval)
		goto out;

	/* Pending requests need to be canceled. */
	dev->power.request = RPM_REQ_NONE;

	callback = RPM_GET_CALLBACK(dev, runtime_idle);

	/* If no callback assume success. */
	if (!callback || dev->power.no_callbacks)
		goto out;

	/* Carry out an asynchronous or a synchronous idle notification. */
	if (rpmflags & RPM_ASYNC) {
		dev->power.request = RPM_REQ_IDLE;
		/* Queue the work item only if it is not queued already. */
		if (!dev->power.request_pending) {
			dev->power.request_pending = true;
			queue_work(pm_wq, &dev->power.work);
		}
		trace_rpm_return_int(dev, _THIS_IP_, 0);
		return 0;
	}

	/* Flag checked above so that a second notification is not started. */
	dev->power.idle_notification = true;

	/* The callback runs without dev->power.lock held. */
	if (dev->power.irq_safe)
		spin_unlock(&dev->power.lock);
	else
		spin_unlock_irq(&dev->power.lock);

	retval = callback(dev);

	if (dev->power.irq_safe)
		spin_lock(&dev->power.lock);
	else
		spin_lock_irq(&dev->power.lock);

	dev->power.idle_notification = false;
	wake_up_all(&dev->power.wait_queue);

 out:
	trace_rpm_return_int(dev, _THIS_IP_, retval);
	/* A zero result means the device may be suspended; try that now. */
	return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
}

/**
 * rpm_suspend - Carry out runtime suspend of given device.
 * @dev: Device to suspend.
 * @rpmflags: Flag bits.
 *
 * Check if the device's runtime PM status allows it to be suspended.
 * Cancel a pending idle notification, autosuspend or suspend.
If
 * another suspend has been started earlier, either return immediately
 * or wait for it to finish, depending on the RPM_NOWAIT and RPM_ASYNC
 * flags. If the RPM_ASYNC flag is set then queue a suspend request;
 * otherwise run the ->runtime_suspend() callback directly. When
 * ->runtime_suspend succeeded, if a deferred resume was requested while
 * the callback was running then carry it out, otherwise send an idle
 * notification for its parent (if the suspend succeeded and both
 * ignore_children of parent->power and irq_safe of dev->power are not set).
 * If ->runtime_suspend failed with -EAGAIN or -EBUSY, and if the RPM_AUTO
 * flag is set and the next autosuspend-delay expiration time is in the
 * future, schedule another autosuspend attempt.
 *
 * This function must be called under dev->power.lock with interrupts disabled.
 *
 * Return: 0 if the device was suspended, a suspend request was queued or the
 * autosuspend timer was left running or (re)armed; 1 if the device was
 * already suspended; -EINPROGRESS if another suspend is in progress and
 * waiting is not allowed; -EAGAIN if a deferred resume was carried out after
 * the suspend; otherwise the error from rpm_check_suspend_allowed() or from
 * the callback.
 */
static int rpm_suspend(struct device *dev, int rpmflags)
	__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
	int (*callback)(struct device *);
	struct device *parent = NULL;
	int retval;

	trace_rpm_suspend(dev, rpmflags);

 repeat:
	retval = rpm_check_suspend_allowed(dev);
	if (retval < 0)
		goto out;	/* Conditions are wrong. */

	/* Synchronous suspends are not allowed in the RPM_RESUMING state. */
	if (dev->power.runtime_status == RPM_RESUMING && !(rpmflags & RPM_ASYNC))
		retval = -EAGAIN;

	/* Also taken when the device is already suspended (retval == 1). */
	if (retval)
		goto out;

	/* If the autosuspend_delay time hasn't expired yet, reschedule. */
	if ((rpmflags & RPM_AUTO) && dev->power.runtime_status != RPM_SUSPENDING) {
		u64 expires = pm_runtime_autosuspend_expiration(dev);

		if (expires != 0) {
			/* Pending requests need to be canceled. */
			dev->power.request = RPM_REQ_NONE;

			/*
			 * Optimization: If the timer is already running and is
			 * set to expire at or before the autosuspend delay,
			 * avoid the overhead of resetting it.  Just let it
			 * expire; pm_suspend_timer_fn() will take care of the
			 * rest.
			 */
			if (!(dev->power.timer_expires &&
			    dev->power.timer_expires <= expires)) {
				/*
				 * We add a slack of 25% to gather wakeups
				 * without sacrificing the granularity.
				 */
				u64 slack = (u64)READ_ONCE(dev->power.autosuspend_delay) *
						    (NSEC_PER_MSEC >> 2);

				dev->power.timer_expires = expires;
				hrtimer_start_range_ns(&dev->power.suspend_timer,
						       ns_to_ktime(expires),
						       slack,
						       HRTIMER_MODE_ABS);
			}
			dev->power.timer_autosuspends = 1;
			goto out;
		}
	}

	/* Other scheduled or pending requests need to be canceled. */
	pm_runtime_cancel_pending(dev);

	if (dev->power.runtime_status == RPM_SUSPENDING) {
		DEFINE_WAIT(wait);

		if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
			retval = -EINPROGRESS;
			goto out;
		}

		/* irq_safe devices briefly drop the lock and retry instead of sleeping. */
		if (dev->power.irq_safe) {
			spin_unlock(&dev->power.lock);

			cpu_relax();

			spin_lock(&dev->power.lock);
			goto repeat;
		}

		/* Wait for the other suspend running in parallel with us. */
		for (;;) {
			prepare_to_wait(&dev->power.wait_queue, &wait,
					TASK_UNINTERRUPTIBLE);
			if (dev->power.runtime_status != RPM_SUSPENDING)
				break;

			spin_unlock_irq(&dev->power.lock);

			schedule();

			spin_lock_irq(&dev->power.lock);
		}
		finish_wait(&dev->power.wait_queue, &wait);
		goto repeat;
	}

	if (dev->power.no_callbacks)
		goto no_callback;	/* Assume success. */

	/* Carry out an asynchronous or a synchronous suspend. */
	if (rpmflags & RPM_ASYNC) {
		dev->power.request = (rpmflags & RPM_AUTO) ?
		    RPM_REQ_AUTOSUSPEND : RPM_REQ_SUSPEND;
		/* Queue the work item only if it is not queued already. */
		if (!dev->power.request_pending) {
			dev->power.request_pending = true;
			queue_work(pm_wq, &dev->power.work);
		}
		goto out;
	}

	__update_runtime_status(dev, RPM_SUSPENDING);

	callback = RPM_GET_CALLBACK(dev, runtime_suspend);

	dev_pm_enable_wake_irq_check(dev, true);
	retval = rpm_callback(callback, dev);
	if (retval)
		goto fail;

	dev_pm_enable_wake_irq_complete(dev);

 no_callback:
	__update_runtime_status(dev, RPM_SUSPENDED);
	pm_runtime_deactivate_timer(dev);

	/* Decrement the parent's child counter, but never below zero. */
	if (dev->parent) {
		parent = dev->parent;
		atomic_add_unless(&parent->power.child_count, -1, 0);
	}
	wake_up_all(&dev->power.wait_queue);

	/* A resume requested during the suspend is carried out right away. */
	if (dev->power.deferred_resume) {
		dev->power.deferred_resume = false;
		rpm_resume(dev, 0);
		retval = -EAGAIN;
		goto out;
	}

	if (dev->power.irq_safe)
		goto out;

	/* Maybe the parent is now able to suspend. */
	if (parent && !parent->power.ignore_children) {
		spin_unlock(&dev->power.lock);

		spin_lock(&parent->power.lock);
		rpm_idle(parent, RPM_ASYNC);
		spin_unlock(&parent->power.lock);

		spin_lock(&dev->power.lock);
	}
	/* Maybe the suppliers are now able to suspend. */
	if (dev->power.links_count > 0) {
		spin_unlock_irq(&dev->power.lock);

		rpm_suspend_suppliers(dev);

		spin_lock_irq(&dev->power.lock);
	}

 out:
	trace_rpm_return_int(dev, _THIS_IP_, retval);

	return retval;

 fail:
	/* The callback failed: undo the wake IRQ setup and go back to RPM_ACTIVE. */
	dev_pm_disable_wake_irq_check(dev, true);
	__update_runtime_status(dev, RPM_ACTIVE);
	dev->power.deferred_resume = false;
	wake_up_all(&dev->power.wait_queue);

	/*
	 * On transient errors, if the callback routine failed an autosuspend,
	 * and if the last_busy time has been updated so that there is a new
	 * autosuspend expiration time, automatically reschedule another
	 * autosuspend.
	 */
	if (!dev->power.runtime_error && (rpmflags & RPM_AUTO) &&
	    pm_runtime_autosuspend_expiration(dev) != 0)
		goto repeat;

	pm_runtime_cancel_pending(dev);

	goto out;
}

/**
 * rpm_resume - Carry out runtime resume of given device.
 * @dev: Device to resume.
 * @rpmflags: Flag bits.
*
 * Check if the device's runtime PM status allows it to be resumed.  Cancel
 * any scheduled or pending requests.  If another resume has been started
 * earlier, either return immediately or wait for it to finish, depending on the
 * RPM_NOWAIT and RPM_ASYNC flags.  Similarly, if there's a suspend running in
 * parallel with this function, either tell the other process to resume after
 * suspending (deferred_resume) or wait for it to finish.  If the RPM_ASYNC
 * flag is set then queue a resume request; otherwise run the
 * ->runtime_resume() callback directly.  Queue an idle notification for the
 * device if the resume succeeded.
 *
 * This function must be called under dev->power.lock with interrupts disabled.
 *
 * Return: 0 if the device was resumed or a resume request was queued (or
 * deferred until a running suspend completes); 1 if the device was already
 * active or was marked active without waking up its parent; -EINVAL if
 * power.runtime_error is set; -EACCES if runtime PM is disabled and the device
 * is not known to be active; -EINPROGRESS if another transition is in
 * progress and waiting is not allowed; -EBUSY if the parent could not be
 * resumed; otherwise the error from the callback.
 */
static int rpm_resume(struct device *dev, int rpmflags)
	__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
	int (*callback)(struct device *);
	struct device *parent = NULL;
	int retval = 0;

	trace_rpm_resume(dev, rpmflags);

 repeat:
	if (dev->power.runtime_error) {
		retval = -EINVAL;
	} else if (dev->power.disable_depth > 0) {
		/* Disabled, but both current and last status say "active". */
		if (dev->power.runtime_status == RPM_ACTIVE &&
		    dev->power.last_status == RPM_ACTIVE)
			retval = 1;
		else
			retval = -EACCES;
	}
	if (retval)
		goto out;

	/*
	 * Other scheduled or pending requests need to be canceled.  Small
	 * optimization: If an autosuspend timer is running, leave it running
	 * rather than cancelling it now only to restart it again in the near
	 * future.
	 */
	dev->power.request = RPM_REQ_NONE;
	if (!dev->power.timer_autosuspends)
		pm_runtime_deactivate_timer(dev);

	if (dev->power.runtime_status == RPM_ACTIVE) {
		retval = 1;
		goto out;
	}

	if (dev->power.runtime_status == RPM_RESUMING ||
	    dev->power.runtime_status == RPM_SUSPENDING) {
		DEFINE_WAIT(wait);

		if (rpmflags & (RPM_ASYNC | RPM_NOWAIT)) {
			if (dev->power.runtime_status == RPM_SUSPENDING) {
				/* Ask the running suspend to resume the device when done. */
				dev->power.deferred_resume = true;
				if (rpmflags & RPM_NOWAIT)
					retval = -EINPROGRESS;
			} else {
				retval = -EINPROGRESS;
			}
			goto out;
		}

		/* irq_safe devices briefly drop the lock and retry instead of sleeping. */
		if (dev->power.irq_safe) {
			spin_unlock(&dev->power.lock);

			cpu_relax();

			spin_lock(&dev->power.lock);
			goto repeat;
		}

		/* Wait for the operation carried out in parallel with us. */
		for (;;) {
			prepare_to_wait(&dev->power.wait_queue, &wait,
					TASK_UNINTERRUPTIBLE);
			if (dev->power.runtime_status != RPM_RESUMING &&
			    dev->power.runtime_status != RPM_SUSPENDING)
				break;

			spin_unlock_irq(&dev->power.lock);

			schedule();

			spin_lock_irq(&dev->power.lock);
		}
		finish_wait(&dev->power.wait_queue, &wait);
		goto repeat;
	}

	/*
	 * See if we can skip waking up the parent.  This is safe only if
	 * power.no_callbacks is set, because otherwise we don't know whether
	 * the resume will actually succeed.
	 */
	if (dev->power.no_callbacks && !parent && dev->parent) {
		spin_lock_nested(&dev->parent->power.lock, SINGLE_DEPTH_NESTING);
		if (dev->parent->power.disable_depth > 0 ||
		    dev->parent->power.ignore_children ||
		    dev->parent->power.runtime_status == RPM_ACTIVE) {
			atomic_inc(&dev->parent->power.child_count);
			spin_unlock(&dev->parent->power.lock);
			retval = 1;
			goto no_callback;	/* Assume success. */
		}
		spin_unlock(&dev->parent->power.lock);
	}

	/* Carry out an asynchronous or a synchronous resume. */
	if (rpmflags & RPM_ASYNC) {
		dev->power.request = RPM_REQ_RESUME;
		/* Queue the work item only if it is not queued already. */
		if (!dev->power.request_pending) {
			dev->power.request_pending = true;
			queue_work(pm_wq, &dev->power.work);
		}
		retval = 0;
		goto out;
	}

	if (!parent && dev->parent) {
		/*
		 * Increment the parent's usage counter and resume it if
		 * necessary.  Not needed if dev is irq-safe; then the
		 * parent is permanently resumed.
		 */
		parent = dev->parent;
		if (dev->power.irq_safe)
			goto skip_parent;

		spin_unlock(&dev->power.lock);

		pm_runtime_get_noresume(parent);

		spin_lock(&parent->power.lock);
		/*
		 * Resume the parent if it has runtime PM enabled and not been
		 * set to ignore its children.
		 */
		if (!parent->power.disable_depth &&
		    !parent->power.ignore_children) {
			rpm_resume(parent, 0);
			if (parent->power.runtime_status != RPM_ACTIVE)
				retval = -EBUSY;
		}
		spin_unlock(&parent->power.lock);

		spin_lock(&dev->power.lock);
		if (retval)
			goto out;

		/* dev->power.lock was dropped above, so start the checks over. */
		goto repeat;
	}
 skip_parent:

	if (dev->power.no_callbacks)
		goto no_callback;	/* Assume success. */

	__update_runtime_status(dev, RPM_RESUMING);

	callback = RPM_GET_CALLBACK(dev, runtime_resume);

	dev_pm_disable_wake_irq_check(dev, false);
	retval = rpm_callback(callback, dev);
	if (retval) {
		/* The callback failed: the device stays suspended. */
		__update_runtime_status(dev, RPM_SUSPENDED);
		pm_runtime_cancel_pending(dev);
		dev_pm_enable_wake_irq_check(dev, false);
	} else {
 no_callback:
		__update_runtime_status(dev, RPM_ACTIVE);
		pm_runtime_mark_last_busy(dev);
		if (parent)
			atomic_inc(&parent->power.child_count);
	}
	wake_up_all(&dev->power.wait_queue);

	if (retval >= 0)
		rpm_idle(dev, RPM_ASYNC);

 out:
	/* Drop the parent reference taken with pm_runtime_get_noresume() above. */
	if (parent && !dev->power.irq_safe) {
		spin_unlock_irq(&dev->power.lock);

		pm_runtime_put(parent);

		spin_lock_irq(&dev->power.lock);
	}

	trace_rpm_return_int(dev, _THIS_IP_, retval);

	return retval;
}

/**
 * pm_runtime_work - Universal runtime PM work function.
 * @work: Work structure used for scheduling the execution of this function.
 *
 * Use @work to get the device object the work is to be done for, determine what
 * is to be done and execute the appropriate runtime PM function.
*/ static void pm_runtime_work(struct work_struct *work) { struct device *dev = container_of(work, struct device, power.work); enum rpm_request req; spin_lock_irq(&dev->power.lock); if (!dev->power.request_pending) goto out; req = dev->power.request; dev->power.request = RPM_REQ_NONE; dev->power.request_pending = false; switch (req) { case RPM_REQ_NONE: break; case RPM_REQ_IDLE: rpm_idle(dev, RPM_NOWAIT); break; case RPM_REQ_SUSPEND: rpm_suspend(dev, RPM_NOWAIT); break; case RPM_REQ_AUTOSUSPEND: rpm_suspend(dev, RPM_NOWAIT | RPM_AUTO); break; case RPM_REQ_RESUME: rpm_resume(dev, RPM_NOWAIT); break; } out: spin_unlock_irq(&dev->power.lock); } /** * pm_suspend_timer_fn - Timer function for pm_schedule_suspend(). * @timer: hrtimer used by pm_schedule_suspend(). * * Check if the time is right and queue a suspend request. */ static enum hrtimer_restart pm_suspend_timer_fn(struct hrtimer *timer) { struct device *dev = container_of(timer, struct device, power.suspend_timer); unsigned long flags; u64 expires; spin_lock_irqsave(&dev->power.lock, flags); expires = dev->power.timer_expires; /* * If 'expires' is after the current time, we've been called * too early. */ if (expires > 0 && expires <= ktime_get_mono_fast_ns()) { dev->power.timer_expires = 0; rpm_suspend(dev, dev->power.timer_autosuspends ? (RPM_ASYNC | RPM_AUTO) : RPM_ASYNC); } spin_unlock_irqrestore(&dev->power.lock, flags); return HRTIMER_NORESTART; } /** * pm_schedule_suspend - Set up a timer to submit a suspend request in future. * @dev: Device to suspend. * @delay: Time to wait before submitting a suspend request, in milliseconds. */ int pm_schedule_suspend(struct device *dev, unsigned int delay) { unsigned long flags; u64 expires; int retval; spin_lock_irqsave(&dev->power.lock, flags); if (!delay) { retval = rpm_suspend(dev, RPM_ASYNC); goto out; } retval = rpm_check_suspend_allowed(dev); if (retval) goto out; /* Other scheduled or pending requests need to be canceled. 
*/ pm_runtime_cancel_pending(dev); expires = ktime_get_mono_fast_ns() + (u64)delay * NSEC_PER_MSEC; dev->power.timer_expires = expires; dev->power.timer_autosuspends = 0; hrtimer_start(&dev->power.suspend_timer, expires, HRTIMER_MODE_ABS); out: spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(pm_schedule_suspend); static int rpm_drop_usage_count(struct device *dev) { int ret; ret = atomic_sub_return(1, &dev->power.usage_count); if (ret >= 0) return ret; /* * Because rpm_resume() does not check the usage counter, it will resume * the device even if the usage counter is 0 or negative, so it is * sufficient to increment the usage counter here to reverse the change * made above. */ atomic_inc(&dev->power.usage_count); dev_warn(dev, "Runtime PM usage count underflow!\n"); return -EINVAL; } /** * __pm_runtime_idle - Entry point for runtime idle operations. * @dev: Device to send idle notification for. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and * return immediately if it is larger than zero (if it becomes negative, log a * warning, increment it, and return an error). Then carry out an idle * notification, either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_idle(struct device *dev, int rpmflags) { unsigned long flags; int retval; if (rpmflags & RPM_GET_PUT) { retval = rpm_drop_usage_count(dev); if (retval < 0) { return retval; } else if (retval > 0) { trace_rpm_usage(dev, rpmflags); return 0; } } might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_idle(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(__pm_runtime_idle); /** * __pm_runtime_suspend - Entry point for runtime put/suspend operations. * @dev: Device to suspend. 
* @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, decrement the device's usage count and * return immediately if it is larger than zero (if it becomes negative, log a * warning, increment it, and return an error). Then carry out a suspend, * either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_suspend(struct device *dev, int rpmflags) { unsigned long flags; int retval; if (rpmflags & RPM_GET_PUT) { retval = rpm_drop_usage_count(dev); if (retval < 0) { return retval; } else if (retval > 0) { trace_rpm_usage(dev, rpmflags); return 0; } } might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe); spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_suspend(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(__pm_runtime_suspend); /** * __pm_runtime_resume - Entry point for runtime resume operations. * @dev: Device to resume. * @rpmflags: Flag bits. * * If the RPM_GET_PUT flag is set, increment the device's usage count. Then * carry out a resume, either synchronous or asynchronous. * * This routine may be called in atomic context if the RPM_ASYNC flag is set, * or if pm_runtime_irq_safe() has been called. */ int __pm_runtime_resume(struct device *dev, int rpmflags) { unsigned long flags; int retval; might_sleep_if(!(rpmflags & RPM_ASYNC) && !dev->power.irq_safe && dev->power.runtime_status != RPM_ACTIVE); if (rpmflags & RPM_GET_PUT) atomic_inc(&dev->power.usage_count); spin_lock_irqsave(&dev->power.lock, flags); retval = rpm_resume(dev, rpmflags); spin_unlock_irqrestore(&dev->power.lock, flags); return retval; } EXPORT_SYMBOL_GPL(__pm_runtime_resume); /** * pm_runtime_get_conditional - Conditionally bump up device usage counter. * @dev: Device to handle. * @ign_usage_count: Whether or not to look at the current usage counter value. 
*
 * Return -EINVAL if runtime PM is disabled for @dev.
 *
 * Otherwise, if its runtime PM status is %RPM_ACTIVE and (1) @ign_usage_count
 * is set, or (2) @dev is not ignoring children and its active child count is
 * nonzero, or (3) the runtime PM usage counter of @dev is not zero, increment
 * the usage counter of @dev and return 1.
 *
 * Otherwise, return 0 without changing the usage counter.
 *
 * If @ign_usage_count is %true, this function can be used to prevent suspending
 * the device when its runtime PM status is %RPM_ACTIVE.
 *
 * If @ign_usage_count is %false, this function can be used to prevent
 * suspending the device when both its runtime PM status is %RPM_ACTIVE and its
 * runtime PM usage counter is not zero.
 *
 * The caller is responsible for decrementing the runtime PM usage counter of
 * @dev after this function has returned a positive value for it.
 */
static int pm_runtime_get_conditional(struct device *dev, bool ign_usage_count)
{
	unsigned long flags;
	int retval;

	spin_lock_irqsave(&dev->power.lock, flags);
	if (dev->power.disable_depth > 0) {
		retval = -EINVAL;
	} else if (dev->power.runtime_status != RPM_ACTIVE) {
		retval = 0;
	} else if (ign_usage_count || (!dev->power.ignore_children &&
		   atomic_read(&dev->power.child_count) > 0)) {
		/* Cases (1) and (2): take a reference unconditionally. */
		retval = 1;
		atomic_inc(&dev->power.usage_count);
	} else {
		/* Case (3): take a reference only if one is already held. */
		retval = atomic_inc_not_zero(&dev->power.usage_count);
	}
	/* The tracepoint fires on every path, whether or not the counter changed. */
	trace_rpm_usage(dev, 0);
	spin_unlock_irqrestore(&dev->power.lock, flags);

	return retval;
}

/**
 * pm_runtime_get_if_active - Bump up runtime PM usage counter if the device is
 *			      in active state
 * @dev: Target device.
 *
 * Increment the runtime PM usage counter of @dev if its runtime PM status is
 * %RPM_ACTIVE, in which case it returns 1. If the device is in a different
 * state, 0 is returned. -EINVAL is returned if runtime PM is disabled for the
 * device, in which case also the usage_count will remain unmodified.
*/
int pm_runtime_get_if_active(struct device *dev)
{
	/* true: do not look at the usage counter, only at the runtime PM status. */
	return pm_runtime_get_conditional(dev, true);
}
EXPORT_SYMBOL_GPL(pm_runtime_get_if_active);

/**
 * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter.
 * @dev: Target device.
 *
 * Increment the runtime PM usage counter of @dev if its runtime PM status is
 * %RPM_ACTIVE and its runtime PM usage counter is greater than 0 or it is not
 * ignoring children and its active child count is nonzero. 1 is returned in
 * this case.
 *
 * If @dev is in a different state or it is not in use (that is, its usage
 * counter is 0, or it is ignoring children, or its active child count is 0),
 * 0 is returned.
 *
 * -EINVAL is returned if runtime PM is disabled for the device, in which case
 * also the usage counter of @dev is not updated.
 */
int pm_runtime_get_if_in_use(struct device *dev)
{
	/* false: the usage counter (or active children) decides. */
	return pm_runtime_get_conditional(dev, false);
}
EXPORT_SYMBOL_GPL(pm_runtime_get_if_in_use);

/**
 * __pm_runtime_set_status - Set runtime PM status of a device.
 * @dev: Device to handle.
 * @status: New runtime PM status of the device.
 *
 * If runtime PM of the device is disabled or its power.runtime_error field is
 * different from zero, the status may be changed either to RPM_ACTIVE, or to
 * RPM_SUSPENDED, as long as that reflects the actual state of the device.
 * However, if the device has a parent and the parent is not active, and the
 * parent's power.ignore_children flag is unset, the device's status cannot be
 * set to RPM_ACTIVE, so -EBUSY is returned in that case.
 *
 * If successful, __pm_runtime_set_status() clears the power.runtime_error field
 * and the device parent's counter of unsuspended children is modified to
 * reflect the new status. If the new status is RPM_SUSPENDED, an idle
 * notification request for the parent is submitted.
 *
 * If @dev has any suppliers (as reflected by device links to them), and @status
 * is RPM_ACTIVE, they will be activated upfront and if the activation of one
 * of them fails, the status of @dev will be changed to RPM_SUSPENDED (instead
 * of the @status value) and the suppliers will be deactivated on exit. The
 * error returned by the failing supplier activation will be returned in that
 * case.
 *
 * Return: 0 on success, -EINVAL if @status is neither RPM_ACTIVE nor
 * RPM_SUSPENDED, -EAGAIN if runtime PM is enabled and no runtime error is
 * recorded, -EBUSY for the inactive-parent case above, or the error from
 * supplier activation.
 */
int __pm_runtime_set_status(struct device *dev, unsigned int status)
{
	struct device *parent = dev->parent;
	bool notify_parent = false;
	unsigned long flags;
	int error = 0;

	if (status != RPM_ACTIVE && status != RPM_SUSPENDED)
		return -EINVAL;

	spin_lock_irqsave(&dev->power.lock, flags);

	/*
	 * Prevent PM-runtime from being enabled for the device or return an
	 * error if it is enabled already and working.
	 */
	if (dev->power.runtime_error || dev->power.disable_depth)
		dev->power.disable_depth++;
	else
		error = -EAGAIN;

	spin_unlock_irqrestore(&dev->power.lock, flags);

	if (error)
		return error;

	/*
	 * If the new status is RPM_ACTIVE, the suppliers can be activated
	 * upfront regardless of the current status, because next time
	 * rpm_put_suppliers() runs, the rpm_active refcounts of the links
	 * involved will be dropped down to one anyway.
	 */
	if (status == RPM_ACTIVE) {
		int idx = device_links_read_lock();

		error = rpm_get_suppliers(dev);
		/* On failure fall back to RPM_SUSPENDED but keep the error code. */
		if (error)
			status = RPM_SUSPENDED;

		device_links_read_unlock(idx);
	}

	spin_lock_irqsave(&dev->power.lock, flags);

	/* No parent bookkeeping needed if nothing changes or there is no parent. */
	if (dev->power.runtime_status == status || !parent)
		goto out_set;

	if (status == RPM_SUSPENDED) {
		/* Decrement the parent's child_count unless it is already zero. */
		atomic_add_unless(&parent->power.child_count, -1, 0);
		notify_parent = !parent->power.ignore_children;
	} else {
		/* The parent's lock is taken nested inside dev->power.lock. */
		spin_lock_nested(&parent->power.lock, SINGLE_DEPTH_NESTING);

		/*
		 * It is invalid to put an active child under a parent that is
		 * not active, has runtime PM enabled and the
		 * 'power.ignore_children' flag unset.
		 */
		if (!parent->power.disable_depth &&
		    !parent->power.ignore_children &&
		    parent->power.runtime_status != RPM_ACTIVE) {
			dev_err(dev, "runtime PM trying to activate child device %s but parent (%s) is not active\n",
				dev_name(dev),
				dev_name(parent));
			error = -EBUSY;
		} else if (dev->power.runtime_status == RPM_SUSPENDED) {
			atomic_inc(&parent->power.child_count);
		}

		spin_unlock(&parent->power.lock);

		/* Skip the status update; suppliers are released below. */
		if (error) {
			status = RPM_SUSPENDED;
			goto out;
		}
	}

 out_set:
	__update_runtime_status(dev, status);
	/* runtime_error is only cleared when no supplier activation failed. */
	if (!error)
		dev->power.runtime_error = 0;

 out:
	spin_unlock_irqrestore(&dev->power.lock, flags);

	if (notify_parent)
		pm_request_idle(parent);

	if (status == RPM_SUSPENDED) {
		int idx = device_links_read_lock();

		rpm_put_suppliers(dev);

		device_links_read_unlock(idx);
	}

	/* Balances the disable_depth++ done at the top of this function. */
	pm_runtime_enable(dev);

	return error;
}
EXPORT_SYMBOL_GPL(__pm_runtime_set_status);

/**
 * __pm_runtime_barrier - Cancel pending requests and wait for completions.
 * @dev: Device to handle.
 *
 * Flush all pending requests for the device from pm_wq and wait for all
 * runtime PM operations involving the device in progress to complete.
 *
 * Should be called under dev->power.lock with interrupts disabled.
 */
static void __pm_runtime_barrier(struct device *dev)
{
	pm_runtime_deactivate_timer(dev);

	if (dev->power.request_pending) {
		dev->power.request = RPM_REQ_NONE;
		/* The lock is released while the work item is being cancelled. */
		spin_unlock_irq(&dev->power.lock);

		cancel_work_sync(&dev->power.work);

		spin_lock_irq(&dev->power.lock);
		dev->power.request_pending = false;
	}

	if (dev->power.runtime_status == RPM_SUSPENDING ||
	    dev->power.runtime_status == RPM_RESUMING ||
	    dev->power.idle_notification) {
		DEFINE_WAIT(wait);

		/* Suspend, wake-up or idle notification in progress. */
		for (;;) {
			prepare_to_wait(&dev->power.wait_queue, &wait,
					TASK_UNINTERRUPTIBLE);
			/* Re-check under the lock after queueing on the wait queue. */
			if (dev->power.runtime_status != RPM_SUSPENDING
			    && dev->power.runtime_status != RPM_RESUMING
			    && !dev->power.idle_notification)
				break;
			spin_unlock_irq(&dev->power.lock);

			schedule();

			spin_lock_irq(&dev->power.lock);
		}
		finish_wait(&dev->power.wait_queue, &wait);
	}
}

/**
 * pm_runtime_barrier - Flush pending requests and wait for completions.
 * @dev: Device to handle.
 *
 * Prevent the device from being suspended by incrementing its usage counter and
 * if there's a pending resume request for the device, wake the device up.
 * Next, make sure that all pending requests for the device have been flushed
 * from pm_wq and wait for all runtime PM operations involving the device in
 * progress to complete.
 *
 * Return value:
 * 1, if there was a resume request pending and the device had to be woken up,
 * 0, otherwise
 */
int pm_runtime_barrier(struct device *dev)
{
	int retval = 0;

	pm_runtime_get_noresume(dev);
	spin_lock_irq(&dev->power.lock);

	if (dev->power.request_pending
	    && dev->power.request == RPM_REQ_RESUME) {
		rpm_resume(dev, 0);
		retval = 1;
	}

	__pm_runtime_barrier(dev);

	spin_unlock_irq(&dev->power.lock);
	/* Drop the reference taken above without triggering an idle check. */
	pm_runtime_put_noidle(dev);

	return retval;
}
EXPORT_SYMBOL_GPL(pm_runtime_barrier);

/**
 * pm_runtime_block_if_disabled - Mark runtime PM as blocked if it is disabled.
 * @dev: Device to handle.
 *
 * Under dev->power.lock, check whether runtime PM is enabled for @dev. If it
 * is not and power.last_status is RPM_INVALID, set power.last_status to
 * RPM_BLOCKED.
 *
 * Return: true if runtime PM is not enabled for @dev, false otherwise.
 */
bool pm_runtime_block_if_disabled(struct device *dev)
{
	bool ret;

	spin_lock_irq(&dev->power.lock);

	ret = !pm_runtime_enabled(dev);
	if (ret && dev->power.last_status == RPM_INVALID)
		dev->power.last_status = RPM_BLOCKED;

	spin_unlock_irq(&dev->power.lock);

	return ret;
}

/**
 * pm_runtime_unblock - Undo the effect of pm_runtime_block_if_disabled().
 * @dev: Device to handle.
 *
 * If power.last_status is RPM_BLOCKED, reset it to RPM_INVALID; otherwise
 * leave it alone.
 */
void pm_runtime_unblock(struct device *dev)
{
	spin_lock_irq(&dev->power.lock);

	if (dev->power.last_status == RPM_BLOCKED)
		dev->power.last_status = RPM_INVALID;

	spin_unlock_irq(&dev->power.lock);
}

/**
 * __pm_runtime_disable - Disable runtime PM of a device.
 * @dev: Device to handle.
 * @check_resume: If set, check whether there's a resume request pending when
 *	disabling runtime PM and carry it out.
 *
 * Increment power.disable_depth. If it was already nonzero nothing else is
 * done. Otherwise update the time accounting, flush pending requests and wait
 * for operations in progress via __pm_runtime_barrier(), and record the
 * current runtime PM status in power.last_status.
 */
void __pm_runtime_disable(struct device *dev, bool check_resume)
{
	spin_lock_irq(&dev->power.lock);

	/* Already disabled: only the nesting depth changes. */
	if (dev->power.disable_depth > 0) {
		dev->power.disable_depth++;
		goto out;
	}

	/*
	 * Wake up the device if there's a resume request pending, because that
	 * means there probably is some I/O to process and disabling runtime PM
	 * shouldn't prevent the device from processing the I/O.
	 */
	if (check_resume && dev->power.request_pending &&
	    dev->power.request == RPM_REQ_RESUME) {
		/*
		 * Prevent suspends and idle notifications from being carried
		 * out after we have woken up the device.
		 */
		pm_runtime_get_noresume(dev);

		rpm_resume(dev, 0);

		pm_runtime_put_noidle(dev);
	}

	/* Update time accounting before disabling PM-runtime. */
	update_pm_runtime_accounting(dev);

	if (!dev->power.disable_depth++) {
		__pm_runtime_barrier(dev);
		dev->power.last_status = dev->power.runtime_status;
	}

 out:
	spin_unlock_irq(&dev->power.lock);
}
EXPORT_SYMBOL_GPL(__pm_runtime_disable);

/**
 * pm_runtime_enable - Enable runtime PM of a device.
 * @dev: Device to handle.
 *
 * Decrement power.disable_depth, warning about an unbalanced call if it is
 * already zero. When the depth reaches zero, power.last_status is reset to
 * RPM_INVALID and the accounting timestamp is refreshed; warnings are logged
 * if runtime PM was marked as blocked or if a suspended device that does not
 * ignore its children still has active children.
 */
void pm_runtime_enable(struct device *dev)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->power.lock, flags);

	if (!dev->power.disable_depth) {
		dev_warn(dev, "Unbalanced %s!\n", __func__);
		goto out;
	}

	/* Still disabled by an outer caller. */
	if (--dev->power.disable_depth > 0)
		goto out;

	if (dev->power.last_status == RPM_BLOCKED) {
		dev_warn(dev, "Attempt to enable runtime PM when it is blocked\n");
		dump_stack();
	}
	dev->power.last_status = RPM_INVALID;
	dev->power.accounting_timestamp = ktime_get_mono_fast_ns();

	if (dev->power.runtime_status == RPM_SUSPENDED &&
	    !dev->power.ignore_children &&
	    atomic_read(&dev->power.child_count) > 0)
		dev_warn(dev, "Enabling runtime PM for inactive device with active children\n");

out:
	spin_unlock_irqrestore(&dev->power.lock, flags);
}
EXPORT_SYMBOL_GPL(pm_runtime_enable);

/* Cleanup action with a void * argument: calls pm_runtime_set_suspended(). */
static void pm_runtime_set_suspended_action(void *data)
{
	pm_runtime_set_suspended(data);
}

/**
 * devm_pm_runtime_set_active_enabled - set_active version of devm_pm_runtime_enable.
 *
 * @dev: Device to handle.
*/ int devm_pm_runtime_set_active_enabled(struct device *dev) { int err; err = pm_runtime_set_active(dev); if (err) return err; err = devm_add_action_or_reset(dev, pm_runtime_set_suspended_action, dev); if (err) return err; return devm_pm_runtime_enable(dev); } EXPORT_SYMBOL_GPL(devm_pm_runtime_set_active_enabled); static void pm_runtime_disable_action(void *data) { pm_runtime_dont_use_autosuspend(data); pm_runtime_disable(data); } /** * devm_pm_runtime_enable - devres-enabled version of pm_runtime_enable. * * NOTE: this will also handle calling pm_runtime_dont_use_autosuspend() for * you at driver exit time if needed. * * @dev: Device to handle. */ int devm_pm_runtime_enable(struct device *dev) { pm_runtime_enable(dev); return devm_add_action_or_reset(dev, pm_runtime_disable_action, dev); } EXPORT_SYMBOL_GPL(devm_pm_runtime_enable); static void pm_runtime_put_noidle_action(void *data) { pm_runtime_put_noidle(data); } /** * devm_pm_runtime_get_noresume - devres-enabled version of pm_runtime_get_noresume. * * @dev: Device to handle. */ int devm_pm_runtime_get_noresume(struct device *dev) { pm_runtime_get_noresume(dev); return devm_add_action_or_reset(dev, pm_runtime_put_noidle_action, dev); } EXPORT_SYMBOL_GPL(devm_pm_runtime_get_noresume); /** * pm_runtime_forbid - Block runtime PM of a device. * @dev: Device to handle. * * Increase the device's usage count and clear its power.runtime_auto flag, * so that it cannot be suspended at run time until pm_runtime_allow() is called * for it. */ void pm_runtime_forbid(struct device *dev) { spin_lock_irq(&dev->power.lock); if (!dev->power.runtime_auto) goto out; dev->power.runtime_auto = false; atomic_inc(&dev->power.usage_count); rpm_resume(dev, 0); out: spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_forbid); /** * pm_runtime_allow - Unblock runtime PM of a device. * @dev: Device to handle. * * Decrease the device's usage count and set its power.runtime_auto flag. 
*/ void pm_runtime_allow(struct device *dev) { int ret; spin_lock_irq(&dev->power.lock); if (dev->power.runtime_auto) goto out; dev->power.runtime_auto = true; ret = rpm_drop_usage_count(dev); if (ret == 0) rpm_idle(dev, RPM_AUTO | RPM_ASYNC); else if (ret > 0) trace_rpm_usage(dev, RPM_AUTO | RPM_ASYNC); out: spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_allow); /** * pm_runtime_no_callbacks - Ignore runtime PM callbacks for a device. * @dev: Device to handle. * * Set the power.no_callbacks flag, which tells the PM core that this * device is power-managed through its parent and has no runtime PM * callbacks of its own. The runtime sysfs attributes will be removed. */ void pm_runtime_no_callbacks(struct device *dev) { spin_lock_irq(&dev->power.lock); dev->power.no_callbacks = 1; spin_unlock_irq(&dev->power.lock); if (device_is_registered(dev)) rpm_sysfs_remove(dev); } EXPORT_SYMBOL_GPL(pm_runtime_no_callbacks); /** * pm_runtime_irq_safe - Leave interrupts disabled during callbacks. * @dev: Device to handle * * Set the power.irq_safe flag, which tells the PM core that the * ->runtime_suspend() and ->runtime_resume() callbacks for this device should * always be invoked with the spinlock held and interrupts disabled. It also * causes the parent's usage counter to be permanently incremented, preventing * the parent from runtime suspending -- otherwise an irq-safe child might have * to wait for a non-irq-safe parent. */ void pm_runtime_irq_safe(struct device *dev) { if (dev->parent) pm_runtime_get_sync(dev->parent); spin_lock_irq(&dev->power.lock); dev->power.irq_safe = 1; spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_irq_safe); /** * update_autosuspend - Handle a change to a device's autosuspend settings. * @dev: Device to handle. * @old_delay: The former autosuspend_delay value. * @old_use: The former use_autosuspend value. * * Prevent runtime suspend if the new delay is negative and use_autosuspend is * set; otherwise allow it. 
Send an idle notification if suspends are allowed. * * This function must be called under dev->power.lock with interrupts disabled. */ static void update_autosuspend(struct device *dev, int old_delay, int old_use) { int delay = dev->power.autosuspend_delay; /* Should runtime suspend be prevented now? */ if (dev->power.use_autosuspend && delay < 0) { /* If it used to be allowed then prevent it. */ if (!old_use || old_delay >= 0) { atomic_inc(&dev->power.usage_count); rpm_resume(dev, 0); } else { trace_rpm_usage(dev, 0); } } /* Runtime suspend should be allowed now. */ else { /* If it used to be prevented then allow it. */ if (old_use && old_delay < 0) atomic_dec(&dev->power.usage_count); /* Maybe we can autosuspend now. */ rpm_idle(dev, RPM_AUTO); } } /** * pm_runtime_set_autosuspend_delay - Set a device's autosuspend_delay value. * @dev: Device to handle. * @delay: Value of the new delay in milliseconds. * * Set the device's power.autosuspend_delay value. If it changes to negative * and the power.use_autosuspend flag is set, prevent runtime suspends. If it * changes the other way, allow runtime suspends. */ void pm_runtime_set_autosuspend_delay(struct device *dev, int delay) { int old_delay, old_use; spin_lock_irq(&dev->power.lock); old_delay = dev->power.autosuspend_delay; old_use = dev->power.use_autosuspend; dev->power.autosuspend_delay = delay; update_autosuspend(dev, old_delay, old_use); spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(pm_runtime_set_autosuspend_delay); /** * __pm_runtime_use_autosuspend - Set a device's use_autosuspend flag. * @dev: Device to handle. * @use: New value for use_autosuspend. * * Set the device's power.use_autosuspend flag, and allow or prevent runtime * suspends as needed. 
*/ void __pm_runtime_use_autosuspend(struct device *dev, bool use) { int old_delay, old_use; spin_lock_irq(&dev->power.lock); old_delay = dev->power.autosuspend_delay; old_use = dev->power.use_autosuspend; dev->power.use_autosuspend = use; update_autosuspend(dev, old_delay, old_use); spin_unlock_irq(&dev->power.lock); } EXPORT_SYMBOL_GPL(__pm_runtime_use_autosuspend); /** * pm_runtime_init - Initialize runtime PM fields in given device object. * @dev: Device object to initialize. */ void pm_runtime_init(struct device *dev) { dev->power.runtime_status = RPM_SUSPENDED; dev->power.last_status = RPM_INVALID; dev->power.idle_notification = false; dev->power.disable_depth = 1; atomic_set(&dev->power.usage_count, 0); dev->power.runtime_error = 0; atomic_set(&dev->power.child_count, 0); pm_suspend_ignore_children(dev, false); dev->power.runtime_auto = true; dev->power.request_pending = false; dev->power.request = RPM_REQ_NONE; dev->power.deferred_resume = false; dev->power.needs_force_resume = false; INIT_WORK(&dev->power.work, pm_runtime_work); dev->power.timer_expires = 0; hrtimer_setup(&dev->power.suspend_timer, pm_suspend_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); init_waitqueue_head(&dev->power.wait_queue); } /** * pm_runtime_reinit - Re-initialize runtime PM fields in given device object. * @dev: Device object to re-initialize. */ void pm_runtime_reinit(struct device *dev) { if (!pm_runtime_enabled(dev)) { if (dev->power.runtime_status == RPM_ACTIVE) pm_runtime_set_suspended(dev); if (dev->power.irq_safe) { spin_lock_irq(&dev->power.lock); dev->power.irq_safe = 0; spin_unlock_irq(&dev->power.lock); if (dev->parent) pm_runtime_put(dev->parent); } } /* * Clear power.needs_force_resume in case it has been set by * pm_runtime_force_suspend() invoked from a driver remove callback. */ dev->power.needs_force_resume = false; } /** * pm_runtime_remove - Prepare for removing a device from device hierarchy. * @dev: Device object being removed from device hierarchy. 
*/ void pm_runtime_remove(struct device *dev) { __pm_runtime_disable(dev, false); pm_runtime_reinit(dev); } /** * pm_runtime_get_suppliers - Resume and reference-count supplier devices. * @dev: Consumer device. */ void pm_runtime_get_suppliers(struct device *dev) { struct device_link *link; int idx; idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (device_link_test(link, DL_FLAG_PM_RUNTIME)) { link->supplier_preactivated = true; pm_runtime_get_sync(link->supplier); } device_links_read_unlock(idx); } /** * pm_runtime_put_suppliers - Drop references to supplier devices. * @dev: Consumer device. */ void pm_runtime_put_suppliers(struct device *dev) { struct device_link *link; int idx; idx = device_links_read_lock(); list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, device_links_read_lock_held()) if (link->supplier_preactivated) { link->supplier_preactivated = false; pm_runtime_put(link->supplier); } device_links_read_unlock(idx); } void pm_runtime_new_link(struct device *dev) { spin_lock_irq(&dev->power.lock); dev->power.links_count++; spin_unlock_irq(&dev->power.lock); } static void pm_runtime_drop_link_count(struct device *dev) { spin_lock_irq(&dev->power.lock); WARN_ON(dev->power.links_count == 0); dev->power.links_count--; spin_unlock_irq(&dev->power.lock); } /** * pm_runtime_drop_link - Prepare for device link removal. * @link: Device link going away. * * Drop the link count of the consumer end of @link and decrement the supplier * device's runtime PM usage counter as many times as needed to drop all of the * PM runtime reference to it from the consumer. 
*/ void pm_runtime_drop_link(struct device_link *link) { if (!device_link_test(link, DL_FLAG_PM_RUNTIME)) return; pm_runtime_drop_link_count(link->consumer); pm_runtime_release_supplier(link); pm_request_idle(link->supplier); } static pm_callback_t get_callback(struct device *dev, size_t cb_offset) { /* * Setting power.strict_midlayer means that the middle layer * code does not want its runtime PM callbacks to be invoked via * pm_runtime_force_suspend() and pm_runtime_force_resume(), so * return a direct pointer to the driver callback in that case. */ if (dev_pm_strict_midlayer_is_set(dev)) return __rpm_get_driver_callback(dev, cb_offset); return __rpm_get_callback(dev, cb_offset); } #define GET_CALLBACK(dev, callback) \ get_callback(dev, offsetof(struct dev_pm_ops, callback)) /** * pm_runtime_force_suspend - Force a device into suspend state if needed. * @dev: Device to suspend. * * Disable runtime PM so we safely can check the device's runtime PM status and * if it is active, invoke its ->runtime_suspend callback to suspend it and * change its runtime PM status field to RPM_SUSPENDED. Also, if the device's * usage and children counters don't indicate that the device was in use before * the system-wide transition under way, decrement its parent's children counter * (if there is a parent). Keep runtime PM disabled to preserve the state * unless we encounter errors. * * Typically this function may be invoked from a system suspend callback to make * sure the device is put into low power state and it should only be used during * system-wide PM transitions to sleep states. It assumes that the analogous * pm_runtime_force_resume() will be used to resume the device. */ int pm_runtime_force_suspend(struct device *dev) { int (*callback)(struct device *); int ret; pm_runtime_disable(dev); if (pm_runtime_status_suspended(dev) || dev->power.needs_force_resume) return 0; callback = GET_CALLBACK(dev, runtime_suspend); dev_pm_enable_wake_irq_check(dev, true); ret = callback ? 
callback(dev) : 0; if (ret) goto err; dev_pm_enable_wake_irq_complete(dev); /* * If the device can stay in suspend after the system-wide transition * to the working state that will follow, drop the children counter of * its parent and the usage counters of its suppliers. Otherwise, set * power.needs_force_resume to let pm_runtime_force_resume() know that * the device needs to be taken care of and to prevent this function * from handling the device again in case the device is passed to it * once more subsequently. */ if (pm_runtime_need_not_resume(dev)) pm_runtime_set_suspended(dev); else dev->power.needs_force_resume = true; return 0; err: dev_pm_disable_wake_irq_check(dev, true); pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_suspend); #ifdef CONFIG_PM_SLEEP /** * pm_runtime_force_resume - Force a device into resume state if needed. * @dev: Device to resume. * * This function expects that either pm_runtime_force_suspend() has put the * device into a low-power state prior to calling it, or the device had been * runtime-suspended before the preceding system-wide suspend transition and it * was left in suspend during that transition. * * The actions carried out by pm_runtime_force_suspend(), or by a runtime * suspend in general, are reversed and the device is brought back into full * power if it is expected to be used on system resume, which is the case when * its needs_force_resume flag is set or when its smart_suspend flag is set and * its runtime PM status is "active". * * In other cases, the resume is deferred to be managed via runtime PM. * * Typically, this function may be invoked from a system resume callback. */ int pm_runtime_force_resume(struct device *dev) { int (*callback)(struct device *); int ret = 0; if (!dev->power.needs_force_resume && (!dev_pm_smart_suspend(dev) || pm_runtime_status_suspended(dev))) goto out; callback = GET_CALLBACK(dev, runtime_resume); dev_pm_disable_wake_irq_check(dev, false); ret = callback ? 
callback(dev) : 0; if (ret) { pm_runtime_set_suspended(dev); dev_pm_enable_wake_irq_check(dev, false); goto out; } pm_runtime_mark_last_busy(dev); out: /* * The smart_suspend flag can be cleared here because it is not going * to be necessary until the next system-wide suspend transition that * will update it again. */ dev->power.smart_suspend = false; /* * Also clear needs_force_resume to make this function skip devices that * have been seen by it once. */ dev->power.needs_force_resume = false; pm_runtime_enable(dev); return ret; } EXPORT_SYMBOL_GPL(pm_runtime_force_resume); bool pm_runtime_need_not_resume(struct device *dev) { return atomic_read(&dev->power.usage_count) <= 1 && (atomic_read(&dev->power.child_count) == 0 || dev->power.ignore_children); } #endif /* CONFIG_PM_SLEEP */
12 12 32 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 // SPDX-License-Identifier: GPL-2.0+ // // Empiatech em28x1 audio extension // // Copyright (C) 2006 Markus Rechberger <mrechberger@gmail.com> // // Copyright (C) 2007-2016 Mauro Carvalho Chehab 
// - Port to work with the in-kernel driver // - Cleanups, fixes, alsa-controls, etc. // // This driver is based on my previous au600 usb pstn audio driver // and inherits all the copyrights #include "em28xx.h" #include <linux/kernel.h> #include <linux/usb.h> #include <linux/init.h> #include <linux/sound.h> #include <linux/spinlock.h> #include <linux/soundcard.h> #include <linux/slab.h> #include <linux/module.h> #include <sound/core.h> #include <sound/pcm.h> #include <sound/pcm_params.h> #include <sound/info.h> #include <sound/initval.h> #include <sound/control.h> #include <sound/tlv.h> #include <sound/ac97_codec.h> #include <media/v4l2-common.h> static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "activates debug info"); #define EM28XX_MAX_AUDIO_BUFS 5 #define EM28XX_MIN_AUDIO_PACKETS 64 #define dprintk(fmt, arg...) do { \ if (debug) \ dev_printk(KERN_DEBUG, &dev->intf->dev, \ "video: %s: " fmt, __func__, ## arg); \ } while (0) static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; static int em28xx_deinit_isoc_audio(struct em28xx *dev) { int i; dprintk("Stopping isoc\n"); for (i = 0; i < dev->adev.num_urb; i++) { struct urb *urb = dev->adev.urb[i]; if (!irqs_disabled()) usb_kill_urb(urb); else usb_unlink_urb(urb); } return 0; } static void em28xx_audio_isocirq(struct urb *urb) { struct em28xx *dev = urb->context; int i; unsigned int oldptr; int period_elapsed = 0; int status; unsigned char *cp; unsigned int stride; struct snd_pcm_substream *substream; struct snd_pcm_runtime *runtime; if (dev->disconnected) { dprintk("device disconnected while streaming. 
URB status=%d.\n", urb->status); atomic_set(&dev->adev.stream_started, 0); return; } switch (urb->status) { case 0: /* success */ case -ETIMEDOUT: /* NAK */ break; case -ECONNRESET: /* kill */ case -ENOENT: case -ESHUTDOWN: return; default: /* error */ dprintk("urb completion error %d.\n", urb->status); break; } if (atomic_read(&dev->adev.stream_started) == 0) return; if (dev->adev.capture_pcm_substream) { substream = dev->adev.capture_pcm_substream; runtime = substream->runtime; stride = runtime->frame_bits >> 3; for (i = 0; i < urb->number_of_packets; i++) { unsigned long flags; int length = urb->iso_frame_desc[i].actual_length / stride; cp = (unsigned char *)urb->transfer_buffer + urb->iso_frame_desc[i].offset; if (!length) continue; oldptr = dev->adev.hwptr_done_capture; if (oldptr + length >= runtime->buffer_size) { unsigned int cnt = runtime->buffer_size - oldptr; memcpy(runtime->dma_area + oldptr * stride, cp, cnt * stride); memcpy(runtime->dma_area, cp + cnt * stride, length * stride - cnt * stride); } else { memcpy(runtime->dma_area + oldptr * stride, cp, length * stride); } snd_pcm_stream_lock_irqsave(substream, flags); dev->adev.hwptr_done_capture += length; if (dev->adev.hwptr_done_capture >= runtime->buffer_size) dev->adev.hwptr_done_capture -= runtime->buffer_size; dev->adev.capture_transfer_done += length; if (dev->adev.capture_transfer_done >= runtime->period_size) { dev->adev.capture_transfer_done -= runtime->period_size; period_elapsed = 1; } snd_pcm_stream_unlock_irqrestore(substream, flags); } if (period_elapsed) snd_pcm_period_elapsed(substream); } urb->status = 0; status = usb_submit_urb(urb, GFP_ATOMIC); if (status < 0) dev_err(&dev->intf->dev, "resubmit of audio urb failed (error=%i)\n", status); } static int em28xx_init_audio_isoc(struct em28xx *dev) { int i, err; dprintk("Starting isoc transfers\n"); /* Start streaming */ for (i = 0; i < dev->adev.num_urb; i++) { memset(dev->adev.transfer_buffer[i], 0x80, 
dev->adev.urb[i]->transfer_buffer_length); err = usb_submit_urb(dev->adev.urb[i], GFP_ATOMIC); if (err) { dev_err(&dev->intf->dev, "submit of audio urb failed (error=%i)\n", err); em28xx_deinit_isoc_audio(dev); atomic_set(&dev->adev.stream_started, 0); return err; } } return 0; } static const struct snd_pcm_hardware snd_em28xx_hw_capture = { .info = SNDRV_PCM_INFO_BLOCK_TRANSFER | SNDRV_PCM_INFO_MMAP | SNDRV_PCM_INFO_INTERLEAVED | SNDRV_PCM_INFO_BATCH | SNDRV_PCM_INFO_MMAP_VALID, .formats = SNDRV_PCM_FMTBIT_S16_LE, .rates = SNDRV_PCM_RATE_48000, .rate_min = 48000, .rate_max = 48000, .channels_min = 2, .channels_max = 2, .buffer_bytes_max = 62720 * 8, /* just about the value in usbaudio.c */ /* * The period is 12.288 bytes. Allow a 10% of variation along its * value, in order to avoid overruns/underruns due to some clock * drift. * * FIXME: This period assumes 64 packets, and a 48000 PCM rate. * Calculate it dynamically. */ .period_bytes_min = 11059, .period_bytes_max = 13516, .periods_min = 2, .periods_max = 98, /* 12544, */ }; static int snd_em28xx_capture_open(struct snd_pcm_substream *substream) { struct em28xx *dev = snd_pcm_substream_chip(substream); struct snd_pcm_runtime *runtime = substream->runtime; int nonblock, ret = 0; if (!dev) { pr_err("em28xx-audio: BUG: em28xx can't find device struct. 
Can't proceed with open\n"); return -ENODEV; } if (dev->disconnected) return -ENODEV; dprintk("opening device and trying to acquire exclusive lock\n"); nonblock = !!(substream->f_flags & O_NONBLOCK); if (nonblock) { if (!mutex_trylock(&dev->lock)) return -EAGAIN; } else { mutex_lock(&dev->lock); } runtime->hw = snd_em28xx_hw_capture; if (dev->adev.users == 0) { if (!dev->alt || dev->is_audio_only) { struct usb_device *udev; udev = interface_to_usbdev(dev->intf); if (dev->is_audio_only) /* audio is on a separate interface */ dev->alt = 1; else /* audio is on the same interface as video */ dev->alt = 7; /* * FIXME: The intention seems to be to select * the alt setting with the largest * wMaxPacketSize for the video endpoint. * At least dev->alt should be used instead, but * we should probably not touch it at all if it * is already >0, because wMaxPacketSize of the * audio endpoints seems to be the same for all. */ dprintk("changing alternate number on interface %d to %d\n", dev->ifnum, dev->alt); usb_set_interface(udev, dev->ifnum, dev->alt); } /* Sets volume, mute, etc */ dev->mute = 0; ret = em28xx_audio_analog_set(dev); if (ret < 0) goto err; } kref_get(&dev->ref); dev->adev.users++; mutex_unlock(&dev->lock); /* Dynamically adjust the period size */ snd_pcm_hw_constraint_integer(runtime, SNDRV_PCM_HW_PARAM_PERIODS); snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_PERIOD_BYTES, dev->adev.period * 95 / 100, dev->adev.period * 105 / 100); dev->adev.capture_pcm_substream = substream; return 0; err: mutex_unlock(&dev->lock); dev_err(&dev->intf->dev, "Error while configuring em28xx mixer\n"); return ret; } static int snd_em28xx_pcm_close(struct snd_pcm_substream *substream) { struct em28xx *dev = snd_pcm_substream_chip(substream); dprintk("closing device\n"); dev->mute = 1; mutex_lock(&dev->lock); dev->adev.users--; if (atomic_read(&dev->adev.stream_started) > 0) { atomic_set(&dev->adev.stream_started, 0); schedule_work(&dev->adev.wq_trigger); } 
em28xx_audio_analog_set(dev); mutex_unlock(&dev->lock); kref_put(&dev->ref, em28xx_free_device); return 0; } static int snd_em28xx_prepare(struct snd_pcm_substream *substream) { struct em28xx *dev = snd_pcm_substream_chip(substream); if (dev->disconnected) return -ENODEV; dev->adev.hwptr_done_capture = 0; dev->adev.capture_transfer_done = 0; return 0; } static void audio_trigger(struct work_struct *work) { struct em28xx_audio *adev = container_of(work, struct em28xx_audio, wq_trigger); struct em28xx *dev = container_of(adev, struct em28xx, adev); if (atomic_read(&adev->stream_started)) { dprintk("starting capture"); em28xx_init_audio_isoc(dev); } else { dprintk("stopping capture"); em28xx_deinit_isoc_audio(dev); } } static int snd_em28xx_capture_trigger(struct snd_pcm_substream *substream, int cmd) { struct em28xx *dev = snd_pcm_substream_chip(substream); int retval = 0; if (dev->disconnected) return -ENODEV; switch (cmd) { case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_START: atomic_set(&dev->adev.stream_started, 1); break; case SNDRV_PCM_TRIGGER_PAUSE_PUSH: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_STOP: atomic_set(&dev->adev.stream_started, 0); break; default: retval = -EINVAL; } schedule_work(&dev->adev.wq_trigger); return retval; } static snd_pcm_uframes_t snd_em28xx_capture_pointer(struct snd_pcm_substream *substream) { unsigned long flags; struct em28xx *dev; snd_pcm_uframes_t hwptr_done; dev = snd_pcm_substream_chip(substream); if (dev->disconnected) return SNDRV_PCM_POS_XRUN; spin_lock_irqsave(&dev->adev.slock, flags); hwptr_done = dev->adev.hwptr_done_capture; spin_unlock_irqrestore(&dev->adev.slock, flags); return hwptr_done; } /* * AC97 volume control support */ static int em28xx_vol_info(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_info *info) { struct em28xx *dev = snd_kcontrol_chip(kcontrol); if (dev->disconnected) return -ENODEV; info->type = SNDRV_CTL_ELEM_TYPE_INTEGER; info->count = 
2; info->value.integer.min = 0; info->value.integer.max = 0x1f; return 0; } static int em28xx_vol_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { struct em28xx *dev = snd_kcontrol_chip(kcontrol); struct snd_pcm_substream *substream = dev->adev.capture_pcm_substream; u16 val = (0x1f - (value->value.integer.value[0] & 0x1f)) | (0x1f - (value->value.integer.value[1] & 0x1f)) << 8; int nonblock = 0; int rc; if (dev->disconnected) return -ENODEV; if (substream) nonblock = !!(substream->f_flags & O_NONBLOCK); if (nonblock) { if (!mutex_trylock(&dev->lock)) return -EAGAIN; } else { mutex_lock(&dev->lock); } rc = em28xx_read_ac97(dev, kcontrol->private_value); if (rc < 0) goto err; val |= rc & 0x8000; /* Preserve the mute flag */ rc = em28xx_write_ac97(dev, kcontrol->private_value, val); if (rc < 0) goto err; dprintk("%sleft vol %d, right vol %d (0x%04x) to ac97 volume control 0x%04x\n", (val & 0x8000) ? "muted " : "", 0x1f - ((val >> 8) & 0x1f), 0x1f - (val & 0x1f), val, (int)kcontrol->private_value); err: mutex_unlock(&dev->lock); return rc; } static int em28xx_vol_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { struct em28xx *dev = snd_kcontrol_chip(kcontrol); struct snd_pcm_substream *substream = dev->adev.capture_pcm_substream; int nonblock = 0; int val; if (dev->disconnected) return -ENODEV; if (substream) nonblock = !!(substream->f_flags & O_NONBLOCK); if (nonblock) { if (!mutex_trylock(&dev->lock)) return -EAGAIN; } else { mutex_lock(&dev->lock); } val = em28xx_read_ac97(dev, kcontrol->private_value); mutex_unlock(&dev->lock); if (val < 0) return val; dprintk("%sleft vol %d, right vol %d (0x%04x) from ac97 volume control 0x%04x\n", (val & 0x8000) ? 
"muted " : "", 0x1f - ((val >> 8) & 0x1f), 0x1f - (val & 0x1f), val, (int)kcontrol->private_value); value->value.integer.value[0] = 0x1f - (val & 0x1f); value->value.integer.value[1] = 0x1f - ((val >> 8) & 0x1f); return 0; } static int em28xx_vol_put_mute(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { struct em28xx *dev = snd_kcontrol_chip(kcontrol); u16 val = value->value.integer.value[0]; struct snd_pcm_substream *substream = dev->adev.capture_pcm_substream; int nonblock = 0; int rc; if (dev->disconnected) return -ENODEV; if (substream) nonblock = !!(substream->f_flags & O_NONBLOCK); if (nonblock) { if (!mutex_trylock(&dev->lock)) return -EAGAIN; } else { mutex_lock(&dev->lock); } rc = em28xx_read_ac97(dev, kcontrol->private_value); if (rc < 0) goto err; if (val) rc &= 0x1f1f; else rc |= 0x8000; rc = em28xx_write_ac97(dev, kcontrol->private_value, rc); if (rc < 0) goto err; dprintk("%sleft vol %d, right vol %d (0x%04x) to ac97 volume control 0x%04x\n", (val & 0x8000) ? "muted " : "", 0x1f - ((val >> 8) & 0x1f), 0x1f - (val & 0x1f), val, (int)kcontrol->private_value); err: mutex_unlock(&dev->lock); return rc; } static int em28xx_vol_get_mute(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *value) { struct em28xx *dev = snd_kcontrol_chip(kcontrol); struct snd_pcm_substream *substream = dev->adev.capture_pcm_substream; int nonblock = 0; int val; if (dev->disconnected) return -ENODEV; if (substream) nonblock = !!(substream->f_flags & O_NONBLOCK); if (nonblock) { if (!mutex_trylock(&dev->lock)) return -EAGAIN; } else { mutex_lock(&dev->lock); } val = em28xx_read_ac97(dev, kcontrol->private_value); mutex_unlock(&dev->lock); if (val < 0) return val; if (val & 0x8000) value->value.integer.value[0] = 0; else value->value.integer.value[0] = 1; dprintk("%sleft vol %d, right vol %d (0x%04x) from ac97 volume control 0x%04x\n", (val & 0x8000) ? 
"muted " : "", 0x1f - ((val >> 8) & 0x1f), 0x1f - (val & 0x1f), val, (int)kcontrol->private_value); return 0; } static const DECLARE_TLV_DB_SCALE(em28xx_db_scale, -3450, 150, 0); static int em28xx_cvol_new(struct snd_card *card, struct em28xx *dev, char *name, int id) { int err; char ctl_name[44]; struct snd_kcontrol *kctl; struct snd_kcontrol_new tmp; memset(&tmp, 0, sizeof(tmp)); tmp.iface = SNDRV_CTL_ELEM_IFACE_MIXER; tmp.private_value = id; tmp.name = ctl_name; /* Add Mute Control */ sprintf(ctl_name, "%s Switch", name); tmp.get = em28xx_vol_get_mute; tmp.put = em28xx_vol_put_mute; tmp.info = snd_ctl_boolean_mono_info; kctl = snd_ctl_new1(&tmp, dev); err = snd_ctl_add(card, kctl); if (err < 0) return err; dprintk("Added control %s for ac97 volume control 0x%04x\n", ctl_name, id); memset(&tmp, 0, sizeof(tmp)); tmp.iface = SNDRV_CTL_ELEM_IFACE_MIXER; tmp.private_value = id; tmp.name = ctl_name; /* Add Volume Control */ sprintf(ctl_name, "%s Volume", name); tmp.get = em28xx_vol_get; tmp.put = em28xx_vol_put; tmp.info = em28xx_vol_info; tmp.tlv.p = em28xx_db_scale; kctl = snd_ctl_new1(&tmp, dev); err = snd_ctl_add(card, kctl); if (err < 0) return err; dprintk("Added control %s for ac97 volume control 0x%04x\n", ctl_name, id); return 0; } /* * register/unregister code and data */ static const struct snd_pcm_ops snd_em28xx_pcm_capture = { .open = snd_em28xx_capture_open, .close = snd_em28xx_pcm_close, .prepare = snd_em28xx_prepare, .trigger = snd_em28xx_capture_trigger, .pointer = snd_em28xx_capture_pointer, }; static void em28xx_audio_free_urb(struct em28xx *dev) { struct usb_device *udev = interface_to_usbdev(dev->intf); int i; for (i = 0; i < dev->adev.num_urb; i++) { struct urb *urb = dev->adev.urb[i]; if (!urb) continue; usb_free_coherent(udev, urb->transfer_buffer_length, dev->adev.transfer_buffer[i], urb->transfer_dma); usb_free_urb(urb); } kfree(dev->adev.urb); kfree(dev->adev.transfer_buffer); dev->adev.num_urb = 0; } /* high bandwidth multiplier, as encoded 
in highspeed endpoint descriptors */ static int em28xx_audio_ep_packet_size(struct usb_device *udev, struct usb_endpoint_descriptor *e) { int size = le16_to_cpu(e->wMaxPacketSize); if (udev->speed == USB_SPEED_HIGH) return (size & 0x7ff) * (1 + (((size) >> 11) & 0x03)); return size & 0x7ff; } static int em28xx_audio_urb_init(struct em28xx *dev) { struct usb_interface *intf; struct usb_endpoint_descriptor *e, *ep = NULL; struct usb_device *udev = interface_to_usbdev(dev->intf); int i, ep_size, interval, num_urb, npackets; int urb_size, bytes_per_transfer; u8 alt; if (dev->ifnum) alt = 1; else alt = 7; intf = usb_ifnum_to_if(udev, dev->ifnum); if (intf->num_altsetting <= alt) { dev_err(&dev->intf->dev, "alt %d doesn't exist on interface %d\n", dev->ifnum, alt); return -ENODEV; } for (i = 0; i < intf->altsetting[alt].desc.bNumEndpoints; i++) { e = &intf->altsetting[alt].endpoint[i].desc; if (!usb_endpoint_dir_in(e)) continue; if (e->bEndpointAddress == EM28XX_EP_AUDIO) { ep = e; break; } } if (!ep) { dev_err(&dev->intf->dev, "Couldn't find an audio endpoint"); return -ENODEV; } ep_size = em28xx_audio_ep_packet_size(udev, ep); interval = 1 << (ep->bInterval - 1); dev_info(&dev->intf->dev, "Endpoint 0x%02x %s on intf %d alt %d interval = %d, size %d\n", EM28XX_EP_AUDIO, usb_speed_string(udev->speed), dev->ifnum, alt, interval, ep_size); /* Calculate the number and size of URBs to better fit the audio samples */ /* * Estimate the number of bytes per DMA transfer. * * This is given by the bit rate (for now, only 48000 Hz) multiplied * by 2 channels and 2 bytes/sample divided by the number of microframe * intervals and by the microframe rate (125 us) */ bytes_per_transfer = DIV_ROUND_UP(48000 * 2 * 2, 125 * interval); /* * Estimate the number of transfer URBs. Don't let it go past the * maximum number of URBs that is known to be supported by the device. 
*/ num_urb = DIV_ROUND_UP(bytes_per_transfer, ep_size); if (num_urb > EM28XX_MAX_AUDIO_BUFS) num_urb = EM28XX_MAX_AUDIO_BUFS; /* * Now that we know the number of bytes per transfer and the number of * URBs, estimate the typical size of an URB, in order to adjust the * minimal number of packets. */ urb_size = bytes_per_transfer / num_urb; /* * Now, calculate the amount of audio packets to be filled on each * URB. In order to preserve the old behaviour, use a minimal * threshold for this value. */ npackets = EM28XX_MIN_AUDIO_PACKETS; if (urb_size > ep_size * npackets) npackets = DIV_ROUND_UP(urb_size, ep_size); dev_info(&dev->intf->dev, "Number of URBs: %d, with %d packets and %d size\n", num_urb, npackets, urb_size); /* Estimate the bytes per period */ dev->adev.period = urb_size * npackets; /* Allocate space to store the number of URBs to be used */ dev->adev.transfer_buffer = kcalloc(num_urb, sizeof(*dev->adev.transfer_buffer), GFP_KERNEL); if (!dev->adev.transfer_buffer) return -ENOMEM; dev->adev.urb = kcalloc(num_urb, sizeof(*dev->adev.urb), GFP_KERNEL); if (!dev->adev.urb) { kfree(dev->adev.transfer_buffer); return -ENOMEM; } /* Alloc memory for each URB and for each transfer buffer */ dev->adev.num_urb = num_urb; for (i = 0; i < num_urb; i++) { struct urb *urb; int j, k; void *buf; urb = usb_alloc_urb(npackets, GFP_KERNEL); if (!urb) { em28xx_audio_free_urb(dev); return -ENOMEM; } dev->adev.urb[i] = urb; buf = usb_alloc_coherent(udev, npackets * ep_size, GFP_KERNEL, &urb->transfer_dma); if (!buf) { dev_err(&dev->intf->dev, "usb_alloc_coherent failed!\n"); em28xx_audio_free_urb(dev); return -ENOMEM; } dev->adev.transfer_buffer[i] = buf; urb->dev = udev; urb->context = dev; urb->pipe = usb_rcvisocpipe(udev, EM28XX_EP_AUDIO); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer = buf; urb->interval = interval; urb->complete = em28xx_audio_isocirq; urb->number_of_packets = npackets; urb->transfer_buffer_length = ep_size * npackets; 
for (j = k = 0; j < npackets; j++, k += ep_size) { urb->iso_frame_desc[j].offset = k; urb->iso_frame_desc[j].length = ep_size; } } return 0; } static int em28xx_audio_init(struct em28xx *dev) { struct em28xx_audio *adev = &dev->adev; struct usb_device *udev = interface_to_usbdev(dev->intf); struct snd_pcm *pcm; struct snd_card *card; static int devnr; int err; if (dev->usb_audio_type != EM28XX_USB_AUDIO_VENDOR) { /* * This device does not support the extension (in this case * the device is expecting the snd-usb-audio module or * doesn't have analog audio support at all) */ return 0; } dev_info(&dev->intf->dev, "Binding audio extension\n"); kref_get(&dev->ref); dev_info(&dev->intf->dev, "em28xx-audio.c: Copyright (C) 2006 Markus Rechberger\n"); dev_info(&dev->intf->dev, "em28xx-audio.c: Copyright (C) 2007-2016 Mauro Carvalho Chehab\n"); err = snd_card_new(&dev->intf->dev, index[devnr], "Em28xx Audio", THIS_MODULE, 0, &card); if (err < 0) return err; spin_lock_init(&adev->slock); adev->sndcard = card; adev->udev = udev; err = snd_pcm_new(card, "Em28xx Audio", 0, 0, 1, &pcm); if (err < 0) goto card_free; snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE, &snd_em28xx_pcm_capture); snd_pcm_set_managed_buffer_all(pcm, SNDRV_DMA_TYPE_VMALLOC, NULL, 0, 0); pcm->info_flags = 0; pcm->private_data = dev; strscpy(pcm->name, "Empia 28xx Capture", sizeof(pcm->name)); strscpy(card->driver, "Em28xx-Audio", sizeof(card->driver)); strscpy(card->shortname, "Em28xx Audio", sizeof(card->shortname)); strscpy(card->longname, "Empia Em28xx Audio", sizeof(card->longname)); INIT_WORK(&adev->wq_trigger, audio_trigger); if (dev->audio_mode.ac97 != EM28XX_NO_AC97) { em28xx_cvol_new(card, dev, "Video", AC97_VIDEO); em28xx_cvol_new(card, dev, "Line In", AC97_LINE); em28xx_cvol_new(card, dev, "Phone", AC97_PHONE); em28xx_cvol_new(card, dev, "Microphone", AC97_MIC); em28xx_cvol_new(card, dev, "CD", AC97_CD); em28xx_cvol_new(card, dev, "AUX", AC97_AUX); em28xx_cvol_new(card, dev, "PCM", AC97_PCM); 
em28xx_cvol_new(card, dev, "Master", AC97_MASTER); em28xx_cvol_new(card, dev, "Line", AC97_HEADPHONE); em28xx_cvol_new(card, dev, "Mono", AC97_MASTER_MONO); em28xx_cvol_new(card, dev, "LFE", AC97_CENTER_LFE_MASTER); em28xx_cvol_new(card, dev, "Surround", AC97_SURROUND_MASTER); } err = em28xx_audio_urb_init(dev); if (err) goto card_free; err = snd_card_register(card); if (err < 0) goto urb_free; dev_info(&dev->intf->dev, "Audio extension successfully initialized\n"); return 0; urb_free: em28xx_audio_free_urb(dev); card_free: snd_card_free(card); adev->sndcard = NULL; return err; } static int em28xx_audio_fini(struct em28xx *dev) { if (!dev) return 0; if (dev->usb_audio_type != EM28XX_USB_AUDIO_VENDOR) { /* * This device does not support the extension (in this case * the device is expecting the snd-usb-audio module or * doesn't have analog audio support at all) */ return 0; } dev_info(&dev->intf->dev, "Closing audio extension\n"); if (dev->adev.sndcard) { snd_card_disconnect(dev->adev.sndcard); flush_work(&dev->adev.wq_trigger); em28xx_audio_free_urb(dev); snd_card_free(dev->adev.sndcard); dev->adev.sndcard = NULL; } kref_put(&dev->ref, em28xx_free_device); return 0; } static int em28xx_audio_suspend(struct em28xx *dev) { if (!dev) return 0; if (dev->usb_audio_type != EM28XX_USB_AUDIO_VENDOR) return 0; dev_info(&dev->intf->dev, "Suspending audio extension\n"); em28xx_deinit_isoc_audio(dev); atomic_set(&dev->adev.stream_started, 0); return 0; } static int em28xx_audio_resume(struct em28xx *dev) { if (!dev) return 0; if (dev->usb_audio_type != EM28XX_USB_AUDIO_VENDOR) return 0; dev_info(&dev->intf->dev, "Resuming audio extension\n"); /* Nothing to do other than schedule_work() ?? 
*/ schedule_work(&dev->adev.wq_trigger); return 0; } static struct em28xx_ops audio_ops = { .id = EM28XX_AUDIO, .name = "Em28xx Audio Extension", .init = em28xx_audio_init, .fini = em28xx_audio_fini, .suspend = em28xx_audio_suspend, .resume = em28xx_audio_resume, }; static int __init em28xx_alsa_register(void) { return em28xx_register_extension(&audio_ops); } static void __exit em28xx_alsa_unregister(void) { em28xx_unregister_extension(&audio_ops); } MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Markus Rechberger <mrechberger@gmail.com>"); MODULE_AUTHOR("Mauro Carvalho Chehab"); MODULE_DESCRIPTION(DRIVER_DESC " - audio interface"); MODULE_VERSION(EM28XX_VERSION); module_init(em28xx_alsa_register); module_exit(em28xx_alsa_unregister);
2 26 25 20 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/userfaultfd_k.h * * Copyright (C) 2015 Red Hat, Inc. 
*
 */

#ifndef _LINUX_USERFAULTFD_K_H
#define _LINUX_USERFAULTFD_K_H

#ifdef CONFIG_USERFAULTFD

#include <linux/userfaultfd.h> /* linux/include/uapi/linux/userfaultfd.h */

#include <linux/fcntl.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <asm-generic/pgtable_uffd.h>
#include <linux/hugetlb_inline.h>

/* The set of all possible UFFD-related VM flags. */
#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)

/*
 * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
 * new flags, since they might collide with O_* ones. We want
 * to re-use O_* flags that couldn't possibly have a meaning
 * from userfaultfd, in order to leave a free define-space for
 * shared O_* flags.
 */
#define UFFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK)

/*
 * Start with fault_pending_wqh and fault_wqh so they're more likely
 * to be in the same cacheline.
 *
 * Locking order:
 *	fd_wqh.lock
 *		fault_pending_wqh.lock
 *			fault_wqh.lock
 *		event_wqh.lock
 *
 * To avoid deadlocks, IRQs must be disabled when taking any of the above locks,
 * since fd_wqh.lock is taken by aio_poll() while it's holding a lock that's
 * also taken in IRQ context.
 */
struct userfaultfd_ctx {
	/* waitqueue head for the pending (i.e. not read) userfaults */
	wait_queue_head_t fault_pending_wqh;
	/* waitqueue head for the userfaults */
	wait_queue_head_t fault_wqh;
	/* waitqueue head for the pseudo fd to wakeup poll/read */
	wait_queue_head_t fd_wqh;
	/* waitqueue head for events */
	wait_queue_head_t event_wqh;
	/* a refile sequence protected by fault_pending_wqh lock */
	seqcount_spinlock_t refile_seq;
	/* pseudo fd refcounting */
	refcount_t refcount;
	/* userfaultfd syscall flags */
	unsigned int flags;
	/* features requested from the userspace */
	unsigned int features;
	/* released */
	bool released;
	/*
	 * Prevents userfaultfd operations (fill/move/wp) from happening while
	 * some non-cooperative event(s) is taking place. Increments are done
	 * in write-mode. Whereas, userfaultfd operations, which includes
	 * reading mmap_changing, is done under read-mode.
	 */
	struct rw_semaphore map_changing_lock;
	/* memory mappings are changing because of non-cooperative event */
	atomic_t mmap_changing;
	/* mm with one or more vmas attached to this userfaultfd_ctx */
	struct mm_struct *mm;
};

extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);

/* A combined operation mode + behavior flags. */
typedef unsigned int __bitwise uffd_flags_t;

/* Mutually exclusive modes of operation. */
enum mfill_atomic_mode {
	MFILL_ATOMIC_COPY,
	MFILL_ATOMIC_ZEROPAGE,
	MFILL_ATOMIC_CONTINUE,
	MFILL_ATOMIC_POISON,
	NR_MFILL_ATOMIC_MODES,
};

/*
 * Layout of uffd_flags_t: the low MFILL_ATOMIC_MODE_BITS bits hold the
 * mode, behavior flag number nr sits at bit MFILL_ATOMIC_MODE_BITS + nr,
 * and MFILL_ATOMIC_MODE_MASK covers exactly the mode bits.
 */
#define MFILL_ATOMIC_MODE_BITS (const_ilog2(NR_MFILL_ATOMIC_MODES - 1) + 1)
#define MFILL_ATOMIC_BIT(nr) BIT(MFILL_ATOMIC_MODE_BITS + (nr))
#define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr))
#define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1))

/* True when the mode bits of @flags are equal to @expected. */
static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected)
{
	return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected);
}

/*
 * Return @flags with its mode bits replaced by @mode; the bits outside
 * MFILL_ATOMIC_MODE_MASK are left as they were.
 */
static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode)
{
	flags &= ~MFILL_ATOMIC_MODE_MASK;
	return flags | ((__force uffd_flags_t) mode);
}

/* Flags controlling behavior. These behavior changes are mode-independent.
*/
#define MFILL_ATOMIC_WP MFILL_ATOMIC_FLAG(0)

/* Prototypes only; the implementations are not part of this header. */
extern int mfill_atomic_install_pte(pmd_t *dst_pmd,
				    struct vm_area_struct *dst_vma,
				    unsigned long dst_addr, struct page *page,
				    bool newly_allocated, uffd_flags_t flags);

extern ssize_t mfill_atomic_copy(struct userfaultfd_ctx *ctx, unsigned long dst_start,
				 unsigned long src_start, unsigned long len,
				 uffd_flags_t flags);
extern ssize_t mfill_atomic_zeropage(struct userfaultfd_ctx *ctx,
				     unsigned long dst_start,
				     unsigned long len);
extern ssize_t mfill_atomic_continue(struct userfaultfd_ctx *ctx, unsigned long dst_start,
				     unsigned long len, uffd_flags_t flags);
extern ssize_t mfill_atomic_poison(struct userfaultfd_ctx *ctx, unsigned long start,
				   unsigned long len, uffd_flags_t flags);
extern int mwriteprotect_range(struct userfaultfd_ctx *ctx, unsigned long start,
			       unsigned long len, bool enable_wp);
extern long uffd_wp_range(struct vm_area_struct *vma,
			  unsigned long start, unsigned long len, bool enable_wp);

/* move_pages */
void double_pt_lock(spinlock_t *ptl1, spinlock_t *ptl2);
void double_pt_unlock(spinlock_t *ptl1, spinlock_t *ptl2);
ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start,
		   unsigned long src_start, unsigned long len, __u64 flags);
int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pmd_t dst_pmdval,
			struct vm_area_struct *dst_vma,
			struct vm_area_struct *src_vma,
			unsigned long dst_addr, unsigned long src_addr);

/* mm helpers */

/* True when @vma's userfaultfd context pointer is the same as @vm_ctx's. */
static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma,
					struct vm_userfaultfd_ctx vm_ctx)
{
	return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
}

/*
 * Never enable huge pmd sharing on some uffd registered vmas:
 *
 * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry.
 *
 * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for
 *   VMAs which share huge pmds.
(If you have two mappings to the same * underlying pages, and fault in the non-UFFD-registered one with a write, * with huge pmd sharing this would *also* setup the second UFFD-registered * mapping, and we'd not get minor faults.) */ static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); } /* * Don't do fault around for either WP or MINOR registered uffd range. For * MINOR registered range, fault around will be a total disaster and ptes can * be installed without notifications; for WP it should mostly be fine as long * as the fault around checks for pte_none() before the installation, however * to be super safe we just forbid it. */ static inline bool uffd_disable_fault_around(struct vm_area_struct *vma) { return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR); } static inline bool userfaultfd_missing(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_MISSING; } static inline bool userfaultfd_wp(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_WP; } static inline bool userfaultfd_minor(struct vm_area_struct *vma) { return vma->vm_flags & VM_UFFD_MINOR; } static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { return userfaultfd_wp(vma) && pte_uffd_wp(pte); } static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, pmd_t pmd) { return userfaultfd_wp(vma) && pmd_uffd_wp(pmd); } static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return vma->vm_flags & __VM_UFFD_FLAGS; } static inline bool vma_can_userfault(struct vm_area_struct *vma, vm_flags_t vm_flags, bool wp_async) { vm_flags &= __VM_UFFD_FLAGS; if (vma->vm_flags & VM_DROPPABLE) return false; if ((vm_flags & VM_UFFD_MINOR) && (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) return false; /* * If wp async enabled, and WP is the only mode enabled, allow any * memory type. 
*/ if (wp_async && (vm_flags == VM_UFFD_WP)) return true; #ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for * uffd-wp, then shmem & hugetlbfs are not supported but only * anonymous. */ if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; #endif /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || vma_is_shmem(vma); } static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) { struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0; } extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *); extern void dup_userfaultfd_complete(struct list_head *); void dup_userfaultfd_fail(struct list_head *); extern void mremap_userfaultfd_prep(struct vm_area_struct *, struct vm_userfaultfd_ctx *); extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *, unsigned long from, unsigned long to, unsigned long len); void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *); extern bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf); extern void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf); extern bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma); extern bool userfaultfd_wp_async(struct vm_area_struct *vma); void userfaultfd_reset_ctx(struct vm_area_struct *vma); struct vm_area_struct *userfaultfd_clear_vma(struct vma_iterator *vmi, struct vm_area_struct *prev, struct vm_area_struct *vma, unsigned long start, unsigned long end); int userfaultfd_register_range(struct userfaultfd_ctx *ctx, struct vm_area_struct *vma, vm_flags_t vm_flags, unsigned long start, unsigned long end, bool wp_async); void userfaultfd_release_new(struct userfaultfd_ctx 
*ctx); void userfaultfd_release_all(struct mm_struct *mm, struct userfaultfd_ctx *ctx); #else /* CONFIG_USERFAULTFD */ /* mm helpers */ static inline vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) { return VM_FAULT_SIGBUS; } static inline long uffd_wp_range(struct vm_area_struct *vma, unsigned long start, unsigned long len, bool enable_wp) { return false; } static inline bool is_mergeable_vm_userfaultfd_ctx(struct vm_area_struct *vma, struct vm_userfaultfd_ctx vm_ctx) { return true; } static inline bool userfaultfd_missing(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_wp(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_minor(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma, pte_t pte) { return false; } static inline bool userfaultfd_huge_pmd_wp(struct vm_area_struct *vma, pmd_t pmd) { return false; } static inline bool userfaultfd_armed(struct vm_area_struct *vma) { return false; } static inline int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *l) { return 0; } static inline void dup_userfaultfd_complete(struct list_head *l) { } static inline void dup_userfaultfd_fail(struct list_head *l) { } static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma, struct vm_userfaultfd_ctx *ctx) { } static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx, unsigned long from, unsigned long to, unsigned long len) { } static inline void mremap_userfaultfd_fail(struct vm_userfaultfd_ctx *ctx) { } static inline bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { return true; } static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start, unsigned long end, struct list_head *uf) { return 0; } static inline void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) { } static inline bool 
uffd_disable_fault_around(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_wp_unpopulated(struct vm_area_struct *vma) { return false; } static inline bool userfaultfd_wp_async(struct vm_area_struct *vma) { return false; } static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) { return false; } #endif /* CONFIG_USERFAULTFD */ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) { /* Only wr-protect mode uses pte markers */ if (!userfaultfd_wp(vma)) return false; /* File-based uffd-wp always need markers */ if (!vma_is_anonymous(vma)) return true; /* * Anonymous uffd-wp only needs the markers if WP_UNPOPULATED * enabled (to apply markers on zero pages). */ return userfaultfd_wp_unpopulated(vma); } static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) { #ifdef CONFIG_PTE_MARKER_UFFD_WP return is_pte_marker_entry(entry) && (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); #else return false; #endif } static inline bool pte_marker_uffd_wp(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP swp_entry_t entry; if (!is_swap_pte(pte)) return false; entry = pte_to_swp_entry(pte); return pte_marker_entry_uffd_wp(entry); #else return false; #endif } /* * Returns true if this is a swap pte and was uffd-wp wr-protected in either * forms (pte marker or a normal swap pte), false otherwise. */ static inline bool pte_swp_uffd_wp_any(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP if (!is_swap_pte(pte)) return false; if (pte_swp_uffd_wp(pte)) return true; if (pte_marker_uffd_wp(pte)) return true; #endif return false; } #endif /* _LINUX_USERFAULTFD_K_H */
7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 /* * llc_pdu.c - access to PDU internals * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. 
*/ #include <linux/netdevice.h> #include <net/llc_pdu.h> static void llc_pdu_decode_pdu_type(struct sk_buff *skb, u8 *type); static u8 llc_pdu_get_pf_bit(struct llc_pdu_sn *pdu); void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 pdu_type) { llc_pdu_un_hdr(skb)->ssap |= pdu_type; } /** * llc_pdu_set_pf_bit - sets poll/final bit in LLC header * @skb: Frame to set bit in * @bit_value: poll/final bit (0 or 1). * * This function sets poll/final bit in LLC header (based on type of PDU). * in I or S pdus, p/f bit is right bit of fourth byte in header. in U * pdus p/f bit is fifth bit of third byte. */ void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value) { u8 pdu_type; struct llc_pdu_sn *pdu; llc_pdu_decode_pdu_type(skb, &pdu_type); pdu = llc_pdu_sn_hdr(skb); switch (pdu_type) { case LLC_PDU_TYPE_I: case LLC_PDU_TYPE_S: pdu->ctrl_2 = (pdu->ctrl_2 & 0xFE) | bit_value; break; case LLC_PDU_TYPE_U: pdu->ctrl_1 |= (pdu->ctrl_1 & 0xEF) | (bit_value << 4); break; } } /** * llc_pdu_decode_pf_bit - extracs poll/final bit from LLC header * @skb: input skb that p/f bit must be extracted from it * @pf_bit: poll/final bit (0 or 1) * * This function extracts poll/final bit from LLC header (based on type of * PDU). In I or S pdus, p/f bit is right bit of fourth byte in header. In * U pdus p/f bit is fifth bit of third byte. */ void llc_pdu_decode_pf_bit(struct sk_buff *skb, u8 *pf_bit) { u8 pdu_type; struct llc_pdu_sn *pdu; llc_pdu_decode_pdu_type(skb, &pdu_type); pdu = llc_pdu_sn_hdr(skb); switch (pdu_type) { case LLC_PDU_TYPE_I: case LLC_PDU_TYPE_S: *pf_bit = pdu->ctrl_2 & LLC_S_PF_BIT_MASK; break; case LLC_PDU_TYPE_U: *pf_bit = (pdu->ctrl_1 & LLC_U_PF_BIT_MASK) >> 4; break; } } /** * llc_pdu_init_as_disc_cmd - Builds DISC PDU * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU * * Builds a pdu frame as a DISC command. 
*/ void llc_pdu_init_as_disc_cmd(struct sk_buff *skb, u8 p_bit) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_2_PDU_CMD_DISC; pdu->ctrl_1 |= ((p_bit & 1) << 4) & LLC_U_PF_BIT_MASK; } /** * llc_pdu_init_as_i_cmd - builds I pdu * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU * @ns: The sequence number of the data PDU * @nr: The seq. number of the expected I PDU from the remote * * Builds a pdu frame as an I command. */ void llc_pdu_init_as_i_cmd(struct sk_buff *skb, u8 p_bit, u8 ns, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_I; pdu->ctrl_2 = 0; pdu->ctrl_2 |= (p_bit & LLC_I_PF_BIT_MASK); /* p/f bit */ pdu->ctrl_1 |= (ns << 1) & 0xFE; /* set N(S) in bits 2..8 */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_rej_cmd - builds REJ PDU * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU * @nr: The seq. number of the expected I PDU from the remote * * Builds a pdu frame as a REJ command. */ void llc_pdu_init_as_rej_cmd(struct sk_buff *skb, u8 p_bit, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_S; pdu->ctrl_1 |= LLC_2_PDU_CMD_REJ; pdu->ctrl_2 = 0; pdu->ctrl_2 |= p_bit & LLC_S_PF_BIT_MASK; pdu->ctrl_1 &= 0x0F; /* setting bits 5..8 to zero(reserved) */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_rnr_cmd - builds RNR pdu * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU * @nr: The seq. number of the expected I PDU from the remote * * Builds a pdu frame as an RNR command. 
*/ void llc_pdu_init_as_rnr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_S; pdu->ctrl_1 |= LLC_2_PDU_CMD_RNR; pdu->ctrl_2 = 0; pdu->ctrl_2 |= p_bit & LLC_S_PF_BIT_MASK; pdu->ctrl_1 &= 0x0F; /* setting bits 5..8 to zero(reserved) */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_rr_cmd - Builds RR pdu * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU * @nr: The seq. number of the expected I PDU from the remote * * Builds a pdu frame as an RR command. */ void llc_pdu_init_as_rr_cmd(struct sk_buff *skb, u8 p_bit, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_S; pdu->ctrl_1 |= LLC_2_PDU_CMD_RR; pdu->ctrl_2 = p_bit & LLC_S_PF_BIT_MASK; pdu->ctrl_1 &= 0x0F; /* setting bits 5..8 to zero(reserved) */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_sabme_cmd - builds SABME pdu * @skb: Address of the skb to build * @p_bit: The P bit to set in the PDU * * Builds a pdu frame as an SABME command. */ void llc_pdu_init_as_sabme_cmd(struct sk_buff *skb, u8 p_bit) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_2_PDU_CMD_SABME; pdu->ctrl_1 |= ((p_bit & 1) << 4) & LLC_U_PF_BIT_MASK; } /** * llc_pdu_init_as_dm_rsp - builds DM response pdu * @skb: Address of the skb to build * @f_bit: The F bit to set in the PDU * * Builds a pdu frame as a DM response. 
*/ void llc_pdu_init_as_dm_rsp(struct sk_buff *skb, u8 f_bit) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_2_PDU_RSP_DM; pdu->ctrl_1 |= ((f_bit & 1) << 4) & LLC_U_PF_BIT_MASK; } /** * llc_pdu_init_as_frmr_rsp - builds FRMR response PDU * @skb: Address of the frame to build * @prev_pdu: The rejected PDU frame * @f_bit: The F bit to set in the PDU * @vs: tx state vari value for the data link conn at the rejecting LLC * @vr: rx state var value for the data link conn at the rejecting LLC * @vzyxw: completely described in the IEEE Std 802.2 document (Pg 55) * * Builds a pdu frame as a FRMR response. */ void llc_pdu_init_as_frmr_rsp(struct sk_buff *skb, struct llc_pdu_sn *prev_pdu, u8 f_bit, u8 vs, u8 vr, u8 vzyxw) { struct llc_frmr_info *frmr_info; u8 prev_pf = 0; u8 *ctrl; struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_2_PDU_RSP_FRMR; pdu->ctrl_1 |= ((f_bit & 1) << 4) & LLC_U_PF_BIT_MASK; frmr_info = (struct llc_frmr_info *)&pdu->ctrl_2; ctrl = (u8 *)&prev_pdu->ctrl_1; FRMR_INFO_SET_REJ_CNTRL(frmr_info,ctrl); FRMR_INFO_SET_Vs(frmr_info, vs); FRMR_INFO_SET_Vr(frmr_info, vr); prev_pf = llc_pdu_get_pf_bit(prev_pdu); FRMR_INFO_SET_C_R_BIT(frmr_info, prev_pf); FRMR_INFO_SET_INVALID_PDU_CTRL_IND(frmr_info, vzyxw); FRMR_INFO_SET_INVALID_PDU_INFO_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INFO_2LONG_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INVALID_Nr_IND(frmr_info, vzyxw); FRMR_INFO_SET_PDU_INVALID_Ns_IND(frmr_info, vzyxw); skb_put(skb, sizeof(struct llc_frmr_info)); } /** * llc_pdu_init_as_rr_rsp - builds RR response pdu * @skb: Address of the skb to build * @f_bit: The F bit to set in the PDU * @nr: The seq. number of the expected data PDU from the remote * * Builds a pdu frame as an RR response. 
*/ void llc_pdu_init_as_rr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_S; pdu->ctrl_1 |= LLC_2_PDU_RSP_RR; pdu->ctrl_2 = 0; pdu->ctrl_2 |= f_bit & LLC_S_PF_BIT_MASK; pdu->ctrl_1 &= 0x0F; /* setting bits 5..8 to zero(reserved) */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_rej_rsp - builds REJ response pdu * @skb: Address of the skb to build * @f_bit: The F bit to set in the PDU * @nr: The seq. number of the expected data PDU from the remote * * Builds a pdu frame as a REJ response. */ void llc_pdu_init_as_rej_rsp(struct sk_buff *skb, u8 f_bit, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_S; pdu->ctrl_1 |= LLC_2_PDU_RSP_REJ; pdu->ctrl_2 = 0; pdu->ctrl_2 |= f_bit & LLC_S_PF_BIT_MASK; pdu->ctrl_1 &= 0x0F; /* setting bits 5..8 to zero(reserved) */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_rnr_rsp - builds RNR response pdu * @skb: Address of the frame to build * @f_bit: The F bit to set in the PDU * @nr: The seq. number of the expected data PDU from the remote * * Builds a pdu frame as an RNR response. */ void llc_pdu_init_as_rnr_rsp(struct sk_buff *skb, u8 f_bit, u8 nr) { struct llc_pdu_sn *pdu = llc_pdu_sn_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_S; pdu->ctrl_1 |= LLC_2_PDU_RSP_RNR; pdu->ctrl_2 = 0; pdu->ctrl_2 |= f_bit & LLC_S_PF_BIT_MASK; pdu->ctrl_1 &= 0x0F; /* setting bits 5..8 to zero(reserved) */ pdu->ctrl_2 |= (nr << 1) & 0xFE; /* set N(R) in bits 10..16 */ } /** * llc_pdu_init_as_ua_rsp - builds UA response pdu * @skb: Address of the frame to build * @f_bit: The F bit to set in the PDU * * Builds a pdu frame as a UA response. 
*/ void llc_pdu_init_as_ua_rsp(struct sk_buff *skb, u8 f_bit) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); pdu->ctrl_1 = LLC_PDU_TYPE_U; pdu->ctrl_1 |= LLC_2_PDU_RSP_UA; pdu->ctrl_1 |= ((f_bit & 1) << 4) & LLC_U_PF_BIT_MASK; } /** * llc_pdu_decode_pdu_type - designates PDU type * @skb: input skb that type of it must be designated. * @type: type of PDU (output argument). * * This function designates type of PDU (I, S or U). */ static void llc_pdu_decode_pdu_type(struct sk_buff *skb, u8 *type) { struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); if (pdu->ctrl_1 & 1) { if ((pdu->ctrl_1 & LLC_PDU_TYPE_U) == LLC_PDU_TYPE_U) *type = LLC_PDU_TYPE_U; else *type = LLC_PDU_TYPE_S; } else *type = LLC_PDU_TYPE_I; } /** * llc_pdu_get_pf_bit - extracts p/f bit of input PDU * @pdu: pointer to LLC header. * * This function extracts p/f bit of input PDU. at first examines type of * PDU and then extracts p/f bit. Returns the p/f bit. */ static u8 llc_pdu_get_pf_bit(struct llc_pdu_sn *pdu) { u8 pdu_type; u8 pf_bit = 0; if (pdu->ctrl_1 & 1) { if ((pdu->ctrl_1 & LLC_PDU_TYPE_U) == LLC_PDU_TYPE_U) pdu_type = LLC_PDU_TYPE_U; else pdu_type = LLC_PDU_TYPE_S; } else pdu_type = LLC_PDU_TYPE_I; switch (pdu_type) { case LLC_PDU_TYPE_I: case LLC_PDU_TYPE_S: pf_bit = pdu->ctrl_2 & LLC_S_PF_BIT_MASK; break; case LLC_PDU_TYPE_U: pf_bit = (pdu->ctrl_1 & LLC_U_PF_BIT_MASK) >> 4; break; } return pf_bit; }
41 1 52 52 1 51 51 53 52 1 1 32 33 33 1 1 1 33 33 1 1 1 1 1 1 1 1 9 1 1 1 6 2 4 2 1 1 1 6 6 38 28 28 8 7 1 35 58 3 58 8 16 3 13 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 /* * net/tipc/bcast.c: TIPC broadcast code * * Copyright (c) 2004-2006, 2014-2017, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
*/ #include <linux/tipc_config.h> #include "socket.h" #include "msg.h" #include "bcast.h" #include "link.h" #include "name_table.h" #define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ #define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; unsigned long sysctl_tipc_bc_retruni __read_mostly; /** * struct tipc_bc_base - base structure for keeping broadcast send state * @link: broadcast send link structure * @inputq: data input queue; will only carry SOCK_WAKEUP messages * @dests: array keeping number of reachable destinations per bearer * @primary_bearer: a bearer having links to all broadcast destinations, if any * @bcast_support: indicates if primary bearer, if any, supports broadcast * @force_bcast: forces broadcast for multicast traffic * @rcast_support: indicates if all peer nodes support replicast * @force_rcast: forces replicast for multicast traffic * @rc_ratio: dest count as percentage of cluster size where send method changes * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast */ struct tipc_bc_base { struct tipc_link *link; struct sk_buff_head inputq; int dests[MAX_BEARERS]; int primary_bearer; bool bcast_support; bool force_bcast; bool rcast_support; bool force_rcast; int rc_ratio; int bc_threshold; }; static struct tipc_bc_base *tipc_bc_base(struct net *net) { return tipc_net(net)->bcbase; } /* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link * Note: the MTU is decremented to give room for a tunnel header, in * case the message needs to be sent as replicast */ int tipc_bcast_get_mtu(struct net *net) { return tipc_link_mss(tipc_bc_sndlink(net)); } void tipc_bcast_toggle_rcast(struct net *net, bool supp) { tipc_bc_base(net)->rcast_support = supp; } static void tipc_bcbase_calc_bc_threshold(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); int cluster_size = tipc_link_bc_peers(tipc_bc_sndlink(net)); bb->bc_threshold = 1 + 
(cluster_size * bb->rc_ratio / 100); } /* tipc_bcbase_select_primary(): find a bearer with links to all destinations, * if any, and make it primary bearer */ static void tipc_bcbase_select_primary(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); int all_dests = tipc_link_bc_peers(bb->link); int max_win = tipc_link_max_win(bb->link); int min_win = tipc_link_min_win(bb->link); int i, mtu, prim; bb->primary_bearer = INVALID_BEARER_ID; bb->bcast_support = true; if (!all_dests) return; for (i = 0; i < MAX_BEARERS; i++) { if (!bb->dests[i]) continue; mtu = tipc_bearer_mtu(net, i); if (mtu < tipc_link_mtu(bb->link)) { tipc_link_set_mtu(bb->link, mtu); tipc_link_set_queue_limits(bb->link, min_win, max_win); } bb->bcast_support &= tipc_bearer_bcast_support(net, i); if (bb->dests[i] < all_dests) continue; bb->primary_bearer = i; /* Reduce risk that all nodes select same primary */ if ((i ^ tipc_own_addr(net)) & 1) break; } prim = bb->primary_bearer; if (prim != INVALID_BEARER_ID) bb->bcast_support = tipc_bearer_bcast_support(net, prim); } void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id) { struct tipc_bc_base *bb = tipc_bc_base(net); tipc_bcast_lock(net); bb->dests[bearer_id]++; tipc_bcbase_select_primary(net); tipc_bcast_unlock(net); } void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id) { struct tipc_bc_base *bb = tipc_bc_base(net); tipc_bcast_lock(net); bb->dests[bearer_id]--; tipc_bcbase_select_primary(net); tipc_bcast_unlock(net); } /* tipc_bcbase_xmit - broadcast a packet queue across one or more bearers * * Note that number of reachable destinations, as indicated in the dests[] * array, may transitionally differ from the number of destinations indicated * in each sent buffer. We can sustain this. 
Excess destination nodes will * drop and never acknowledge the unexpected packets, and missing destinations * will either require retransmission (if they are just about to be added to * the bearer), or be removed from the buffer's 'ackers' counter (if they * just went down) */ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) { int bearer_id; struct tipc_bc_base *bb = tipc_bc_base(net); struct sk_buff *skb, *_skb; struct sk_buff_head _xmitq; if (skb_queue_empty(xmitq)) return; /* The typical case: at least one bearer has links to all nodes */ bearer_id = bb->primary_bearer; if (bearer_id >= 0) { tipc_bearer_bc_xmit(net, bearer_id, xmitq); return; } /* We have to transmit across all bearers */ __skb_queue_head_init(&_xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { if (!bb->dests[bearer_id]) continue; skb_queue_walk(xmitq, skb) { _skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!_skb) break; __skb_queue_tail(&_xmitq, _skb); } tipc_bearer_bc_xmit(net, bearer_id, &_xmitq); } __skb_queue_purge(xmitq); __skb_queue_purge(&_xmitq); } static void tipc_bcast_select_xmit_method(struct net *net, int dests, struct tipc_mc_method *method) { struct tipc_bc_base *bb = tipc_bc_base(net); unsigned long exp = method->expires; /* Broadcast supported by used bearer/bearers? */ if (!bb->bcast_support) { method->rcast = true; return; } /* Any destinations which don't support replicast ? */ if (!bb->rcast_support) { method->rcast = false; return; } /* Can current method be changed ? 
*/ method->expires = jiffies + TIPC_METHOD_EXPIRE; if (method->mandatory) return; if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL) && time_before(jiffies, exp)) return; /* Configuration as force 'broadcast' method */ if (bb->force_bcast) { method->rcast = false; return; } /* Configuration as force 'replicast' method */ if (bb->force_rcast) { method->rcast = true; return; } /* Configuration as 'autoselect' or default method */ /* Determine method to use now */ method->rcast = dests <= bb->bc_threshold; } /* tipc_bcast_xmit - broadcast the buffer chain to all external nodes * @net: the applicable net namespace * @pkts: chain of buffers containing message * @cong_link_cnt: set to 1 if broadcast link is congested, otherwise 0 * Consumes the buffer chain. * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE */ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, u16 *cong_link_cnt) { struct tipc_link *l = tipc_bc_sndlink(net); struct sk_buff_head xmitq; int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); __skb_queue_purge(pkts); if (rc == -ELINKCONG) { *cong_link_cnt = 1; rc = 0; } return rc; } /* tipc_rcast_xmit - replicate and send a message to given destination nodes * @net: the applicable net namespace * @pkts: chain of buffers containing message * @dests: list of destination nodes * @cong_link_cnt: returns number of congested links * @cong_links: returns identities of congested links * Returns 0 if success, otherwise errno */ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_nlist *dests, u16 *cong_link_cnt) { struct tipc_dest *dst, *tmp; struct sk_buff_head _pkts; u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); __skb_queue_head_init(&_pkts); list_for_each_entry_safe(dst, tmp, &dests->list, list) { dnode = dst->node; if (!tipc_msg_pskb_copy(dnode, 
pkts, &_pkts)) return -ENOMEM; /* Any other return value than -ELINKCONG is ignored */ if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) (*cong_link_cnt)++; } return 0; } /* tipc_mcast_send_sync - deliver a dummy message with SYN bit * @net: the applicable net namespace * @skb: socket buffer to copy * @method: send method to be used * @dests: destination nodes for message. * Returns 0 if success, otherwise errno */ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb, struct tipc_mc_method *method, struct tipc_nlist *dests) { struct tipc_msg *hdr, *_hdr; struct sk_buff_head tmpq; u16 cong_link_cnt = 0; struct sk_buff *_skb; int rc = 0; /* Is a cluster supporting with new capabilities ? */ if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL)) return 0; hdr = buf_msg(skb); if (msg_user(hdr) == MSG_FRAGMENTER) hdr = msg_inner_hdr(hdr); if (msg_type(hdr) != TIPC_MCAST_MSG) return 0; /* Allocate dummy message */ _skb = tipc_buf_acquire(MCAST_H_SIZE, GFP_KERNEL); if (!_skb) return -ENOMEM; /* Preparing for 'synching' header */ msg_set_syn(hdr, 1); /* Copy skb's header into a dummy header */ skb_copy_to_linear_data(_skb, hdr, MCAST_H_SIZE); skb_orphan(_skb); /* Reverse method for dummy message */ _hdr = buf_msg(_skb); msg_set_size(_hdr, MCAST_H_SIZE); msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); msg_set_errcode(_hdr, TIPC_ERR_NO_PORT); __skb_queue_head_init(&tmpq); __skb_queue_tail(&tmpq, _skb); if (method->rcast) rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt); else rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt); /* This queue should normally be empty by now */ __skb_queue_purge(&tmpq); return rc; } /* tipc_mcast_xmit - deliver message to indicated destination nodes * and to identified node local sockets * @net: the applicable net namespace * @pkts: chain of buffers containing message * @method: send method to be used * @dests: destination nodes for message. 
* @cong_link_cnt: returns number of encountered congested destination links * Consumes buffer chain. * Returns 0 if success, otherwise errno */ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_mc_method *method, struct tipc_nlist *dests, u16 *cong_link_cnt) { struct sk_buff_head inputq, localq; bool rcast = method->rcast; struct tipc_msg *hdr; struct sk_buff *skb; int rc = 0; skb_queue_head_init(&inputq); __skb_queue_head_init(&localq); /* Clone packets before they are consumed by next call */ if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { rc = -ENOMEM; goto exit; } /* Send according to determined transmit method */ if (dests->remote) { tipc_bcast_select_xmit_method(net, dests->remote, method); skb = skb_peek(pkts); hdr = buf_msg(skb); if (msg_user(hdr) == MSG_FRAGMENTER) hdr = msg_inner_hdr(hdr); msg_set_is_rcast(hdr, method->rcast); /* Switch method ? */ if (rcast != method->rcast) { rc = tipc_mcast_send_sync(net, skb, method, dests); if (unlikely(rc)) { pr_err("Unable to send SYN: method %d, rc %d\n", rcast, rc); goto exit; } } if (method->rcast) rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt); else rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } if (dests->local) { tipc_loopback_trace(net, &localq); tipc_sk_mcast_rcv(net, &localq, &inputq); } exit: /* This queue should normally be empty by now */ __skb_queue_purge(pkts); return rc; } /* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link * * RCU is locked, no other locks set */ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb) { struct tipc_msg *hdr = buf_msg(skb); struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; int rc; __skb_queue_head_init(&xmitq); if (msg_mc_netid(hdr) != tipc_netid(net) || !tipc_link_is_up(l)) { kfree_skb(skb); return 0; } tipc_bcast_lock(net); if (msg_user(hdr) == BCAST_PROTOCOL) rc = tipc_link_bc_nack_rcv(l, skb, &xmitq); else rc = tipc_link_rcv(l, skb, NULL); 
tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); return rc; } /* tipc_bcast_ack_rcv - receive and handle a broadcast acknowledge * @net: the applicable net namespace * @l: link passed on to tipc_link_bc_ack_rcv() * @hdr: header carrying the acknowledged broadcast sequence number * * RCU is locked, no other locks set */ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; u16 acked = msg_bcast_ack(hdr); struct sk_buff_head xmitq; /* Ignore bc acks sent by peer before bcast synch point was received */ if (msg_bc_ack_invalid(hdr)) return; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); /* No gap value, no gap ack blocks and no retransmit queue on this path */ tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL); tipc_bcast_unlock(net); /* Transmit whatever was queued, after the broadcast lock is dropped */ tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); } /* tipc_bcast_sync_rcv - check and update rcv link with peer's send state * @net: the applicable net namespace * @l: the receive link to update * @hdr: header of the received protocol message * @retrq: caller's queue, passed to tipc_link_bc_ack_rcv() as retransmit * queue unless sysctl_tipc_bc_retruni is zero * * Returns the OR-ed return values of tipc_link_bc_ack_rcv() and * tipc_link_bc_sync_rcv(), or 0 if neither was called. * * RCU is locked, no other locks set */ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *retrq) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct tipc_gap_ack_blks *ga; struct sk_buff_head xmitq; int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); /* Anything but a STATE_MSG only initializes the receive link */ if (msg_type(hdr) != STATE_MSG) { tipc_link_bc_init_rcv(l, hdr); } else if (!msg_bc_ack_invalid(hdr)) { tipc_get_gap_ack_blks(&ga, l, hdr, false); /* When sysctl_tipc_bc_retruni is zero the caller's retrq is replaced by the local xmitq */ if (!sysctl_tipc_bc_retruni) retrq = &xmitq; rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), msg_bc_gap(hdr), ga, &xmitq, retrq); rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq); } tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? 
*/ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); return rc; } /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer * @net: the applicable net namespace * @uc_l: link passed to tipc_link_add_bc_peer() together with the broadcast send link * @xmitq: queue handed through to tipc_link_add_bc_peer(); it is not transmitted here * * RCU is locked, node lock is set */ void tipc_bcast_add_peer(struct net *net, struct tipc_link *uc_l, struct sk_buff_head *xmitq) { struct tipc_link *snd_l = tipc_bc_sndlink(net); tipc_bcast_lock(net); tipc_link_add_bc_peer(snd_l, uc_l, xmitq); /* Peer set changed: redo primary selection and threshold calculation under the lock */ tipc_bcbase_select_primary(net); tipc_bcbase_calc_bc_threshold(net); tipc_bcast_unlock(net); } /* tipc_bcast_remove_peer - remove a peer node from broadcast link and bearer * @net: the applicable net namespace * @rcv_l: link passed to tipc_link_remove_bc_peer() together with the broadcast send link * * Unlike tipc_bcast_add_peer(), this function owns its transmit queue and * sends it itself once the broadcast lock has been released. * * RCU is locked, node lock is set */ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l) { struct tipc_link *snd_l = tipc_bc_sndlink(net); struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); tipc_link_remove_bc_peer(snd_l, rcv_l, &xmitq); /* Peer set changed: redo primary selection and threshold calculation under the lock */ tipc_bcbase_select_primary(net); tipc_bcbase_calc_bc_threshold(net); tipc_bcast_unlock(net); tipc_bcbase_xmit(net, &xmitq); /* Any socket wakeup messages ? 
*/ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); } /* tipc_bclink_reset_stats - reset the statistics of link @l under the broadcast lock * Returns 0 if success, -ENOPROTOOPT if @l is NULL */ int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l) { if (!l) return -ENOPROTOOPT; tipc_bcast_lock(net); tipc_link_reset_stats(l); tipc_bcast_unlock(net); return 0; } /* tipc_bc_link_set_queue_limits - set the maximum window of the broadcast send link * @net: the applicable net namespace * @max_win: requested maximum window; values below BCLINK_WIN_MIN are raised to it * Returns 0 if success, -ENOPROTOOPT if there is no broadcast send link, * -EINVAL if @max_win exceeds TIPC_MAX_LINK_WIN */ static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win) { struct tipc_link *l = tipc_bc_sndlink(net); if (!l) return -ENOPROTOOPT; if (max_win < BCLINK_WIN_MIN) max_win = BCLINK_WIN_MIN; if (max_win > TIPC_MAX_LINK_WIN) return -EINVAL; tipc_bcast_lock(net); /* The minimum window is kept as it is; only the maximum changes */ tipc_link_set_queue_limits(l, tipc_link_min_win(l), max_win); tipc_bcast_unlock(net); return 0; } /* tipc_bc_link_set_broadcast_mode - force broadcast, force replicast, or clear both force flags * @net: the applicable net namespace * @bc_mode: one of BCLINK_MODE_BCAST, BCLINK_MODE_RCAST, BCLINK_MODE_SEL * Returns 0 if success, -ENOPROTOOPT if a method needed by the mode is not * supported, -EINVAL for any other @bc_mode. The flags are written without * taking the broadcast lock. */ static int tipc_bc_link_set_broadcast_mode(struct net *net, u32 bc_mode) { struct tipc_bc_base *bb = tipc_bc_base(net); switch (bc_mode) { case BCLINK_MODE_BCAST: if (!bb->bcast_support) return -ENOPROTOOPT; bb->force_bcast = true; bb->force_rcast = false; break; case BCLINK_MODE_RCAST: if (!bb->rcast_support) return -ENOPROTOOPT; bb->force_bcast = false; bb->force_rcast = true; break; case BCLINK_MODE_SEL: /* Neither method is forced, which requires that both are available */ if (!bb->bcast_support || !bb->rcast_support) return -ENOPROTOOPT; bb->force_bcast = false; bb->force_rcast = false; break; default: return -EINVAL; } return 0; } /* tipc_bc_link_set_broadcast_ratio - store a new rc_ratio and recalculate the threshold * @net: the applicable net namespace * @bc_ratio: new ratio, accepted range is 1..100 * Returns 0 if success, -ENOPROTOOPT unless both bcast and rcast are * supported, -EINVAL if @bc_ratio is out of range */ static int tipc_bc_link_set_broadcast_ratio(struct net *net, u32 bc_ratio) { struct tipc_bc_base *bb = tipc_bc_base(net); if (!bb->bcast_support || !bb->rcast_support) return -ENOPROTOOPT; /* bc_ratio is unsigned, so "<= 0" can only match 0 */ if (bc_ratio > 100 || bc_ratio <= 0) return -EINVAL; bb->rc_ratio = bc_ratio; tipc_bcast_lock(net); tipc_bcbase_calc_bc_threshold(net); tipc_bcast_unlock(net); return 0; } /* tipc_nl_bc_link_set - apply netlink link properties to the broadcast link * @net: the applicable net namespace * @attrs: parsed netlink attributes; TIPC_NLA_LINK_PROP must be present * * Handles the broadcast mode, broadcast ratio and window properties, in * that order, and stops at the first one that fails. * Returns 0 if success, -EINVAL if TIPC_NLA_LINK_PROP is missing, * -EOPNOTSUPP if none of the three properties is present, otherwise the * error from parsing or from the failing setter */ int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]) { int err; u32 win; u32 bc_mode; u32 bc_ratio; struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; if (!attrs[TIPC_NLA_LINK_PROP]) return -EINVAL; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props); if (err) return err; if (!props[TIPC_NLA_PROP_WIN] && !props[TIPC_NLA_PROP_BROADCAST] && !props[TIPC_NLA_PROP_BROADCAST_RATIO]) { return -EOPNOTSUPP; } if 
(props[TIPC_NLA_PROP_BROADCAST]) { bc_mode = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST]); err = tipc_bc_link_set_broadcast_mode(net, bc_mode); } /* Each following property is applied only if nothing has failed so far */ if (!err && props[TIPC_NLA_PROP_BROADCAST_RATIO]) { bc_ratio = nla_get_u32(props[TIPC_NLA_PROP_BROADCAST_RATIO]); err = tipc_bc_link_set_broadcast_ratio(net, bc_ratio); } if (!err && props[TIPC_NLA_PROP_WIN]) { win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); err = tipc_bc_link_set_queue_limits(net, win); } return err; } /* tipc_bcast_init - allocate the broadcast base of @net and create its broadcast link * Returns 0 if success, -ENOMEM if the allocation or the link creation fails */ int tipc_bcast_init(struct net *net) { struct tipc_net *tn = tipc_net(net); struct tipc_bc_base *bb = NULL; struct tipc_link *l = NULL; bb = kzalloc(sizeof(*bb), GFP_KERNEL); if (!bb) goto enomem; tn->bcbase = bb; spin_lock_init(&tipc_net(net)->bclock); if (!tipc_link_bc_create(net, 0, 0, NULL, one_page_mtu, BCLINK_WIN_DEFAULT, BCLINK_WIN_DEFAULT, 0, &bb->inputq, NULL, NULL, &l)) goto enomem; bb->link = l; tn->bcl = l; /* Initial settings: ratio 10, replicast marked as supported */ bb->rc_ratio = 10; bb->rcast_support = true; return 0; /* NOTE(review): if link creation failed, tn->bcbase still points at the bb freed here - confirm that nothing reads it after a failed init */ enomem: kfree(bb); kfree(l); return -ENOMEM; } /* tipc_bcast_stop - free the broadcast base and broadcast link of @net after synchronize_net() */ void tipc_bcast_stop(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); synchronize_net(); kfree(tn->bcbase); kfree(tn->bcl); } /* tipc_nlist_init - zero @nl, initialize its list head and record @self as own node */ void tipc_nlist_init(struct tipc_nlist *nl, u32 self) { memset(nl, 0, sizeof(*nl)); INIT_LIST_HEAD(&nl->list); nl->self = self; } /* tipc_nlist_add - flag @nl as having a local destination if @node is the own node, otherwise push @node onto the list and count it when tipc_dest_push() returns true */ void tipc_nlist_add(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = true; else if (tipc_dest_push(&nl->list, node, 0)) nl->remote++; } /* tipc_nlist_del - inverse of tipc_nlist_add(): clear the local flag, or delete @node from the list and uncount it when tipc_dest_del() returns true */ void tipc_nlist_del(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = false; else if (tipc_dest_del(&nl->list, node, 0)) nl->remote--; } /* tipc_nlist_purge - empty the destination list and reset the remote count and local flag */ void tipc_nlist_purge(struct tipc_nlist *nl) { tipc_dest_list_purge(&nl->list); nl->remote = 0; nl->local = false; } /* tipc_bcast_get_mode - report the current mode as a BCLINK_MODE_* value * A forced method takes precedence; BCLINK_MODE_SEL is reported when nothing * is forced and both methods are supported; otherwise 0 is returned. */ u32 tipc_bcast_get_mode(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); if (bb->force_bcast) return BCLINK_MODE_BCAST; if (bb->force_rcast) return BCLINK_MODE_RCAST; if (bb->bcast_support && bb->rcast_support) return BCLINK_MODE_SEL; return 0; } u32 
tipc_bcast_get_broadcast_ratio(struct net *net) { struct tipc_bc_base *bb = tipc_bc_base(net); /* Plain read of the stored ratio, no lock is taken */ return bb->rc_ratio; } /* tipc_mcast_filter_msg - defer or release multicast messages around SYN messages * @net: the applicable net namespace * @defq: queue of deferred messages * @inputq: input queue; only its first message is examined * * Depending on the SYN and rcast flags of the first message in @inputq and * on what @defq already holds for the same origin node and port, the message * is either left in @inputq, moved to the tail of @defq, or paired with its * twin SYN message, in which case one of the two is freed and the deferred * non-SYN messages of that origin that follow are moved back to @inputq. */ void tipc_mcast_filter_msg(struct net *net, struct sk_buff_head *defq, struct sk_buff_head *inputq) { struct sk_buff *skb, *_skb, *tmp; struct tipc_msg *hdr, *_hdr; bool match = false; u32 node, port; skb = skb_peek(inputq); if (!skb) return; hdr = buf_msg(skb); /* Common case: no SYN at the head of inputq and nothing deferred */ if (likely(!msg_is_syn(hdr) && skb_queue_empty(defq))) return; node = msg_orignode(hdr); /* Messages originating from the own node are never deferred */ if (node == tipc_own_addr(net)) return; port = msg_origport(hdr); /* Has the twin SYN message already arrived ? */ skb_queue_walk(defq, _skb) { _hdr = buf_msg(_skb); if (msg_orignode(_hdr) != node) continue; if (msg_origport(_hdr) != port) continue; match = true; break; } /* Nothing deferred from this origin yet: only a SYN message starts deferral */ if (!match) { if (!msg_is_syn(hdr)) return; __skb_dequeue(inputq); __skb_queue_tail(defq, skb); return; } /* From here on _skb and _hdr refer to the first deferred message of this origin */ /* Deliver non-SYN message from other link, otherwise queue it */ if (!msg_is_syn(hdr)) { if (msg_is_rcast(hdr) != msg_is_rcast(_hdr)) return; __skb_dequeue(inputq); __skb_queue_tail(defq, skb); return; } /* Queue non-SYN/SYN message from same link */ if (msg_is_rcast(hdr) == msg_is_rcast(_hdr)) { __skb_dequeue(inputq); __skb_queue_tail(defq, skb); return; } /* Matching SYN messages => return the one with data, if any */ __skb_unlink(_skb, defq); if (msg_data_sz(hdr)) { kfree_skb(_skb); } else { /* The arriving SYN carries no data: free it and put the deferred one into inputq instead */ __skb_dequeue(inputq); kfree_skb(skb); __skb_queue_tail(inputq, _skb); } /* Deliver subsequent non-SYN messages from same peer */ skb_queue_walk_safe(defq, _skb, tmp) { _hdr = buf_msg(_skb); if (msg_orignode(_hdr) != node) continue; if (msg_origport(_hdr) != port) continue; /* Stop at the next SYN message of this origin */ if (msg_is_syn(_hdr)) break; __skb_unlink(_skb, defq); __skb_queue_tail(inputq, _skb); } }
7 6 8 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 // SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/netfilter/ipset/pfxlen.h> /* Prefixlen maps for fast conversions, by Jan Engelhardt. */ #ifdef E #undef E #endif #define PREFIXES_MAP \ E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \ E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \ 
E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 
0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \ E(0xFFFFFFFF, 
0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \ E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), #define E(a, b, c, d) \ {.ip6 = { \ htonl(a), htonl(b), \ htonl(c), htonl(d), \ } } /* This table works for both IPv4 and IPv6; * just use prefixlen_netmask_map[prefixlength].ip. 
*/ const union nf_inet_addr ip_set_netmask_map[] = { PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_netmask_map); #undef E #define E(a, b, c, d) \ {.ip6 = { (__force __be32)a, (__force __be32)b, \ (__force __be32)c, (__force __be32)d, \ } } /* This table works for both IPv4 and IPv6; * just use ip_set_hostmask_map[prefixlength].ip. * * It is built from the same PREFIXES_MAP as ip_set_netmask_map, but the * constants are only cast to __be32 and not passed through htonl(), so * the values keep their host byte order. */ const union nf_inet_addr ip_set_hostmask_map[] = { PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); /* Find the largest network which matches the range from left, in host order. * * @from: first address of the range * @to: last address of the range * @cidr: receives the prefix length of the network that was found * * Prefix lengths 1..31 are tried from shortest to longest. A prefix fits * when @from has no bits set outside the mask and the last address of the * resulting network is not after @to. Returns the last address of the * network found; if no prefix fits, *cidr is set to 32 and @from itself * is returned. */ u32 ip_set_range_to_cidr(u32 from, u32 to, u8 *cidr) { u32 last; u8 i; for (i = 1; i < 32; i++) { /* Skip prefixes for which from is not the network address */ if ((from & ip_set_hostmask(i)) != from) continue; last = from | ~ip_set_hostmask(i); if (!after(last, to)) { *cidr = i; return last; } } *cidr = 32; return from; } EXPORT_SYMBOL_GPL(ip_set_range_to_cidr);
2 5 2 3 29 3 65 5 5 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY 
RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth kernel library. */ #define pr_fmt(fmt) "Bluetooth: " fmt #include <linux/export.h> #include <net/bluetooth/bluetooth.h> /** * baswap() - Swaps the order of a bd address * @dst: Pointer to a bdaddr_t struct that will store the swapped * bd address. * @src: Pointer to the bdaddr_t struct to be swapped. * * This function reverses the byte order of a Bluetooth device * address. Exactly 6 bytes are read from @src and written to @dst. * * @dst and @src must not point to the same address: the bytes are * copied one at a time without a temporary, so an in-place call * would overwrite source bytes before they have been read. */ void baswap(bdaddr_t *dst, const bdaddr_t *src) { const unsigned char *s = (const unsigned char *)src; unsigned char *d = (unsigned char *)dst; unsigned int i; /* Byte i of the result is byte 5 - i of the source */ for (i = 0; i < 6; i++) d[i] = s[5 - i]; } EXPORT_SYMBOL(baswap); /** * bt_to_errno() - Bluetooth error codes to standard errno * @code: Bluetooth error code to be converted * * This function takes a Bluetooth error code as input and converts * it to an equivalent Unix/standard errno value. * * Return: * * If the bt error code is known, an equivalent Unix errno value * is returned. The value is the positive errno constant, and 0 for * code 0. * * If the given bt error code is not known, ENOSYS is returned. 
*/ int bt_to_errno(__u16 code) { /* Many-to-one table: several codes share one errno value, so the mapping cannot be inverted exactly */ switch (code) { case 0: return 0; case 0x01: return EBADRQC; case 0x02: return ENOTCONN; case 0x03: return EIO; case 0x04: case 0x3c: return EHOSTDOWN; case 0x05: return EACCES; case 0x06: return EBADE; case 0x07: return ENOMEM; case 0x08: return ETIMEDOUT; case 0x09: return EMLINK; case 0x0a: return EMLINK; case 0x0b: return EALREADY; case 0x0c: return EBUSY; case 0x0d: case 0x0e: case 0x0f: return ECONNREFUSED; case 0x10: return ETIMEDOUT; case 0x11: case 0x27: case 0x29: case 0x20: return EOPNOTSUPP; case 0x12: return EINVAL; case 0x13: case 0x14: case 0x15: return ECONNRESET; case 0x16: return ECONNABORTED; case 0x17: return ELOOP; case 0x18: return EACCES; case 0x1a: return EPROTONOSUPPORT; case 0x1b: return ECONNREFUSED; case 0x19: case 0x1e: case 0x23: case 0x24: case 0x25: return EPROTO; /* Any code that is not listed above */ default: return ENOSYS; } } EXPORT_SYMBOL(bt_to_errno); /** * bt_status() - Standard errno value to Bluetooth error code * @err: Unix/standard errno value to be converted, given as a negative * number (for example -ENOMEM) * * This function converts a standard/Unix errno value to an * equivalent Bluetooth error code. * * Return: Bluetooth error code. * * If the given errno is not found, 0x1f is returned by default * which indicates an unspecified error. * For err >= 0, no conversion is performed, and the same value * is immediately returned, converted to the __u8 return type. 
*/ __u8 bt_status(int err) { /* Non-negative values are passed through without a table lookup */ if (err >= 0) return err; switch (err) { case -EBADRQC: return 0x01; case -ENOTCONN: return 0x02; case -EIO: return 0x03; case -EHOSTDOWN: return 0x04; case -EACCES: return 0x05; case -EBADE: return 0x06; case -ENOMEM: return 0x07; case -ETIMEDOUT: return 0x08; case -EMLINK: return 0x09; case -EALREADY: return 0x0b; case -EBUSY: return 0x0c; case -ECONNREFUSED: return 0x0d; case -EOPNOTSUPP: return 0x11; case -EINVAL: return 0x12; case -ECONNRESET: return 0x13; case -ECONNABORTED: return 0x16; case -ELOOP: return 0x17; case -EPROTONOSUPPORT: return 0x1a; case -EPROTO: return 0x19; /* Any other negative errno */ default: return 0x1f; } } EXPORT_SYMBOL(bt_status); /** * bt_info() - Log Bluetooth information message * @format: Message's format string * * The format string and the variable arguments are wrapped in a * struct va_format and handed to pr_info() through "%pV". */ void bt_info(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_info("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_info); /** * bt_warn() - Log Bluetooth warning message * @format: Message's format string * * Same wrapping as bt_info(), logged through pr_warn(). */ void bt_warn(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_warn("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_warn); /** * bt_err() - Log Bluetooth error message * @format: Message's format string * * Same wrapping as bt_info(), logged through pr_err(). */ void bt_err(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_err("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_err); #ifdef CONFIG_BT_FEATURE_DEBUG /* Runtime switch checked by bt_dbg(); it starts out false */ static bool debug_enable; /* bt_dbg_set() - turn bt_dbg() output on or off */ void bt_dbg_set(bool enable) { debug_enable = enable; } /* bt_dbg_get() - report whether bt_dbg() output is currently enabled */ bool bt_dbg_get(void) { return debug_enable; } /** * bt_dbg() - Log Bluetooth debugging message * @format: Message's format string * * Nothing is logged unless debugging has been enabled with bt_dbg_set(). */ void bt_dbg(const char *format, ...) 
{ struct va_format vaf; va_list args; /* Expected case: debugging is off, so return before touching the arguments */ if (likely(!debug_enable)) return; va_start(args, format); vaf.fmt = format; vaf.va = &args; /* printk() is called directly here, so pr_fmt() is applied by hand to the format */ printk(KERN_DEBUG pr_fmt("%pV"), &vaf); va_end(args); } EXPORT_SYMBOL(bt_dbg); #endif /** * bt_warn_ratelimited() - Log rate-limited Bluetooth warning message * @format: Message's format string * * This function works like bt_warn, but it uses rate limiting * to prevent the message from being logged too often. */ void bt_warn_ratelimited(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_warn_ratelimited("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_warn_ratelimited); /** * bt_err_ratelimited() - Log rate-limited Bluetooth error message * @format: Message's format string * * This function works like bt_err, but it uses rate limiting * to prevent the message from being logged too often. */ void bt_err_ratelimited(const char *format, ...) { struct va_format vaf; va_list args; va_start(args, format); vaf.fmt = format; vaf.va = &args; pr_err_ratelimited("%pV", &vaf); va_end(args); } EXPORT_SYMBOL(bt_err_ratelimited);
1242 1252 5054 5049 5039 1249 6292 6338 6271 6264 281 1537 1540 2 1528 1518 167 2 5041 5105 4 4 3 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 // SPDX-License-Identifier: GPL-2.0 /* * Lockless hierarchical page accounting & limiting * * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner 
*/ #include <linux/page_counter.h> #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/sched.h> #include <linux/bug.h> #include <asm/page.h> /* track_protection - tell whether protected usage is tracked for @c (its protection_support flag) */ static bool track_protection(struct page_counter *c) { return c->protection_support; } /* * propagate_protected_usage - keep the parent's sums of protected usage up to date * @c: counter whose usage changed * @usage: the new usage of @c * * The protected usage of @c is its usage capped by c->min, and likewise * by c->low. When either value differs from what was last recorded in * c->min_usage or c->low_usage, the new value is stored and the difference * is added to the parent's children_min_usage or children_low_usage. * Nothing is done for a counter without a parent. */ static void propagate_protected_usage(struct page_counter *c, unsigned long usage) { unsigned long protected, old_protected; long delta; if (!c->parent) return; protected = min(usage, READ_ONCE(c->min)); old_protected = atomic_long_read(&c->min_usage); if (protected != old_protected) { /* The delta is taken against the value actually swapped out, not against the earlier read */ old_protected = atomic_long_xchg(&c->min_usage, protected); delta = protected - old_protected; if (delta) atomic_long_add(delta, &c->parent->children_min_usage); } /* Same procedure for the low protection */ protected = min(usage, READ_ONCE(c->low)); old_protected = atomic_long_read(&c->low_usage); if (protected != old_protected) { old_protected = atomic_long_xchg(&c->low_usage, protected); delta = protected - old_protected; if (delta) atomic_long_add(delta, &c->parent->children_low_usage); } } /** * page_counter_cancel - take pages out of the local counter * @counter: counter * @nr_pages: number of pages to cancel * * Only @counter itself is changed, its ancestors are not. If the usage * would become negative, a warning is printed once and the usage is * reset to 0. */ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) { long new; new = atomic_long_sub_return(nr_pages, &counter->usage); /* More uncharges than charges? */ if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n", new, nr_pages)) { new = 0; atomic_long_set(&counter->usage, new); } if (track_protection(counter)) propagate_protected_usage(counter, new); } /** * page_counter_charge - hierarchically charge pages * @counter: counter * @nr_pages: number of pages to charge * * NOTE: This does not consider any configured counter limits. 
*/ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; /* Decided once from @counter and then applied to every ancestor as well */ bool protection = track_protection(counter); for (c = counter; c; c = c->parent) { long new; new = atomic_long_add_return(nr_pages, &c->usage); if (protection) propagate_protected_usage(c, new); /* * This is indeed racy, but we can live with some * inaccuracy in the watermark. * * Notably, we have two watermarks to allow for both a globally * visible peak and one that can be reset at a smaller scope. * * Since we reset both watermarks when the global reset occurs, * we can guarantee that watermark >= local_watermark, so we * don't need to do both comparisons every time. * * On systems with branch predictors, the inner condition should * be almost free. */ if (new > READ_ONCE(c->local_watermark)) { WRITE_ONCE(c->local_watermark, new); if (new > READ_ONCE(c->watermark)) WRITE_ONCE(c->watermark, new); } } } /** * page_counter_try_charge - try to hierarchically charge pages * @counter: counter * @nr_pages: number of pages to charge * @fail: set to point to the first counter that hit its limit, if any * * Returns %true on success, or %false and @fail if the counter or one * of its ancestors has hit its configured limit. @fail is written only * on failure. */ bool page_counter_try_charge(struct page_counter *counter, unsigned long nr_pages, struct page_counter **fail) { struct page_counter *c; /* Both settings are read once from @counter and then used for all ancestors */ bool protection = track_protection(counter); bool track_failcnt = counter->track_failcnt; for (c = counter; c; c = c->parent) { long new; /* * Charge speculatively to avoid an expensive CAS. If * a bigger charge fails, it might falsely lock out a * racing smaller charge and send it into reclaim * early, but the error is limited to the difference * between the two sizes, which is less than 2M/4M in * case of a THP locking out a regular page charge. * * The atomic_long_add_return() implies a full memory * barrier between incrementing the count and reading * the limit. 
When racing with page_counter_set_max(), * we either see the new limit or the setter sees the * counter has changed and retries. */ new = atomic_long_add_return(nr_pages, &c->usage); if (new > c->max) { /* Over the limit: take the speculative charge back from this counter */ atomic_long_sub(nr_pages, &c->usage); /* * This is racy, but we can live with some * inaccuracy in the failcnt which is only used * to report stats. */ if (track_failcnt) data_race(c->failcnt++); *fail = c; goto failed; } if (protection) propagate_protected_usage(c, new); /* see comment on page_counter_charge */ if (new > READ_ONCE(c->local_watermark)) { WRITE_ONCE(c->local_watermark, new); if (new > READ_ONCE(c->watermark)) WRITE_ONCE(c->watermark, new); } } return true; /* Roll back the counters charged before the failing one; the failing counter was already corrected above */ failed: for (c = counter; c != *fail; c = c->parent) page_counter_cancel(c, nr_pages); return false; } /** * page_counter_uncharge - hierarchically uncharge pages * @counter: counter * @nr_pages: number of pages to uncharge * * Calls page_counter_cancel() on @counter and on each of its ancestors. */ void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; for (c = counter; c; c = c->parent) page_counter_cancel(c, nr_pages); } /** * page_counter_set_max - set the maximum number of pages allowed * @counter: counter * @nr_pages: limit to set * * Returns 0 on success, -EBUSY if the current number of pages on the * counter already exceeds the specified limit. * * The caller must serialize invocations on the same counter. */ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages) { for (;;) { unsigned long old; long usage; /* * Update the limit while making sure that it's not * below the concurrently-changing counter value. * * The xchg implies two full memory barriers before * and after, so the read-swap-read is ordered and * ensures coherency with page_counter_try_charge(): * that function modifies the count before checking * the limit, so if it sees the old limit, we see the * modified counter and retry. 
*/ usage = page_counter_read(counter); if (usage > nr_pages) return -EBUSY; old = xchg(&counter->max, nr_pages); if (page_counter_read(counter) <= usage || nr_pages >= old) return 0; counter->max = old; cond_resched(); } } /** * page_counter_set_min - set the amount of protected memory * @counter: counter * @nr_pages: value to set * * The caller must serialize invocations on the same counter. */ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; WRITE_ONCE(counter->min, nr_pages); for (c = counter; c; c = c->parent) propagate_protected_usage(c, atomic_long_read(&c->usage)); } /** * page_counter_set_low - set the amount of protected memory * @counter: counter * @nr_pages: value to set * * The caller must serialize invocations on the same counter. */ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; WRITE_ONCE(counter->low, nr_pages); for (c = counter; c; c = c->parent) propagate_protected_usage(c, atomic_long_read(&c->usage)); } /** * page_counter_memparse - memparse() for page counter limits * @buf: string to parse * @max: string meaning maximum possible value * @nr_pages: returns the result in number of pages * * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be * limited to %PAGE_COUNTER_MAX. */ int page_counter_memparse(const char *buf, const char *max, unsigned long *nr_pages) { char *end; u64 bytes; if (!strcmp(buf, max)) { *nr_pages = PAGE_COUNTER_MAX; return 0; } bytes = memparse(buf, &end); if (*end != '\0') return -EINVAL; *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); return 0; } #if IS_ENABLED(CONFIG_MEMCG) || IS_ENABLED(CONFIG_CGROUP_DMEM) /* * This function calculates an individual page counter's effective * protection which is derived from its own memory.min/low, its * parent's and siblings' settings, as well as the actual memory * distribution in the tree. 
* * The following rules apply to the effective protection values: * * 1. At the first level of reclaim, effective protection is equal to * the declared protection in memory.min and memory.low. * * 2. To enable safe delegation of the protection configuration, at * subsequent levels the effective protection is capped to the * parent's effective protection. * * 3. To make complex and dynamic subtrees easier to configure, the * user is allowed to overcommit the declared protection at a given * level. If that is the case, the parent's effective protection is * distributed to the children in proportion to how much protection * they have declared and how much of it they are utilizing. * * This makes distribution proportional, but also work-conserving: * if one counter claims much more protection than it uses memory, * the unused remainder is available to its siblings. * * 4. Conversely, when the declared protection is undercommitted at a * given level, the distribution of the larger parental protection * budget is NOT proportional. A counter's protection from a sibling * is capped to its own memory.min/low setting. * * 5. However, to allow protecting recursive subtrees from each other * without having to declare each individual counter's fixed share * of the ancestor's claim to protection, any unutilized - * "floating" - protection from up the tree is distributed in * proportion to each counter's *usage*. This makes the protection * neutral wrt sibling cgroups and lets them compete freely over * the shared parental protection budget, but it protects the * subtree as a whole from neighboring subtrees. * * Note that 4. and 5. are not in conflict: 4. is about protecting * against immediate siblings whereas 5. is about protecting against * neighboring subtrees. 
*/ static unsigned long effective_protection(unsigned long usage, unsigned long parent_usage, unsigned long setting, unsigned long parent_effective, unsigned long siblings_protected, bool recursive_protection) { unsigned long protected; unsigned long ep; protected = min(usage, setting); /* * If all cgroups at this level combined claim and use more * protection than what the parent affords them, distribute * shares in proportion to utilization. * * We are using actual utilization rather than the statically * claimed protection in order to be work-conserving: claimed * but unused protection is available to siblings that would * otherwise get a smaller chunk than what they claimed. */ if (siblings_protected > parent_effective) return protected * parent_effective / siblings_protected; /* * Ok, utilized protection of all children is within what the * parent affords them, so we know whatever this child claims * and utilizes is effectively protected. * * If there is unprotected usage beyond this value, reclaim * will apply pressure in proportion to that amount. * * If there is unutilized protection, the cgroup will be fully * shielded from reclaim, but we do return a smaller value for * protection than what the group could enjoy in theory. This * is okay. With the overcommit distribution above, effective * protection is always dependent on how memory is actually * consumed among the siblings anyway. */ ep = protected; /* * If the children aren't claiming (all of) the protection * afforded to them by the parent, distribute the remainder in * proportion to the (unprotected) memory of each cgroup. That * way, cgroups that aren't explicitly prioritized wrt each * other compete freely over the allowance, but they are * collectively protected from neighboring trees. * * We're using unprotected memory for the weight so that if * some cgroups DO claim explicit protection, we don't protect * the same bytes twice. 
* * Check both usage and parent_usage against the respective * protected values. One should imply the other, but they * aren't read atomically - make sure the division is sane. */ if (!recursive_protection) return ep; if (parent_effective > siblings_protected && parent_usage > siblings_protected && usage > protected) { unsigned long unclaimed; unclaimed = parent_effective - siblings_protected; unclaimed *= usage - protected; unclaimed /= parent_usage - siblings_protected; ep += unclaimed; } return ep; } /** * page_counter_calculate_protection - check if memory consumption is in the normal range * @root: the top ancestor of the sub-tree being checked * @counter: the page_counter the counter to update * @recursive_protection: Whether to use memory_recursiveprot behavior. * * Calculates elow/emin thresholds for given page_counter. * * WARNING: This function is not stateless! It can only be used as part * of a top-down tree iteration, not for isolated queries. */ void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection) { unsigned long usage, parent_usage; struct page_counter *parent = counter->parent; /* * Effective values of the reclaim targets are ignored so they * can be stale. Have a look at mem_cgroup_protection for more * details. * TODO: calculation should be more robust so that we do not need * that special casing. 
*/ if (root == counter) return; usage = page_counter_read(counter); if (!usage) return; if (parent == root) { counter->emin = READ_ONCE(counter->min); counter->elow = READ_ONCE(counter->low); return; } parent_usage = page_counter_read(parent); WRITE_ONCE(counter->emin, effective_protection(usage, parent_usage, READ_ONCE(counter->min), READ_ONCE(parent->emin), atomic_long_read(&parent->children_min_usage), recursive_protection)); WRITE_ONCE(counter->elow, effective_protection(usage, parent_usage, READ_ONCE(counter->low), READ_ONCE(parent->elow), atomic_long_read(&parent->children_low_usage), recursive_protection)); } #endif /* CONFIG_MEMCG || CONFIG_CGROUP_DMEM */
112 113 54 52 54 158 6 22 103 104 54 55 101 88 186 186 65 170 13 15 132 133 4 66 169 112 112 114 111 113 114 36 23 3 13 10 18 2 20 19 15 4 9 3 20 20 1 18 41 40 6 25 24 24 36 35 6 1 5 36 36 35 5 36 35 5 53 106 105 106 106 106 106 106 13 1 1 1 2 1 43 43 43 30 13 13 34 10 34 43 20 20 137 36 116 20 18 2 20 20 100 99 1 4 8 4 5 78 185 50 27 13 13 73 56 175 176 78 106 28 28 28 2 26 27 28 26 28 26 6 127 91 11 81 148 54 106 106 106 106 106 106 106 105 105 103 105 106 105 106 106 106 87 36 53 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 
1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic INET transport hashtables * * Authors: Lotsa people, from code originally in tcp */ #include <linux/module.h> #include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/vmalloc.h> #include <linux/memblock.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/inet6_hashtables.h> #endif #include <net/hotdata.h> #include <net/ip.h> #include <net/rps.h> #include <net/secure_seq.h> #include <net/sock_reuseport.h> #include <net/tcp.h> u32 inet_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); return lport + __inet_ehashfn(laddr, 0, faddr, fport, inet_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet_ehashfn); /* This function handles inet_sock, but also timewait and request sockets * for IPv4/IPv6. */ static u32 sk_ehashfn(const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6 && !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) return inet6_ehashfn(sock_net(sk), &sk->sk_v6_rcv_saddr, sk->sk_num, &sk->sk_v6_daddr, sk->sk_dport); #endif return inet_ehashfn(sock_net(sk), sk->sk_rcv_saddr, sk->sk_num, sk->sk_daddr, sk->sk_dport); } /* * Allocate and initialize a new local port bind bucket. * The bindhash mutex for snum's hash chain must be held here. 
*/
struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
						 struct net *net,
						 struct inet_bind_hashbucket *head,
						 const unsigned short snum,
						 int l3mdev)
{
	/* GFP_ATOMIC: the allocation is made without sleeping. */
	struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb) {
		write_pnet(&tb->ib_net, net);
		tb->l3mdev    = l3mdev;
		tb->port      = snum;
		tb->fastreuse = 0;
		tb->fastreuseport = 0;
		INIT_HLIST_HEAD(&tb->bhash2);
		/* Link into the chain only after every field has been set up. */
		hlist_add_head_rcu(&tb->node, &head->chain);
	}
	/* NULL when the allocation failed. */
	return tb;
}

/*
 * Caller must hold hashbucket lock for this tb with local BH disabled
 *
 * The bucket is only unlinked and freed when its bhash2 list is empty;
 * otherwise this is a no-op.  The free is deferred through kfree_rcu().
 */
void inet_bind_bucket_destroy(struct inet_bind_bucket *tb)
{
	if (hlist_empty(&tb->bhash2)) {
		hlist_del_rcu(&tb->node);
		kfree_rcu(tb, rcu);
	}
}

/* True when @tb belongs to @net and has the given @port and @l3mdev. */
bool inet_bind_bucket_match(const struct inet_bind_bucket *tb, const struct net *net,
			    unsigned short port, int l3mdev)
{
	return net_eq(ib_net(tb), net) && tb->port == port &&
		tb->l3mdev == l3mdev;
}

/*
 * Fill in a freshly allocated bhash2 bucket from @tb and @sk, then link
 * it into both @head's chain and @tb's bhash2 list.
 */
static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb2,
				   struct net *net,
				   struct inet_bind_hashbucket *head,
				   struct inet_bind_bucket *tb,
				   const struct sock *sk)
{
	write_pnet(&tb2->ib_net, net);
	/* Port and l3mdev are inherited from the owning bhash bucket. */
	tb2->l3mdev = tb->l3mdev;
	tb2->port = tb->port;
#if IS_ENABLED(CONFIG_IPV6)
	/* Compile-time check that these address-type bits fit in USHRT_MAX. */
	BUILD_BUG_ON(USHRT_MAX < (IPV6_ADDR_ANY | IPV6_ADDR_MAPPED));
	if (sk->sk_family == AF_INET6) {
		tb2->addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
		tb2->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
	} else {
		/* Non-IPv6 sockets are recorded as a v4-mapped address. */
		tb2->addr_type = IPV6_ADDR_MAPPED;
		ipv6_addr_set_v4mapped(sk->sk_rcv_saddr, &tb2->v6_rcv_saddr);
	}
#else
	tb2->rcv_saddr = sk->sk_rcv_saddr;
#endif
	INIT_HLIST_HEAD(&tb2->owners);
	hlist_add_head(&tb2->node, &head->chain);
	hlist_add_head(&tb2->bhash_node, &tb->bhash2);
}

/*
 * Allocate (GFP_ATOMIC) and initialize a bhash2 bucket for @sk.
 * Returns the new bucket, or NULL if the allocation failed.
 */
struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
						   struct net *net,
						   struct inet_bind_hashbucket *head,
						   struct inet_bind_bucket *tb,
						   const struct sock *sk)
{
	struct inet_bind2_bucket *tb2 = kmem_cache_alloc(cachep, GFP_ATOMIC);

	if (tb2)
		inet_bind2_bucket_init(tb2, net, head, tb, sk);

	return tb2;
}

/* Caller must hold hashbucket lock for this tb with local
BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
	/* Only tear the bucket down once no socket is left on its owners list. */
	if (hlist_empty(&tb->owners)) {
		__hlist_del(&tb->node);
		__hlist_del(&tb->bhash_node);
		kmem_cache_free(cachep, tb);
	}
}

/* True when the address stored in @tb2 equals the address @sk is bound to. */
static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2,
					 const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);

	/* A non-IPv6 socket can only match a bucket holding a v4-mapped address. */
	if (tb2->addr_type != IPV6_ADDR_MAPPED)
		return false;
#endif
	return tb2->rcv_saddr == sk->sk_rcv_saddr;
}

/*
 * Record @port and both bind buckets on @sk and add @sk to the owners
 * list of @tb2.
 */
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
		    struct inet_bind2_bucket *tb2, unsigned short port)
{
	inet_sk(sk)->inet_num = port;
	inet_csk(sk)->icsk_bind_hash = tb;
	inet_csk(sk)->icsk_bind2_hash = tb2;
	sk_add_bind_node(sk, &tb2->owners);
}

/*
 * Get rid of any references to a local port held by the given sock.
 */
static void __inet_put_port(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);
	struct inet_bind_hashbucket *head, *head2;
	struct net *net = sock_net(sk);
	struct inet_bind_bucket *tb;
	int bhash;

	/* Both buckets are looked up before inet_num is cleared below. */
	bhash = inet_bhashfn(net, inet_sk(sk)->inet_num, hashinfo->bhash_size);
	head = &hashinfo->bhash[bhash];
	head2 = inet_bhashfn_portaddr(hashinfo, sk, net, inet_sk(sk)->inet_num);

	/* Lock order: bhash bucket first, bhash2 bucket nested inside it. */
	spin_lock(&head->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	inet_csk(sk)->icsk_bind_hash = NULL;
	inet_sk(sk)->inet_num = 0;

	spin_lock(&head2->lock);
	if (inet_csk(sk)->icsk_bind2_hash) {
		struct inet_bind2_bucket *tb2 = inet_csk(sk)->icsk_bind2_hash;

		__sk_del_bind_node(sk);
		inet_csk(sk)->icsk_bind2_hash = NULL;
		/* Frees tb2 only if @sk was its last owner. */
		inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
	}
	spin_unlock(&head2->lock);

	/* Frees tb only if its bhash2 list is now empty. */
	inet_bind_bucket_destroy(tb);
	spin_unlock(&head->lock);
}

/* Wrapper that runs __inet_put_port() with bottom halves disabled. */
void inet_put_port(struct sock *sk)
{
	local_bh_disable();
	__inet_put_port(sk);
	local_bh_enable();
}
EXPORT_SYMBOL(inet_put_port);

/*
 * Bind @child to the bind buckets of listener @sk, looking up or creating
 * different buckets when the child's port or address does not match the
 * listener's.
 *
 * Returns 0 on success, -ENOENT if @sk has no bind buckets, or -ENOMEM
 * if a needed bucket could not be allocated.
 */
int __inet_inherit_port(const struct sock *sk, struct sock *child)
{
	struct inet_hashinfo *table = tcp_get_hashinfo(sk);
	unsigned short port = inet_sk(child)->inet_num;
	struct inet_bind_hashbucket *head, *head2;
	bool created_inet_bind_bucket = false;
	struct net *net = sock_net(sk);
	bool update_fastreuse = false;
	struct inet_bind2_bucket *tb2;
	struct inet_bind_bucket *tb;
	int bhash, l3mdev;

	bhash = inet_bhashfn(net, port, table->bhash_size);
	head = &table->bhash[bhash];
	head2 = inet_bhashfn_portaddr(table, child, net, port);

	/* Same nesting as __inet_put_port(): head first, then head2. */
	spin_lock(&head->lock);
	spin_lock(&head2->lock);
	tb = inet_csk(sk)->icsk_bind_hash;
	tb2 = inet_csk(sk)->icsk_bind2_hash;
	if (unlikely(!tb || !tb2)) {
		spin_unlock(&head2->lock);
		spin_unlock(&head->lock);
		return -ENOENT;
	}
	if (tb->port != port) {
		l3mdev = inet_sk_bound_l3mdev(sk);

		/* NOTE: using tproxy and redirecting skbs to a proxy
		 * on a different listener port breaks the assumption
		 * that the listener socket's icsk_bind_hash is the same
		 * as that of the child socket. We have to look up or
		 * create a new bind bucket for the child here. */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (inet_bind_bucket_match(tb, net, port, l3mdev))
				break;
		}
		/* The check below treats a NULL tb as "no matching bucket found". */
		if (!tb) {
			tb = inet_bind_bucket_create(table->bind_bucket_cachep,
						     net, head, port, l3mdev);
			if (!tb) {
				spin_unlock(&head2->lock);
				spin_unlock(&head->lock);
				return -ENOMEM;
			}
			/* Remembered so the error path can undo the creation. */
			created_inet_bind_bucket = true;
		}
		update_fastreuse = true;

		/* Jumps into the else-branch body; l3mdev is already set here. */
		goto bhash2_find;
	} else if (!inet_bind2_bucket_addr_match(tb2, child)) {
		l3mdev = inet_sk_bound_l3mdev(sk);

bhash2_find:
		tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, child);
		if (!tb2) {
			tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
						       net, head2, tb, child);
			if (!tb2)
				goto error;
		}
	}
	if (update_fastreuse)
		inet_csk_update_fastreuse(tb, child);
	inet_bind_hash(child, tb, tb2, port);
	spin_unlock(&head2->lock);
	spin_unlock(&head->lock);

	return 0;

error:
	/* Only a bucket created by this call is released again. */
	if (created_inet_bind_bucket)
		inet_bind_bucket_destroy(tb);
	spin_unlock(&head2->lock);
	spin_unlock(&head->lock);
	return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);

/*
 * Return the lhash2 bucket for @sk, hashed on its bound port together
 * with its IPv6 or IPv4 receive address depending on the socket family.
 */
static struct inet_listen_hashbucket *
inet_lhash2_bucket_sk(struct inet_hashinfo *h, struct sock *sk)
{
	u32 hash;

#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		hash = ipv6_portaddr_hash(sock_net(sk),
					  &sk->sk_v6_rcv_saddr,
					  inet_sk(sk)->inet_num);
	else
#endif
		hash = ipv4_portaddr_hash(sock_net(sk),
					  inet_sk(sk)->inet_rcv_saddr,
					  inet_sk(sk)->inet_num);
	return inet_lhash2_bucket(h, hash);
}

/*
 * Score how well @sk matches a lookup for (@net, @daddr, @hnum, @dif, @sdif).
 *
 * Returns -1 when @sk does not match.  Otherwise the score starts at 2
 * for a device-bound socket or 1 for an unbound one, and gains one point
 * each for being a PF_INET socket and for having sk_incoming_cpu equal
 * to the current CPU.
 */
static inline int compute_score(struct sock *sk, const struct net *net,
				const unsigned short hnum, const __be32 daddr,
				const int dif, const int sdif)
{
	int score = -1;

	if (net_eq(sock_net(sk), net) && sk->sk_num == hnum &&
			!ipv6_only_sock(sk)) {
		if (sk->sk_rcv_saddr != daddr)
			return -1;

		if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif))
			return -1;
		score =  sk->sk_bound_dev_if ? 2 : 1;

		if (sk->sk_family == PF_INET)
			score++;
		if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id())
			score++;
	}
	return score;
}

/**
 * inet_lookup_reuseport() - execute reuseport logic on AF_INET socket if necessary.
 * @net: network namespace.
 * @sk: AF_INET socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP.
 * @skb: context for a potential SK_REUSEPORT program.
 * @doff: header offset.
 * @saddr: source address.
 * @sport: source port.
 * @daddr: destination address.
 * @hnum: destination port in host byte order.
 * @ehashfn: hash function used to generate the fallback hash.
 *
 * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to
 *         the selected sock or an error.
 */
struct sock *inet_lookup_reuseport(const struct net *net, struct sock *sk,
				   struct sk_buff *skb, int doff,
				   __be32 saddr, __be16 sport,
				   __be32 daddr, unsigned short hnum,
				   inet_ehashfn_t *ehashfn)
{
	struct sock *reuse_sk = NULL;
	u32 phash;

	if (sk->sk_reuseport) {
		/* The hash is computed with the local (daddr, hnum) pair first. */
		phash = INDIRECT_CALL_2(ehashfn, udp_ehashfn, inet_ehashfn,
					net, daddr, hnum, saddr, sport);
		reuse_sk = reuseport_select_sock(sk, phash, skb, doff);
	}
	return reuse_sk;
}
EXPORT_SYMBOL_GPL(inet_lookup_reuseport);

/*
 * Here are some nice properties to exploit here.
The BSD API
 * does not allow a listening sock to specify the remote port nor the
 * remote address for the connection. So always assume those are both
 * wildcarded during the search since they can never be otherwise.
 */

/* called with rcu_read_lock() : No refcount taken on the socket */
static struct sock *inet_lhash2_lookup(const struct net *net,
				struct inet_listen_hashbucket *ilb2,
				struct sk_buff *skb, int doff,
				const __be32 saddr, __be16 sport,
				const __be32 daddr, const unsigned short hnum,
				const int dif, const int sdif)
{
	struct sock *sk, *result = NULL;
	struct hlist_nulls_node *node;
	/* Starts at 0, so sockets scoring -1 (no match) are never selected. */
	int score, hiscore = 0;

	sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) {
		score = compute_score(sk, net, hnum, daddr, dif, sdif);
		if (score > hiscore) {
			/* A non-NULL reuseport selection ends the search at once. */
			result = inet_lookup_reuseport(net, sk, skb, doff,
						       saddr, sport, daddr, hnum, inet_ehashfn);
			if (result)
				return result;

			result = sk;
			hiscore = score;
		}
	}

	return result;
}

/*
 * Run the BPF sk_lookup program and, unless it asked for no reuseport
 * handling or produced NULL or an error, let reuseport selection replace
 * the socket it picked.
 */
struct sock *inet_lookup_run_sk_lookup(const struct net *net,
				       int protocol,
				       struct sk_buff *skb, int doff,
				       __be32 saddr, __be16 sport,
				       __be32 daddr, u16 hnum, const int dif,
				       inet_ehashfn_t *ehashfn)
{
	struct sock *sk, *reuse_sk;
	bool no_reuseport;

	no_reuseport = bpf_sk_lookup_run_v4(net, protocol, saddr, sport,
					    daddr, hnum, dif, &sk);
	if (no_reuseport || IS_ERR_OR_NULL(sk))
		return sk;

	reuse_sk = inet_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum,
					 ehashfn);
	if (reuse_sk)
		sk = reuse_sk;
	return sk;
}

/*
 * Find a listening socket for the given addresses and port.
 *
 * Tries, in order: the BPF sk_lookup redirect (only when enabled and
 * @hashinfo is the namespace's TCP table), the lhash2 bucket for @daddr,
 * and the lhash2 bucket for INADDR_ANY.  An ERR_PTR result is turned
 * into NULL before returning.
 */
struct sock *__inet_lookup_listener(const struct net *net,
				    struct inet_hashinfo *hashinfo,
				    struct sk_buff *skb, int doff,
				    const __be32 saddr, __be16 sport,
				    const __be32 daddr, const unsigned short hnum,
				    const int dif, const int sdif)
{
	struct inet_listen_hashbucket *ilb2;
	struct sock *result = NULL;
	unsigned int hash2;

	/* Lookup redirect from BPF */
	if (static_branch_unlikely(&bpf_sk_lookup_enabled) &&
	    hashinfo == net->ipv4.tcp_death_row.hashinfo) {
		result = inet_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff,
						   saddr, sport, daddr, hnum, dif,
						   inet_ehashfn);
		if (result)
			goto done;
	}

	hash2 = ipv4_portaddr_hash(net, daddr, hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, daddr, hnum,
				    dif, sdif);
	if (result)
		goto done;

	/* Lookup lhash2 with INADDR_ANY */
	hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
	ilb2 = inet_lhash2_bucket(hashinfo, hash2);

	result = inet_lhash2_lookup(net, ilb2, skb, doff,
				    saddr, sport, htonl(INADDR_ANY), hnum,
				    dif, sdif);
done:
	if (IS_ERR(result))
		return NULL;
	return result;
}
EXPORT_SYMBOL_GPL(__inet_lookup_listener);

/* All sockets share common refcount, but have different destructors */
void sock_gen_put(struct sock *sk)
{
	/* Nothing more to do unless this dropped the last reference. */
	if (!refcount_dec_and_test(&sk->sk_refcnt))
		return;

	/* sk_state selects which free routine is used. */
	if (sk->sk_state == TCP_TIME_WAIT)
		inet_twsk_free(inet_twsk(sk));
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		reqsk_free(inet_reqsk(sk));
	else
		sk_free(sk);
}
EXPORT_SYMBOL_GPL(sock_gen_put);

/* Drops the reference on @skb->sk through sock_gen_put(). */
void sock_edemux(struct sk_buff *skb)
{
	sock_gen_put(skb->sk);
}
EXPORT_SYMBOL(sock_edemux);

/*
 * Look up a socket in the ehash table for the given 4-tuple and device.
 *
 * Returns the socket with its refcount incremented, or NULL if none was
 * found or its refcount had already dropped to zero.
 */
struct sock *__inet_lookup_established(const struct net *net,
				  struct inet_hashinfo *hashinfo,
				  const __be32 saddr, const __be16 sport,
				  const __be32 daddr, const u16 hnum,
				  const int dif, const int sdif)
{
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(sport, hnum);
	struct sock *sk;
	const struct hlist_nulls_node *node;
	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	unsigned int hash = inet_ehashfn(net, daddr, hnum, saddr, sport);
	unsigned int slot = hash & hashinfo->ehash_mask;
	struct inet_ehash_bucket *head = &hashinfo->ehash[slot];

begin:
	sk_nulls_for_each_rcu(sk, node, &head->chain) {
		if (sk->sk_hash != hash)
			continue;
		if (likely(inet_match(net, sk, acookie, ports, dif, sdif))) {
			if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
				goto out;
			/*
			 * Match again now that a reference is held; on a
			 * mismatch drop the reference and rescan the chain.
			 */
			if (unlikely(!inet_match(net, sk, acookie,
						 ports, dif, sdif))) {
				sock_gen_put(sk);
				goto begin;
			}
			goto found;
		}
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(node) != slot)
		goto begin;
out:
	sk = NULL;
found:
	return sk;
}
EXPORT_SYMBOL_GPL(__inet_lookup_established);

/*
 * called with local bh disabled
 *
 * Check whether local port @lport gives @sk a 4-tuple not already present
 * in the ehash bucket for @hash.
 *
 * With @rcu_lookup set this only scans the chain (no lock taken, nothing
 * inserted): a matching non-TIME_WAIT socket yields -EADDRNOTAVAIL,
 * anything else 0.  Otherwise the bucket lock is taken and, if the tuple
 * is free or only held by a reusable TIME_WAIT socket, @sk is inserted.
 * A replaced TIME_WAIT socket is handed back through @twp when that is
 * non-NULL, and descheduled here otherwise.
 *
 * Returns 0 on success or -EADDRNOTAVAIL if the tuple is taken.
 */
static int __inet_check_established(struct inet_timewait_death_row *death_row,
				    struct sock *sk, __u16 lport,
				    struct inet_timewait_sock **twp,
				    bool rcu_lookup,
				    u32 hash)
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_sock *inet = inet_sk(sk);
	/*
	 * Our receive address is the cookie's daddr and our peer its saddr,
	 * the same argument roles inet_match() gets in
	 * __inet_lookup_established().
	 */
	__be32 daddr = inet->inet_rcv_saddr;
	__be32 saddr = inet->inet_daddr;
	int dif = sk->sk_bound_dev_if;
	struct net *net = sock_net(sk);
	int sdif = l3mdev_master_ifindex_by_index(net, dif);
	INET_ADDR_COOKIE(acookie, saddr, daddr);
	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
	struct inet_timewait_sock *tw = NULL;
	const struct hlist_nulls_node *node;
	struct sock *sk2;
	spinlock_t *lock;

	if (rcu_lookup) {
		sk_nulls_for_each(sk2, node, &head->chain) {
			if (sk2->sk_hash != hash ||
			    !inet_match(net, sk2, acookie, ports, dif, sdif))
				continue;
			/* A TIME_WAIT match is not reported as a conflict here. */
			if (sk2->sk_state == TCP_TIME_WAIT)
				break;
			return -EADDRNOTAVAIL;
		}
		return 0;
	}

	lock = inet_ehash_lockp(hinfo, hash);
	spin_lock(lock);

	sk_nulls_for_each(sk2, node, &head->chain) {
		if (sk2->sk_hash != hash)
			continue;

		if (likely(inet_match(net, sk2, acookie, ports, dif, sdif))) {
			if (sk2->sk_state == TCP_TIME_WAIT) {
				tw = inet_twsk(sk2);
				/* Reusable TIME_WAIT: stop scanning and take its place. */
				if (sk->sk_protocol == IPPROTO_TCP &&
				    tcp_twsk_unique(sk, sk2, twp))
					break;
			}
			goto not_unique;
		}
	}

	/* Must record num and sport now. Otherwise we will see
	 * in hash table socket with a funny identity.
	 */
	inet->inet_num = lport;
	inet->inet_sport = htons(lport);
	sk->sk_hash = hash;
	WARN_ON(!sk_unhashed(sk));
	__sk_nulls_add_node_rcu(sk, &head->chain);
	if (tw) {
		sk_nulls_del_node_init_rcu((struct sock *)tw);
		__NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED);
	}
	spin_unlock(lock);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);

	if (twp) {
		*twp = tw;
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule_put(tw);
	}
	return 0;

not_unique:
	spin_unlock(lock);
	return -EADDRNOTAVAIL;
}

/*
 * Port offset for @sk, obtained from secure_ipv4_port_ephemeral() on its
 * receive address, destination address and destination port.
 */
static u64 inet_sk_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);

	return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,
					  inet->inet_daddr,
					  inet->inet_dport);
}

/* Searches for an exsiting socket in the ehash bucket list.
 * Returns true if found, false otherwise.
*/ static bool inet_ehash_lookup_by_sk(struct sock *sk, struct hlist_nulls_head *list) { const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); const int sdif = sk->sk_bound_dev_if; const int dif = sk->sk_bound_dev_if; const struct hlist_nulls_node *node; struct net *net = sock_net(sk); struct sock *esk; INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); sk_nulls_for_each_rcu(esk, node, list) { if (esk->sk_hash != sk->sk_hash) continue; if (sk->sk_family == AF_INET) { if (unlikely(inet_match(net, esk, acookie, ports, dif, sdif))) { return true; } } #if IS_ENABLED(CONFIG_IPV6) else if (sk->sk_family == AF_INET6) { if (unlikely(inet6_match(net, esk, &sk->sk_v6_daddr, &sk->sk_v6_rcv_saddr, ports, dif, sdif))) { return true; } } #endif } return false; } /* Insert a socket into ehash, and eventually remove another one * (The another one can be a SYN_RECV or TIMEWAIT) * If an existing socket already exists, socket sk is not inserted, * and sets found_dup_sk parameter to true. 
*/ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); struct inet_ehash_bucket *head; struct hlist_nulls_head *list; spinlock_t *lock; bool ret = true; WARN_ON_ONCE(!sk_unhashed(sk)); sk->sk_hash = sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; lock = inet_ehash_lockp(hashinfo, sk->sk_hash); spin_lock(lock); if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); } else if (found_dup_sk) { *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); if (*found_dup_sk) ret = false; } if (ret) __sk_nulls_add_node_rcu(sk, list); spin_unlock(lock); return ret; } bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { this_cpu_inc(*sk->sk_prot->orphan_count); inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); } return ok; } EXPORT_IPV6_MOD(inet_ehash_nolisten); static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) { struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; const struct hlist_nulls_node *node; kuid_t uid = sk_uid(sk); struct sock *sk2; sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) { if (sk2 != sk && sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sk_uid(sk2)) && inet_rcv_saddr_equal(sk, sk2, false)) return reuseport_add_sock(sk, sk2, inet_rcv_saddr_any(sk)); } return reuseport_alloc(sk, inet_rcv_saddr_any(sk)); } int __inet_hash(struct sock *sk, struct sock *osk) { struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk); struct inet_listen_hashbucket *ilb2; int err = 0; if (sk->sk_state != TCP_LISTEN) { local_bh_disable(); 
inet_ehash_nolisten(sk, osk, NULL);
		local_bh_enable();
		return 0;
	}

	/* Listening socket: add it to its lhash2 bucket under the bucket lock. */
	WARN_ON(!sk_unhashed(sk));
	ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);

	spin_lock(&ilb2->lock);
	if (sk->sk_reuseport) {
		err = inet_reuseport_add_sock(sk, ilb2);
		if (err)
			goto unlock;
	}
	sock_set_flag(sk, SOCK_RCU_FREE);
	/*
	 * IPv6 reuseport sockets are added with the _tail_ variant of the
	 * helper; every other socket uses the plain add helper.
	 */
	if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
	    sk->sk_family == AF_INET6)
		__sk_nulls_add_node_tail_rcu(sk, &ilb2->nulls_head);
	else
		__sk_nulls_add_node_rcu(sk, &ilb2->nulls_head);
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
unlock:
	spin_unlock(&ilb2->lock);

	return err;
}
EXPORT_IPV6_MOD(__inet_hash);

/*
 * Hash sk unless it is in TCP_CLOSE.
 *
 * Returns 0 when nothing was done, otherwise the result of __inet_hash().
 */
int inet_hash(struct sock *sk)
{
	int err = 0;

	if (sk->sk_state != TCP_CLOSE)
		err = __inet_hash(sk, NULL);

	return err;
}

/*
 * Remove sk from the hash table it is linked into.
 *
 * Returns immediately if sk is already unhashed.  A TCP_LISTEN socket is
 * unlinked under its lhash2 bucket lock; any other socket is unlinked under
 * its ehash lock with bottom halves disabled.  In both cases the unhashed
 * test is repeated once the lock is held, and the protocol's in-use counter
 * is decremented after the unlink.
 */
void inet_unhash(struct sock *sk)
{
	struct inet_hashinfo *hashinfo = tcp_get_hashinfo(sk);

	if (sk_unhashed(sk))
		return;

	sock_rps_delete_flow(sk);
	if (sk->sk_state == TCP_LISTEN) {
		struct inet_listen_hashbucket *ilb2;

		ilb2 = inet_lhash2_bucket_sk(hashinfo, sk);
		/* Don't disable bottom halves while acquiring the lock to * avoid circular locking dependency on PREEMPT_RT. 
*/
		spin_lock(&ilb2->lock);
		/* Re-check under the lock: sk may have been unhashed meanwhile. */
		if (sk_unhashed(sk)) {
			spin_unlock(&ilb2->lock);
			return;
		}

		if (rcu_access_pointer(sk->sk_reuseport_cb))
			reuseport_stop_listen_sock(sk);

		__sk_nulls_del_node_init_rcu(sk);
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
		spin_unlock(&ilb2->lock);
	} else {
		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);

		spin_lock_bh(lock);
		/* Same re-check as on the listening path. */
		if (sk_unhashed(sk)) {
			spin_unlock_bh(lock);
			return;
		}
		__sk_nulls_del_node_init_rcu(sk);
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
		spin_unlock_bh(lock);
	}
}
EXPORT_IPV6_MOD(inet_unhash);

/*
 * Return true if tb belongs to net, has the given port and l3mdev, and
 * inet_bind2_bucket_addr_match() accepts it for sk.
 */
static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb,
				    const struct net *net, unsigned short port,
				    int l3mdev, const struct sock *sk)
{
	if (!net_eq(ib2_net(tb), net) || tb->port != port ||
	    tb->l3mdev != l3mdev)
		return false;

	return inet_bind2_bucket_addr_match(tb, sk);
}

/*
 * Return true if tb belongs to net, has the given port and l3mdev, and its
 * address is a wildcard.
 *
 * With IPv6 enabled: an IPV6_ADDR_ANY bucket always matches; a bucket that is
 * not IPV6_ADDR_MAPPED never does; and an AF_INET6 socket whose receive
 * address is not v4-mapped is rejected.  In the remaining cases (and always
 * without IPv6) the result is whether tb->rcv_saddr is 0.
 */
bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb,
				      const struct net *net,
				      unsigned short port, int l3mdev,
				      const struct sock *sk)
{
	if (!net_eq(ib2_net(tb), net) || tb->port != port ||
	    tb->l3mdev != l3mdev)
		return false;

#if IS_ENABLED(CONFIG_IPV6)
	if (tb->addr_type == IPV6_ADDR_ANY)
		return true;

	if (tb->addr_type != IPV6_ADDR_MAPPED)
		return false;

	if (sk->sk_family == AF_INET6 &&
	    !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
		return false;
#endif
	return tb->rcv_saddr == 0;
}

/*
 * The socket's bhash2 hashbucket spinlock must be held when this is called.
 *
 * Returns the first bucket on head->chain accepted by
 * inet_bind2_bucket_match().
 * NOTE(review): the no-match result depends on the value
 * inet_bind_bucket_for_each() leaves in the cursor after a full walk
 * (presumably NULL) - confirm against the macro.
 */
struct inet_bind2_bucket *
inet_bind2_bucket_find(const struct inet_bind_hashbucket *head,
		       const struct net *net, unsigned short port, int l3mdev,
		       const struct sock *sk)
{
	struct inet_bind2_bucket *bhash2 = NULL;

	inet_bind_bucket_for_each(bhash2, &head->chain)
		if (inet_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
			break;

	return bhash2;
}

/*
 * Return the bhash2 bucket for (net, any-address, port): the hash is taken
 * over in6addr_any for AF_INET6 sockets (when IPv6 is enabled) and over
 * address 0 otherwise, then masked with bhash_size - 1.
 */
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net,
				int port)
{
	struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk);
	u32 hash;

#if IS_ENABLED(CONFIG_IPV6)
	if
(sk->sk_family == AF_INET6)
		hash = ipv6_portaddr_hash(net, &in6addr_any, port);
	else
#endif
		hash = ipv4_portaddr_hash(net, 0, port);

	return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}

/*
 * Store a new source address in sk.
 *
 * For AF_INET, saddr is read as a __be32 and written to inet_saddr and, via
 * sk_rcv_saddr_set(), to the receive address.  For any other family (only
 * when IPv6 is enabled) saddr is read as a struct in6_addr and written to
 * sk_v6_rcv_saddr.
 */
static void inet_update_saddr(struct sock *sk, void *saddr, int family)
{
	if (family == AF_INET) {
		inet_sk(sk)->inet_saddr = *(__be32 *)saddr;
		sk_rcv_saddr_set(sk, inet_sk(sk)->inet_saddr);
	}
#if IS_ENABLED(CONFIG_IPV6)
	else {
		sk->sk_v6_rcv_saddr = *(struct in6_addr *)saddr;
	}
#endif
}

/*
 * Change (or, with reset set, clear) the source address of sk and move sk to
 * the bhash2 bucket that matches the new address.
 *
 * If sk has no bind2 bucket only the address is changed.  Otherwise a spare
 * bind2 bucket is allocated up front, sk is unlinked from its current bind2
 * bucket, the address is changed, and sk is linked to the bucket found (or
 * newly initialised from the spare) for the new address.
 *
 * Returns 0 on success or -ENOMEM if the spare bucket cannot be allocated;
 * in the reset case that failure also releases the port and resets the
 * address.
 */
static int __inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family,
				      bool reset)
{
	struct inet_hashinfo *hinfo = tcp_get_hashinfo(sk);
	struct inet_bind_hashbucket *head, *head2;
	struct inet_bind2_bucket *tb2, *new_tb2;
	int l3mdev = inet_sk_bound_l3mdev(sk);
	int port = inet_sk(sk)->inet_num;
	struct net *net = sock_net(sk);
	int bhash;

	if (!inet_csk(sk)->icsk_bind2_hash) {
		/* Not bind()ed before. */
		if (reset)
			inet_reset_saddr(sk);
		else
			inet_update_saddr(sk, saddr, family);

		return 0;
	}

	/*
	 * Allocate a bind2 bucket ahead of time to avoid permanently putting
	 * the bhash2 table in an inconsistent state if a new tb2 bucket
	 * allocation fails.
	 */
	new_tb2 = kmem_cache_alloc(hinfo->bind2_bucket_cachep, GFP_ATOMIC);
	if (!new_tb2) {
		if (reset) {
			/*
			 * The (INADDR_ANY, port) bucket might have already
			 * been freed, then we cannot fixup icsk_bind2_hash,
			 * so we give up and unlink sk from bhash/bhash2 not
			 * to leave inconsistency in bhash2.
			 */
			inet_put_port(sk);
			inet_reset_saddr(sk);
		}

		return -ENOMEM;
	}

	bhash = inet_bhashfn(net, port, hinfo->bhash_size);
	head = &hinfo->bhash[bhash];
	/* bhash2 bucket for the address sk has before the change. */
	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);

	/* If we change saddr locklessly, another thread * iterating over bhash might see corrupted address. 
*/
	spin_lock_bh(&head->lock);

	/* Unlink sk from the bucket of the old address. */
	spin_lock(&head2->lock);
	__sk_del_bind_node(sk);
	inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
				  inet_csk(sk)->icsk_bind2_hash);
	spin_unlock(&head2->lock);

	if (reset)
		inet_reset_saddr(sk);
	else
		inet_update_saddr(sk, saddr, family);

	/* Recompute the bucket now that sk carries the new address. */
	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);

	spin_lock(&head2->lock);
	tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	if (!tb2) {
		/* No bucket for the new address yet: use the preallocated one. */
		tb2 = new_tb2;
		inet_bind2_bucket_init(tb2, net, head2,
				       inet_csk(sk)->icsk_bind_hash, sk);
	}
	inet_csk(sk)->icsk_bind2_hash = tb2;
	sk_add_bind_node(sk, &tb2->owners);
	spin_unlock(&head2->lock);

	spin_unlock_bh(&head->lock);

	/* An existing bucket was reused, so the spare one is not needed. */
	if (tb2 != new_tb2)
		kmem_cache_free(hinfo->bind2_bucket_cachep, new_tb2);

	return 0;
}

/*
 * Set the source address of sk to saddr (interpreted according to family)
 * and rehash it in bhash2.  Returns the result of
 * __inet_bhash2_update_saddr().
 */
int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family)
{
	return __inet_bhash2_update_saddr(sk, saddr, family, false);
}
EXPORT_IPV6_MOD(inet_bhash2_update_saddr);

/*
 * Reset the source address of sk and rehash it in bhash2, unless
 * SOCK_BINDADDR_LOCK is set in sk_userlocks.  The result of the update is
 * not reported to the caller.
 */
void inet_bhash2_reset_saddr(struct sock *sk)
{
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		__inet_bhash2_update_saddr(sk, NULL, 0, true);
}
EXPORT_IPV6_MOD(inet_bhash2_reset_saddr);

/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') * because 2^16 is not a multiple of num_ephemeral and this * property might be used by clever attacker. * * RFC claims using TABLE_LENGTH=10 buckets gives an improvement, though * attacks were since demonstrated, thus we use 65536 by default instead * to really give more isolation and privacy, at the expense of 256kB * of kernel memory. 
*/
#define INET_TABLE_PERTURB_SIZE (1 << CONFIG_INET_TABLE_PERTURB_ORDER)
static u32 *table_perturb;

/*
 * Choose a local port for an outgoing connection, bind sk to it and hash it.
 *
 * @death_row:         supplies the inet_hashinfo to operate on
 * @sk:                socket being connected
 * @port_offset:       low bits select the table_perturb[] slot; the upper
 *                     32 bits are added to the starting offset
 * @hash_port0:        added to the candidate port to form the hash argument
 *                     passed to @check_established
 * @check_established: callback; a zero return means the port can be used
 *
 * If sk already has a local port, only @check_established is run (with
 * bottom halves disabled) and its result is returned.
 *
 * Returns 0 on success, -EADDRNOTAVAIL if every candidate port was rejected,
 * or -ENOMEM if a bind or bind2 bucket could not be allocated.
 */
int __inet_hash_connect(struct inet_timewait_death_row *death_row,
		struct sock *sk, u64 port_offset,
		u32 hash_port0,
		int (*check_established)(struct inet_timewait_death_row *,
			struct sock *, __u16, struct inet_timewait_sock **,
			bool rcu_lookup, u32 hash))
{
	struct inet_hashinfo *hinfo = death_row->hashinfo;
	struct inet_bind_hashbucket *head, *head2;
	struct inet_timewait_sock *tw = NULL;
	int port = inet_sk(sk)->inet_num;
	struct net *net = sock_net(sk);
	struct inet_bind2_bucket *tb2;
	struct inet_bind_bucket *tb;
	bool tb_created = false;
	u32 remaining, offset;
	int ret, i, low, high;
	bool local_ports;
	int step, l3mdev;
	u32 index;

	if (port) {
		local_bh_disable();
		ret = check_established(death_row, sk, port, NULL, false,
					hash_port0 + port);
		local_bh_enable();
		return ret;
	}

	l3mdev = inet_sk_bound_l3mdev(sk);

	/*
	 * When inet_sk_get_local_port_range() returns true every port in the
	 * range is tried in one pass (step 1); otherwise ports are tried in
	 * two passes of alternating parity (step 2).
	 */
	local_ports = inet_sk_get_local_port_range(sk, &low, &high);
	step = local_ports ? 1 : 2;

	high++; /* [32768, 60999] -> [32768, 61000[ */
	remaining = high - low;
	/* Two-pass mode works on an even number of ports. */
	if (!local_ports && remaining > 1)
		remaining &= ~1U;

	get_random_sleepable_once(table_perturb,
				  INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb));
	index = port_offset & (INET_TABLE_PERTURB_SIZE - 1);

	offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32);
	offset %= remaining;

	/*
	 * In first pass we try ports of @low parity.
	 * inet_csk_get_port() does the opposite choice.
	 */
	if (!local_ports)
		offset &= ~1U;
other_parity_scan:
	port = low + offset;
	for (i = 0; i < remaining; i += step, port += step) {
		/* Wrap around to the start of the range. */
		if (unlikely(port >= high))
			port -= remaining;
		if (inet_is_local_reserved_port(net, port))
			continue;
		head = &hinfo->bhash[inet_bhashfn(net, port,
						  hinfo->bhash_size)];
		/*
		 * Lockless pre-check under RCU: skip the port without taking
		 * the bucket lock if a matching bind bucket has fastreuse or
		 * fastreuseport >= 0, or if check_established() rejects it.
		 */
		rcu_read_lock();
		hlist_for_each_entry_rcu(tb, &head->chain, node) {
			if (!inet_bind_bucket_match(tb, net, port, l3mdev))
				continue;
			if (tb->fastreuse >= 0 || tb->fastreuseport >= 0) {
				rcu_read_unlock();
				goto next_port;
			}
			if (!check_established(death_row, sk, port, &tw, true,
					       hash_port0 + port))
				break;
			rcu_read_unlock();
			goto next_port;
		}
		rcu_read_unlock();

		/* Repeat the checks with the bucket lock held. */
		spin_lock_bh(&head->lock);

		/*
		 * Does not bother with rcv_saddr checks, because
		 * the established check is already unique enough.
		 */
		inet_bind_bucket_for_each(tb, &head->chain) {
			if (inet_bind_bucket_match(tb, net, port, l3mdev)) {
				if (tb->fastreuse >= 0 ||
				    tb->fastreuseport >= 0)
					goto next_port_unlock;
				WARN_ON(hlist_empty(&tb->bhash2));
				if (!check_established(death_row, sk,
						       port, &tw, false,
						       hash_port0 + port))
					goto ok;
				goto next_port_unlock;
			}
		}

		/* No bind bucket for this port yet: create one. */
		tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
					     net, head, port, l3mdev);
		if (!tb) {
			spin_unlock_bh(&head->lock);
			return -ENOMEM;
		}
		/* Remembered so the error path can destroy the new bucket. */
		tb_created = true;
		tb->fastreuse = -1;
		tb->fastreuseport = -1;
		goto ok;
next_port_unlock:
		spin_unlock_bh(&head->lock);
next_port:
		cond_resched();
	}

	/* Second pass over the ports of the other parity. */
	if (!local_ports) {
		offset++;
		if ((offset & 1) && remaining > 1)
			goto other_parity_scan;
	}
	return -EADDRNOTAVAIL;

ok:
	/*
	 * Find the corresponding tb2 bucket since we need to
	 * add the socket to the bhash2 table as well
	 */
	head2 = inet_bhashfn_portaddr(hinfo, sk, net, port);
	spin_lock(&head2->lock);

	tb2 = inet_bind2_bucket_find(head2, net, port, l3mdev, sk);
	if (!tb2) {
		tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
					       head2, tb, sk);
		if (!tb2)
			goto error;
	}

	/*
	 * Here we want to add a little bit of randomness to the next source
	 * port that will be chosen. We use a max() with a random here so that
	 * on low contention the randomness is maximal and on high contention
	 * it may be nonexistent.
	 */
	i = max_t(int, i, get_random_u32_below(8) * step);
	WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + step);

	/* Head lock still held and bh's disabled */
	inet_bind_hash(sk, tb, tb2, port);

	if (sk_unhashed(sk)) {
		inet_sk(sk)->inet_sport = htons(port);
		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
	}
	if (tw)
		inet_twsk_bind_unhash(tw, hinfo);

	spin_unlock(&head2->lock);
	spin_unlock(&head->lock);

	if (tw)
		inet_twsk_deschedule_put(tw);
	local_bh_enable();
	return 0;

error:
	/*
	 * bind2 bucket allocation failed.  If sk is hashed, unhash it and
	 * clear its hash and port fields before the locks are dropped.
	 */
	if (sk_hashed(sk)) {
		spinlock_t *lock = inet_ehash_lockp(hinfo, sk->sk_hash);

		sock_prot_inuse_add(net, sk->sk_prot, -1);

		spin_lock(lock);
		__sk_nulls_del_node_init_rcu(sk);
		spin_unlock(lock);

		sk->sk_hash = 0;
		inet_sk(sk)->inet_sport = 0;
		inet_sk(sk)->inet_num = 0;

		if (tw)
			inet_twsk_bind_unhash(tw, hinfo);
	}

	spin_unlock(&head2->lock);
	if (tb_created)
		inet_bind_bucket_destroy(tb);
	spin_unlock(&head->lock);

	if (tw)
		inet_twsk_deschedule_put(tw);

	local_bh_enable();

	return -ENOMEM;
}

/* * Bind a port for a connect operation and hash it. 
*/ int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct net *net = sock_net(sk); u64 port_offset = 0; u32 hash_port0; if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0, inet->inet_daddr, inet->inet_dport); return __inet_hash_connect(death_row, sk, port_offset, hash_port0, __inet_check_established); } static void init_hashinfo_lhash2(struct inet_hashinfo *h) { int i; for (i = 0; i <= h->lhash2_mask; i++) { spin_lock_init(&h->lhash2[i].lock); INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, i + LISTENING_NULLS_BASE); } } void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit) { h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2), numentries, scale, 0, NULL, &h->lhash2_mask, low_limit, high_limit); init_hashinfo_lhash2(h); /* this one is used for source ports of outgoing connections */ table_perturb = alloc_large_system_hash("Table-perturb", sizeof(*table_perturb), INET_TABLE_PERTURB_SIZE, 0, 0, NULL, NULL, INET_TABLE_PERTURB_SIZE, INET_TABLE_PERTURB_SIZE); } int inet_hashinfo2_init_mod(struct inet_hashinfo *h) { h->lhash2 = kmalloc_array(INET_LHTABLE_SIZE, sizeof(*h->lhash2), GFP_KERNEL); if (!h->lhash2) return -ENOMEM; h->lhash2_mask = INET_LHTABLE_SIZE - 1; /* INET_LHTABLE_SIZE must be a power of 2 */ BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); init_hashinfo_lhash2(h); return 0; } int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); unsigned int i, nblocks = 1; spinlock_t *ptr = NULL; if (locksz == 0) goto set_mask; /* Allocate 2 cache lines or at least one spinlock per cpu. */ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus(); /* At least one page per NUMA node. 
*/
	nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz);

	nblocks = roundup_pow_of_two(nblocks);

	/* No more locks than number of hash buckets. */
	nblocks = min(nblocks, hashinfo->ehash_mask + 1);

	if (num_online_nodes() > 1) {
		/*
		 * Use vmalloc() to allow NUMA policy to spread pages
		 * on all available nodes if desired.
		 */
		ptr = vmalloc_array(nblocks, locksz);
	}
	/* Single node, or the vmalloc attempt failed: use kvmalloc_array(). */
	if (!ptr) {
		ptr = kvmalloc_array(nblocks, locksz, GFP_KERNEL);
		if (!ptr)
			return -ENOMEM;
	}
	for (i = 0; i < nblocks; i++)
		spin_lock_init(&ptr[i]);
	hashinfo->ehash_locks = ptr;
set_mask:
	/* Reached directly with nblocks == 1 when spinlock_t has zero size. */
	hashinfo->ehash_locks_mask = nblocks - 1;
	return 0;
}

/*
 * Create a copy of hashinfo that has its own ehash table and ehash locks.
 *
 * The copy is made with kmemdup(); its ehash table gets ehash_entries
 * buckets, each initialised with its index as nulls value, and the copy is
 * marked pernet.  ehash_mask is set to ehash_entries - 1 (assumes
 * ehash_entries is a power of two - TODO confirm against callers).
 *
 * Returns the new hashinfo, or NULL if any allocation fails; everything
 * allocated up to that point is freed again.
 */
struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo,
						 unsigned int ehash_entries)
{
	struct inet_hashinfo *new_hashinfo;
	int i;

	new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL);
	if (!new_hashinfo)
		goto err;

	new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket),
					   GFP_KERNEL_ACCOUNT);
	if (!new_hashinfo->ehash)
		goto free_hashinfo;

	new_hashinfo->ehash_mask = ehash_entries - 1;

	if (inet_ehash_locks_alloc(new_hashinfo))
		goto free_ehash;

	for (i = 0; i < ehash_entries; i++)
		INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i);

	new_hashinfo->pernet = true;

	return new_hashinfo;

free_ehash:
	vfree(new_hashinfo->ehash);
free_hashinfo:
	kfree(new_hashinfo);
err:
	return NULL;
}

/*
 * Free a hashinfo whose pernet flag is set: its ehash locks, its ehash table
 * and the structure itself.  Does nothing when the flag is clear.
 */
void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo)
{
	if (!hashinfo->pernet)
		return;

	inet_ehash_locks_free(hashinfo);
	vfree(hashinfo->ehash);
	kfree(hashinfo);
}
5 10 2 8 9 9 5 5 5 5 5 5 5 5 7 2 5 5 4 1 3 3 3 1 2 2 2 9 9 1 5 4 1 3 3 6 6 1 10 6 5 4 5 6 2 4 9 9 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 // SPDX-License-Identifier: GPL-2.0-or-later /* * CCM: Counter with CBC-MAC * * (C) Copyright IBM Corp. 
2007 - Joy Latten <latten@us.ibm.com> */ #include <crypto/internal/aead.h> #include <crypto/internal/cipher.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <crypto/utils.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/string.h> struct ccm_instance_ctx { struct crypto_skcipher_spawn ctr; struct crypto_ahash_spawn mac; }; struct crypto_ccm_ctx { struct crypto_ahash *mac; struct crypto_skcipher *ctr; }; struct crypto_rfc4309_ctx { struct crypto_aead *child; u8 nonce[3]; }; struct crypto_rfc4309_req_ctx { struct scatterlist src[3]; struct scatterlist dst[3]; struct aead_request subreq; }; struct crypto_ccm_req_priv_ctx { u8 odata[16]; u8 idata[16]; u8 auth_tag[16]; u32 flags; struct scatterlist src[3]; struct scatterlist dst[3]; union { struct ahash_request ahreq; struct skcipher_request skreq; }; }; struct cbcmac_tfm_ctx { struct crypto_cipher *child; }; static inline struct crypto_ccm_req_priv_ctx *crypto_ccm_reqctx( struct aead_request *req) { unsigned long align = crypto_aead_alignmask(crypto_aead_reqtfm(req)); return (void *)PTR_ALIGN((u8 *)aead_request_ctx(req), align + 1); } static int set_msg_len(u8 *block, unsigned int msglen, int csize) { __be32 data; memset(block, 0, csize); block += csize; if (csize >= 4) csize = 4; else if (msglen > (1 << (8 * csize))) return -EOVERFLOW; data = cpu_to_be32(msglen); memcpy(block - csize, (u8 *)&data + 4 - csize, csize); return 0; } static int crypto_ccm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead); struct crypto_skcipher *ctr = ctx->ctr; struct crypto_ahash *mac = ctx->mac; int err; crypto_skcipher_clear_flags(ctr, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(ctr, crypto_aead_get_flags(aead) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(ctr, key, keylen); if (err) return err; 
crypto_ahash_clear_flags(mac, CRYPTO_TFM_REQ_MASK);
	crypto_ahash_set_flags(mac, crypto_aead_get_flags(aead) &
				    CRYPTO_TFM_REQ_MASK);
	return crypto_ahash_setkey(mac, key, keylen);
}

/*
 * Accept only even tag lengths from 4 to 16 bytes; anything else is
 * -EINVAL.  Nothing is stored here.
 */
static int crypto_ccm_setauthsize(struct crypto_aead *tfm,
				  unsigned int authsize)
{
	switch (authsize) {
	case 4:
	case 6:
	case 8:
	case 10:
	case 12:
	case 14:
	case 16:
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/*
 * Build the 16-byte first MAC block in info from the request IV.
 *
 * The IV is copied, then (m - 2) / 2 multiplied by 8 (m being the tag
 * length) and, if there is associated data, the value 64 are OR-ed into byte
 * 0.  cryptlen is stored in the last l = iv[0] + 1 bytes by set_msg_len().
 *
 * Returns the result of set_msg_len().
 */
static int format_input(u8 *info, struct aead_request *req,
			unsigned int cryptlen)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	unsigned int lp = req->iv[0];
	unsigned int l = lp + 1;
	unsigned int m;

	m = crypto_aead_authsize(aead);

	memcpy(info, req->iv, 16);

	/*
	 * format control info per RFC 3610 and
	 * NIST Special Publication 800-38C
	 */
	*info |= (8 * ((m - 2) / 2));
	if (req->assoclen)
		*info |= 64;

	return set_msg_len(info + 16 - l, cryptlen, l);
}

/*
 * Write the encoded associated-data length a to adata.
 *
 * Lengths below 65280 are stored as a 2-byte big-endian value; larger ones
 * as the 2-byte marker 0xfffe followed by a 4-byte big-endian value.
 *
 * Returns the number of bytes written (2 or 6).
 */
static int format_adata(u8 *adata, unsigned int a)
{
	int len = 0;

	/*
	 * add control info for associated data
	 * RFC 3610 and NIST Special Publication 800-38C
	 */
	if (a < 65280) {
		*(__be16 *)adata = cpu_to_be16(a);
		len = 2;
	} else  {
		*(__be16 *)adata = cpu_to_be16(0xfffe);
		*(__be32 *)&adata[2] = cpu_to_be32(a);
		len = 6;
	}

	return len;
}

/*
 * Compute the MAC of the request into pctx->odata.
 *
 * The MAC input is the formatted first block, then (if there is associated
 * data) its encoded length followed by the associated data from req->src,
 * zero padding up to a multiple of 16 bytes, and finally cryptlen bytes
 * taken from plain.
 *
 * Returns 0 or the first error from format_input() or the ahash calls.
 */
static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
			   unsigned int cryptlen)
{
	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
	struct ahash_request *ahreq = &pctx->ahreq;
	unsigned int assoclen = req->assoclen;
	struct scatterlist sg[3];
	u8 *odata = pctx->odata;
	u8 *idata = pctx->idata;
	int ilen, err;

	/* format control data for input */
	err = format_input(odata, req, cryptlen);
	if (err)
		goto out;

	sg_init_table(sg, 3);
	sg_set_buf(&sg[0], odata, 16);

	/* format associated data and compute into mac */
	if (assoclen) {
		ilen = format_adata(idata, assoclen);
		sg_set_buf(&sg[1], idata, ilen);
		sg_chain(sg, 3, req->src);
	} else {
		ilen = 0;
		sg_chain(sg, 2, req->src);
	}

ahash_request_set_tfm(ahreq, ctx->mac);
	ahash_request_set_callback(ahreq, pctx->flags, NULL, NULL);
	/* First pass: first block, encoded length and associated data. */
	ahash_request_set_crypt(ahreq, sg, NULL, assoclen + ilen + 16);
	err = crypto_ahash_init(ahreq);
	if (err)
		goto out;
	err = crypto_ahash_update(ahreq);
	if (err)
		goto out;

	/* we need to pad the MAC input to a round multiple of the block size */
	ilen = 16 - (assoclen + ilen) % 16;
	if (ilen < 16) {
		/* Put ilen zero bytes in front of the payload. */
		memset(idata, 0, ilen);
		sg_init_table(sg, 2);
		sg_set_buf(&sg[0], idata, ilen);
		if (plain)
			sg_chain(sg, 2, plain);
		plain = sg;
		cryptlen += ilen;
	}

	/* Final pass: padding plus payload; the MAC is written to odata. */
	ahash_request_set_crypt(ahreq, plain, odata, cryptlen);
	err = crypto_ahash_finup(ahreq);
out:
	return err;
}

/*
 * Completion callback installed on the skcipher request by
 * crypto_ccm_encrypt().  If err is 0, authsize bytes of odata are copied to
 * req->dst at offset assoclen + cryptlen; the AEAD request is then completed
 * with err.
 */
static void crypto_ccm_encrypt_done(void *data, int err)
{
	struct aead_request *req = data;
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	u8 *odata = pctx->odata;

	if (!err)
		scatterwalk_map_and_copy(odata, req->dst,
					 req->assoclen + req->cryptlen,
					 crypto_aead_authsize(aead), 1);
	aead_request_complete(req, err);
}

/*
 * Validate the first IV byte.  Returns 0 if it is in the range 1..7 and
 * -EINVAL otherwise.
 */
static inline int crypto_ccm_check_iv(const u8 *iv)
{
	/* 2 <= L <= 8, so 1 <= L' <= 7. */
	if (1 > iv[0] || iv[0] > 7)
		return -EINVAL;

	return 0;
}

/*
 * Common setup for encryption and decryption.
 *
 * Validates the IV, saves the request flags in the private context, zeroes
 * the last iv[0] + 1 bytes of req->iv, and builds pctx->src (and pctx->dst
 * when src and dst differ) as the 16-byte tag buffer followed by the part of
 * the request scatterlist that comes after the associated data.
 *
 * Returns 0 or the error from crypto_ccm_check_iv().
 */
static int crypto_ccm_init_crypt(struct aead_request *req, u8 *tag)
{
	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	struct scatterlist *sg;
	u8 *iv = req->iv;
	int err;

	err = crypto_ccm_check_iv(iv);
	if (err)
		return err;

	pctx->flags = aead_request_flags(req);

	 /* Note: rfc 3610 and NIST 800-38C require counter of * zero to encrypt auth tag. 
*/
	memset(iv + 15 - iv[0], 0, iv[0] + 1);

	/* Tag buffer first, then the data that follows the associated data. */
	sg_init_table(pctx->src, 3);
	sg_set_buf(pctx->src, tag, 16);
	sg = scatterwalk_ffwd(pctx->src + 1, req->src, req->assoclen);
	if (sg != pctx->src + 1)
		sg_chain(pctx->src, 2, sg);

	if (req->src != req->dst) {
		sg_init_table(pctx->dst, 3);
		sg_set_buf(pctx->dst, tag, 16);
		sg = scatterwalk_ffwd(pctx->dst + 1, req->dst, req->assoclen);
		if (sg != pctx->dst + 1)
			sg_chain(pctx->dst, 2, sg);
	}

	return 0;
}

/*
 * Encrypt a request.
 *
 * The MAC is computed over the input data first and left in odata.  The CTR
 * skcipher then processes the 16-byte tag buffer followed by cryptlen bytes
 * of data in one pass.  When the skcipher call returns 0, authsize bytes of
 * odata are copied to the destination right after the cryptlen bytes of
 * output.
 *
 * Returns 0, or the first non-zero value returned by a step.
 */
static int crypto_ccm_encrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	struct skcipher_request *skreq = &pctx->skreq;
	struct scatterlist *dst;
	unsigned int cryptlen = req->cryptlen;
	u8 *odata = pctx->odata;
	u8 *iv = req->iv;
	int err;

	err = crypto_ccm_init_crypt(req, odata);
	if (err)
		return err;

	/* sg_next() skips the tag entry so only the data is authenticated. */
	err = crypto_ccm_auth(req, sg_next(pctx->src), cryptlen);
	if (err)
		return err;

	dst = pctx->src;
	if (req->src != req->dst)
		dst = pctx->dst;

	skcipher_request_set_tfm(skreq, ctx->ctr);
	skcipher_request_set_callback(skreq, pctx->flags,
				      crypto_ccm_encrypt_done, req);
	skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + 16, iv);
	err = crypto_skcipher_encrypt(skreq);
	if (err)
		return err;

	/* copy authtag to end of dst */
	scatterwalk_map_and_copy(odata, sg_next(dst), cryptlen,
				 crypto_aead_authsize(aead), 1);
	return err;
}

/*
 * Completion callback installed on the skcipher request by
 * crypto_ccm_decrypt().  If err is 0, the MAC is recomputed over the output
 * data and compared with the saved tag; a mismatch turns err into -EBADMSG.
 * The AEAD request is then completed with err.
 */
static void crypto_ccm_decrypt_done(void *data, int err)
{
	struct aead_request *req = data;
	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	unsigned int authsize = crypto_aead_authsize(aead);
	unsigned int cryptlen = req->cryptlen - authsize;
	struct scatterlist *dst;

	pctx->flags = 0;

	dst = sg_next(req->src == req->dst ?
pctx->src : pctx->dst);

	if (!err) {
		err = crypto_ccm_auth(req, dst, cryptlen);
		if (!err && crypto_memneq(pctx->auth_tag, pctx->odata, authsize))
			err = -EBADMSG;
	}
	aead_request_complete(req, err);
}

/*
 * Decrypt and verify a request.
 *
 * The last authsize bytes of the input are read into auth_tag.  The CTR
 * skcipher then processes the tag buffer followed by the remaining cryptlen
 * bytes in one pass.  When that returns 0, the MAC is recomputed over the
 * output data into odata and compared with auth_tag.
 *
 * Returns 0 if the tags match, -EBADMSG if they differ, or the first
 * non-zero value returned by a step.
 */
static int crypto_ccm_decrypt(struct aead_request *req)
{
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(aead);
	struct crypto_ccm_req_priv_ctx *pctx = crypto_ccm_reqctx(req);
	struct skcipher_request *skreq = &pctx->skreq;
	struct scatterlist *dst;
	unsigned int authsize = crypto_aead_authsize(aead);
	unsigned int cryptlen = req->cryptlen;
	u8 *authtag = pctx->auth_tag;
	u8 *odata = pctx->odata;
	u8 *iv = pctx->idata;
	int err;

	/* NOTE(review): assumes req->cryptlen >= authsize - confirm in caller. */
	cryptlen -= authsize;

	err = crypto_ccm_init_crypt(req, authtag);
	if (err)
		return err;

	/* Fetch the tag stored after the cryptlen bytes of input data. */
	scatterwalk_map_and_copy(authtag, sg_next(pctx->src), cryptlen,
				 authsize, 0);

	dst = pctx->src;
	if (req->src != req->dst)
		dst = pctx->dst;

	/*
	 * The skcipher is given a copy of the IV; presumably so that req->iv
	 * stays intact for format_input() in the crypto_ccm_auth() call
	 * below - TODO confirm.
	 */
	memcpy(iv, req->iv, 16);

	skcipher_request_set_tfm(skreq, ctx->ctr);
	skcipher_request_set_callback(skreq, pctx->flags,
				      crypto_ccm_decrypt_done, req);
	skcipher_request_set_crypt(skreq, pctx->src, dst, cryptlen + 16, iv);
	err = crypto_skcipher_decrypt(skreq);
	if (err)
		return err;

	err = crypto_ccm_auth(req, sg_next(dst), cryptlen);
	if (err)
		return err;

	/* verify */
	if (crypto_memneq(authtag, odata, authsize))
		return -EBADMSG;

	return err;
}

/*
 * Instantiate the MAC and CTR transforms from the instance spawns, store
 * them in the transform context and set the request size to the private
 * context plus alignment slack plus the larger of the two sub-request sizes.
 *
 * Returns 0, or the error from whichever spawn failed; the MAC transform is
 * freed again if the CTR spawn fails.
 */
static int crypto_ccm_init_tfm(struct crypto_aead *tfm)
{
	struct aead_instance *inst = aead_alg_instance(tfm);
	struct ccm_instance_ctx *ictx = aead_instance_ctx(inst);
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);
	struct crypto_ahash *mac;
	struct crypto_skcipher *ctr;
	unsigned long align;
	int err;

	mac = crypto_spawn_ahash(&ictx->mac);
	if (IS_ERR(mac))
		return PTR_ERR(mac);

	ctr = crypto_spawn_skcipher(&ictx->ctr);
	err = PTR_ERR(ctr);
	if (IS_ERR(ctr))
		goto err_free_mac;

	ctx->mac = mac;
	ctx->ctr = ctr;

	align = crypto_aead_alignmask(tfm);
	align &= ~(crypto_tfm_ctx_alignment() - 1);
	crypto_aead_set_reqsize(
		tfm,
		align + sizeof(struct
crypto_ccm_req_priv_ctx) +
		max(crypto_ahash_reqsize(mac), crypto_skcipher_reqsize(ctr)));

	return 0;

err_free_mac:
	crypto_free_ahash(mac);
	return err;
}

/* Free the MAC and CTR transforms held in the transform context. */
static void crypto_ccm_exit_tfm(struct crypto_aead *tfm)
{
	struct crypto_ccm_ctx *ctx = crypto_aead_ctx(tfm);

	crypto_free_ahash(ctx->mac);
	crypto_free_skcipher(ctx->ctr);
}

/* Drop both spawns of the instance and free the instance itself. */
static void crypto_ccm_free(struct aead_instance *inst)
{
	struct ccm_instance_ctx *ctx = aead_instance_ctx(inst);

	crypto_drop_ahash(&ctx->mac);
	crypto_drop_skcipher(&ctx->ctr);
	kfree(inst);
}

/*
 * Create and register a CCM AEAD instance built from the skcipher named
 * ctr_name and the ahash named mac_name.
 *
 * The MAC algorithm name must start with "cbcmac(" and its digest size must
 * be 16.  The skcipher name must start with "ctr(", with IV size 16 and
 * block size 1.  The text after those prefixes must be identical for both.
 *
 * Returns 0 on success or a negative errno; on failure the instance is
 * released through crypto_ccm_free().
 */
static int crypto_ccm_create_common(struct crypto_template *tmpl,
				    struct rtattr **tb,
				    const char *ctr_name,
				    const char *mac_name)
{
	struct skcipher_alg_common *ctr;
	u32 mask;
	struct aead_instance *inst;
	struct ccm_instance_ctx *ictx;
	struct hash_alg_common *mac;
	int err;

	err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask);
	if (err)
		return err;

	/* The instance context (the two spawns) follows the instance. */
	inst = kzalloc(sizeof(*inst) + sizeof(*ictx), GFP_KERNEL);
	if (!inst)
		return -ENOMEM;
	ictx = aead_instance_ctx(inst);

	err = crypto_grab_ahash(&ictx->mac, aead_crypto_instance(inst),
				mac_name, 0, mask | CRYPTO_ALG_ASYNC);
	if (err)
		goto err_free_inst;
	mac = crypto_spawn_ahash_alg(&ictx->mac);

	err = -EINVAL;
	if (strncmp(mac->base.cra_name, "cbcmac(", 7) != 0 ||
	    mac->digestsize != 16)
		goto err_free_inst;

	err = crypto_grab_skcipher(&ictx->ctr, aead_crypto_instance(inst),
				   ctr_name, 0, mask);
	if (err)
		goto err_free_inst;
	ctr = crypto_spawn_skcipher_alg_common(&ictx->ctr);

	/* The skcipher algorithm must be CTR mode, using 16-byte blocks. */
	err = -EINVAL;
	if (strncmp(ctr->base.cra_name, "ctr(", 4) != 0 ||
	    ctr->ivsize != 16 || ctr->base.cra_blocksize != 1)
		goto err_free_inst;

	/* ctr and cbcmac must use the same underlying block cipher. 
*/
	if (strcmp(ctr->base.cra_name + 4, mac->base.cra_name + 7) != 0)
		goto err_free_inst;

	err = -ENAMETOOLONG;
	/*
	 * The text after "ctr(" is reused as is, so it supplies the closing
	 * parenthesis of the "ccm(" name.
	 */
	if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME,
		     "ccm(%s", ctr->base.cra_name + 4) >= CRYPTO_MAX_ALG_NAME)
		goto err_free_inst;

	if (snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
		     "ccm_base(%s,%s)", ctr->base.cra_driver_name,
		     mac->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
		goto err_free_inst;

	/* Priority is the mean of the two component priorities. */
	inst->alg.base.cra_priority = (mac->base.cra_priority +
				       ctr->base.cra_priority) / 2;
	inst->alg.base.cra_blocksize = 1;
	inst->alg.base.cra_alignmask = ctr->base.cra_alignmask;
	inst->alg.ivsize = 16;
	inst->alg.chunksize = ctr->chunksize;
	inst->alg.maxauthsize = 16;
	inst->alg.base.cra_ctxsize = sizeof(struct crypto_ccm_ctx);
	inst->alg.init = crypto_ccm_init_tfm;
	inst->alg.exit = crypto_ccm_exit_tfm;
	inst->alg.setkey = crypto_ccm_setkey;
	inst->alg.setauthsize = crypto_ccm_setauthsize;
	inst->alg.encrypt = crypto_ccm_encrypt;
	inst->alg.decrypt = crypto_ccm_decrypt;

	inst->free = crypto_ccm_free;

	err = aead_register_instance(tmpl, inst);
	if (err) {
		/* Also the target of every earlier failure in this function. */
err_free_inst:
		crypto_ccm_free(inst);
	}
	return err;
}

/*
 * Template create hook taking one cipher name: builds "ctr(<cipher>)" and
 * "cbcmac(<cipher>)" and hands them to crypto_ccm_create_common().
 *
 * Returns -ENAMETOOLONG if either name does not fit in CRYPTO_MAX_ALG_NAME,
 * the error from crypto_attr_alg_name(), or the result of the common
 * routine.
 */
static int crypto_ccm_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	const char *cipher_name;
	char ctr_name[CRYPTO_MAX_ALG_NAME];
	char mac_name[CRYPTO_MAX_ALG_NAME];

	cipher_name = crypto_attr_alg_name(tb[1]);
	if (IS_ERR(cipher_name))
		return PTR_ERR(cipher_name);

	if (snprintf(ctr_name, CRYPTO_MAX_ALG_NAME, "ctr(%s)",
		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

	if (snprintf(mac_name, CRYPTO_MAX_ALG_NAME, "cbcmac(%s)",
		     cipher_name) >= CRYPTO_MAX_ALG_NAME)
		return -ENAMETOOLONG;

	return crypto_ccm_create_common(tmpl, tb, ctr_name, mac_name);
}

/*
 * Template create hook taking explicit ctr (tb[1]) and mac (tb[2]) algorithm
 * names, which are passed unchanged to crypto_ccm_create_common().
 */
static int crypto_ccm_base_create(struct crypto_template *tmpl,
				  struct rtattr **tb)
{
	const char *ctr_name;
	const char *mac_name;

	ctr_name = crypto_attr_alg_name(tb[1]);
	if (IS_ERR(ctr_name))
		return PTR_ERR(ctr_name);

	mac_name = crypto_attr_alg_name(tb[2]);
	if (IS_ERR(mac_name))
		return
PTR_ERR(mac_name);

	return crypto_ccm_create_common(tmpl, tb, ctr_name, mac_name);
}

/*
 * Set the key of an rfc4309 transform.
 *
 * The last 3 bytes of key are saved as the nonce; the remaining
 * keylen - 3 bytes are set as the key of the child AEAD, with the parent's
 * request flags forwarded.
 *
 * Returns -EINVAL if keylen is below 3, otherwise the result of
 * crypto_aead_setkey() on the child.
 */
static int crypto_rfc4309_setkey(struct crypto_aead *parent, const u8 *key,
				 unsigned int keylen)
{
	struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(parent);
	struct crypto_aead *child = ctx->child;

	if (keylen < 3)
		return -EINVAL;

	keylen -= 3;
	memcpy(ctx->nonce, key + keylen, 3);

	crypto_aead_clear_flags(child, CRYPTO_TFM_REQ_MASK);
	crypto_aead_set_flags(child, crypto_aead_get_flags(parent) &
				     CRYPTO_TFM_REQ_MASK);
	return crypto_aead_setkey(child, key, keylen);
}

/*
 * Accept tag lengths of 8, 12 or 16 bytes and forward them to the child
 * AEAD; any other value is -EINVAL.
 */
static int crypto_rfc4309_setauthsize(struct crypto_aead *parent,
				      unsigned int authsize)
{
	struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(parent);

	switch (authsize) {
	case 8:
	case 12:
	case 16:
		break;
	default:
		return -EINVAL;
	}

	return crypto_aead_setauthsize(ctx->child, authsize);
}

/*
 * Turn an rfc4309 request into a request for the child AEAD.
 *
 * A 16-byte IV is built after the child request area: byte 0 is 3, then the
 * 3 nonce bytes, then the 8 bytes of req->iv.  The first assoclen - 8 bytes
 * of req->src are copied to a buffer after that IV and become the child's
 * associated data; they are followed in the rebuilt scatterlists by the
 * request data located after assoclen bytes.  The caller's completion
 * callback and flags are reused.
 *
 * Returns the prepared child request.
 */
static struct aead_request *crypto_rfc4309_crypt(struct aead_request *req)
{
	struct crypto_rfc4309_req_ctx *rctx = aead_request_ctx(req);
	struct aead_request *subreq = &rctx->subreq;
	struct crypto_aead *aead = crypto_aead_reqtfm(req);
	struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(aead);
	struct crypto_aead *child = ctx->child;
	struct scatterlist *sg;
	u8 *iv = PTR_ALIGN((u8 *)(subreq + 1) + crypto_aead_reqsize(child),
			   crypto_aead_alignmask(child) + 1);

	/* L' */
	iv[0] = 3;

	memcpy(iv + 1, ctx->nonce, 3);
	memcpy(iv + 4, req->iv, 8);

	/* Copy the leading assoclen - 8 bytes of associated data. */
	scatterwalk_map_and_copy(iv + 16, req->src, 0, req->assoclen - 8, 0);

	sg_init_table(rctx->src, 3);
	sg_set_buf(rctx->src, iv + 16, req->assoclen - 8);
	sg = scatterwalk_ffwd(rctx->src + 1, req->src, req->assoclen);
	if (sg != rctx->src + 1)
		sg_chain(rctx->src, 2, sg);

	if (req->src != req->dst) {
		sg_init_table(rctx->dst, 3);
		sg_set_buf(rctx->dst, iv + 16, req->assoclen - 8);
		sg = scatterwalk_ffwd(rctx->dst + 1, req->dst, req->assoclen);
		if (sg != rctx->dst + 1)
			sg_chain(rctx->dst, 2, sg);
	}

	aead_request_set_tfm(subreq, child);
	aead_request_set_callback(subreq, req->base.flags, req->base.complete,
				  req->base.data);
aead_request_set_crypt(subreq, rctx->src, req->src == req->dst ? rctx->src : rctx->dst, req->cryptlen, iv); aead_request_set_ad(subreq, req->assoclen - 8); return subreq; } static int crypto_rfc4309_encrypt(struct aead_request *req) { if (req->assoclen != 16 && req->assoclen != 20) return -EINVAL; req = crypto_rfc4309_crypt(req); return crypto_aead_encrypt(req); } static int crypto_rfc4309_decrypt(struct aead_request *req) { if (req->assoclen != 16 && req->assoclen != 20) return -EINVAL; req = crypto_rfc4309_crypt(req); return crypto_aead_decrypt(req); } static int crypto_rfc4309_init_tfm(struct crypto_aead *tfm) { struct aead_instance *inst = aead_alg_instance(tfm); struct crypto_aead_spawn *spawn = aead_instance_ctx(inst); struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(tfm); struct crypto_aead *aead; unsigned long align; aead = crypto_spawn_aead(spawn); if (IS_ERR(aead)) return PTR_ERR(aead); ctx->child = aead; align = crypto_aead_alignmask(aead); align &= ~(crypto_tfm_ctx_alignment() - 1); crypto_aead_set_reqsize( tfm, sizeof(struct crypto_rfc4309_req_ctx) + ALIGN(crypto_aead_reqsize(aead), crypto_tfm_ctx_alignment()) + align + 32); return 0; } static void crypto_rfc4309_exit_tfm(struct crypto_aead *tfm) { struct crypto_rfc4309_ctx *ctx = crypto_aead_ctx(tfm); crypto_free_aead(ctx->child); } static void crypto_rfc4309_free(struct aead_instance *inst) { crypto_drop_aead(aead_instance_ctx(inst)); kfree(inst); } static int crypto_rfc4309_create(struct crypto_template *tmpl, struct rtattr **tb) { u32 mask; struct aead_instance *inst; struct crypto_aead_spawn *spawn; struct aead_alg *alg; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_AEAD, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = aead_instance_ctx(inst); err = crypto_grab_aead(spawn, aead_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_aead_alg(spawn); err = 
-EINVAL; /* We only support 16-byte blocks. */ if (crypto_aead_alg_ivsize(alg) != 16) goto err_free_inst; /* Not a stream cipher? */ if (alg->base.cra_blocksize != 1) goto err_free_inst; err = -ENAMETOOLONG; if (snprintf(inst->alg.base.cra_name, CRYPTO_MAX_ALG_NAME, "rfc4309(%s)", alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME || snprintf(inst->alg.base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "rfc4309(%s)", alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = 1; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.ivsize = 8; inst->alg.chunksize = crypto_aead_alg_chunksize(alg); inst->alg.maxauthsize = 16; inst->alg.base.cra_ctxsize = sizeof(struct crypto_rfc4309_ctx); inst->alg.init = crypto_rfc4309_init_tfm; inst->alg.exit = crypto_rfc4309_exit_tfm; inst->alg.setkey = crypto_rfc4309_setkey; inst->alg.setauthsize = crypto_rfc4309_setauthsize; inst->alg.encrypt = crypto_rfc4309_encrypt; inst->alg.decrypt = crypto_rfc4309_decrypt; inst->free = crypto_rfc4309_free; err = aead_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_rfc4309_free(inst); } return err; } static int crypto_cbcmac_digest_setkey(struct crypto_shash *parent, const u8 *inkey, unsigned int keylen) { struct cbcmac_tfm_ctx *ctx = crypto_shash_ctx(parent); return crypto_cipher_setkey(ctx->child, inkey, keylen); } static int crypto_cbcmac_digest_init(struct shash_desc *pdesc) { int bs = crypto_shash_digestsize(pdesc->tfm); u8 *dg = shash_desc_ctx(pdesc); memset(dg, 0, bs); return 0; } static int crypto_cbcmac_digest_update(struct shash_desc *pdesc, const u8 *p, unsigned int len) { struct crypto_shash *parent = pdesc->tfm; struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_digestsize(parent); u8 *dg = shash_desc_ctx(pdesc); do { crypto_xor(dg, p, bs); crypto_cipher_encrypt_one(tfm, dg, dg); p += bs; len -= bs; } 
while (len >= bs); return len; } static int crypto_cbcmac_digest_finup(struct shash_desc *pdesc, const u8 *src, unsigned int len, u8 *out) { struct crypto_shash *parent = pdesc->tfm; struct cbcmac_tfm_ctx *tctx = crypto_shash_ctx(parent); struct crypto_cipher *tfm = tctx->child; int bs = crypto_shash_digestsize(parent); u8 *dg = shash_desc_ctx(pdesc); if (len) { crypto_xor(dg, src, len); crypto_cipher_encrypt_one(tfm, out, dg); return 0; } memcpy(out, dg, bs); return 0; } static int cbcmac_init_tfm(struct crypto_tfm *tfm) { struct crypto_cipher *cipher; struct crypto_instance *inst = (void *)tfm->__crt_alg; struct crypto_cipher_spawn *spawn = crypto_instance_ctx(inst); struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); cipher = crypto_spawn_cipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; return 0; }; static void cbcmac_exit_tfm(struct crypto_tfm *tfm) { struct cbcmac_tfm_ctx *ctx = crypto_tfm_ctx(tfm); crypto_free_cipher(ctx->child); } static int cbcmac_create(struct crypto_template *tmpl, struct rtattr **tb) { struct shash_instance *inst; struct crypto_cipher_spawn *spawn; struct crypto_alg *alg; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SHASH, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = shash_instance_ctx(inst); err = crypto_grab_cipher(spawn, shash_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_cipher_alg(spawn); err = crypto_inst_setname(shash_crypto_instance(inst), tmpl->name, alg); if (err) goto err_free_inst; inst->alg.base.cra_priority = alg->cra_priority; inst->alg.base.cra_blocksize = alg->cra_blocksize; inst->alg.digestsize = alg->cra_blocksize; inst->alg.descsize = alg->cra_blocksize; inst->alg.base.cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY; inst->alg.base.cra_ctxsize = sizeof(struct cbcmac_tfm_ctx); inst->alg.base.cra_init = cbcmac_init_tfm; 
inst->alg.base.cra_exit = cbcmac_exit_tfm;

	inst->alg.init = crypto_cbcmac_digest_init;
	inst->alg.update = crypto_cbcmac_digest_update;
	inst->alg.finup = crypto_cbcmac_digest_finup;
	inst->alg.setkey = crypto_cbcmac_digest_setkey;

	inst->free = shash_free_singlespawn_instance;

	err = shash_register_instance(tmpl, inst);
	if (err) {
		/* Shared cleanup: also reached by the earlier goto err_free_inst. */
err_free_inst:
		shash_free_singlespawn_instance(inst);
	}
	return err;
}

/*
 * Templates provided by this module, each mapping a template name to the
 * constructor that builds instances of it.
 */
static struct crypto_template crypto_ccm_tmpls[] = {
	{
		.name = "cbcmac",
		.create = cbcmac_create,
		.module = THIS_MODULE,
	}, {
		.name = "ccm_base",
		.create = crypto_ccm_base_create,
		.module = THIS_MODULE,
	}, {
		.name = "ccm",
		.create = crypto_ccm_create,
		.module = THIS_MODULE,
	}, {
		.name = "rfc4309",
		.create = crypto_rfc4309_create,
		.module = THIS_MODULE,
	},
};

/* Module entry point: register every template in crypto_ccm_tmpls. */
static int __init crypto_ccm_module_init(void)
{
	return crypto_register_templates(crypto_ccm_tmpls,
					 ARRAY_SIZE(crypto_ccm_tmpls));
}

/* Module exit point: unregister the templates registered at init. */
static void __exit crypto_ccm_module_exit(void)
{
	crypto_unregister_templates(crypto_ccm_tmpls,
				    ARRAY_SIZE(crypto_ccm_tmpls));
}

module_init(crypto_ccm_module_init);
module_exit(crypto_ccm_module_exit);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Counter with CBC MAC");
/* One crypto module alias per template name registered above. */
MODULE_ALIAS_CRYPTO("ccm_base");
MODULE_ALIAS_CRYPTO("rfc4309");
MODULE_ALIAS_CRYPTO("ccm");
MODULE_ALIAS_CRYPTO("cbcmac");
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 // SPDX-License-Identifier: GPL-2.0-only /* DVB USB framework compliant Linux driver for the AVerMedia AverTV DVB-T * USB2.0 (A800) DVB-T receiver. * * Copyright (C) 2005 Patrick Boettcher (patrick.boettcher@posteo.de) * * Thanks to * - AVerMedia who kindly provided information and * - Glen Harris who suffered from my mistakes during development. * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "dibusb.h" static int debug; module_param(debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (rc=1 (or-able))." DVB_USB_DEBUG_STATUS); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); #define deb_rc(args...) 
dprintk(debug,0x01,args) static int a800_power_ctrl(struct dvb_usb_device *d, int onoff) { /* do nothing for the AVerMedia */ return 0; } /* assure to put cold to 0 for iManufacturer == 1 */ static int a800_identify_state(struct usb_device *udev, const struct dvb_usb_device_properties *props, const struct dvb_usb_device_description **desc, int *cold) { *cold = udev->descriptor.iManufacturer != 1; return 0; } static int a800_rc_query(struct dvb_usb_device *d) { int ret = 0; u8 *key = kmalloc(5, GFP_KERNEL); if (!key) return -ENOMEM; if (usb_control_msg(d->udev,usb_rcvctrlpipe(d->udev,0), 0x04, USB_TYPE_VENDOR | USB_DIR_IN, 0, 0, key, 5, 2000) != 5) { ret = -ENODEV; goto out; } /* Note that extended nec and nec32 are dropped */ if (key[0] == 1) rc_keydown(d->rc_dev, RC_PROTO_NEC, RC_SCANCODE_NEC(key[1], key[3]), 0); else if (key[0] == 2) rc_repeat(d->rc_dev); out: kfree(key); return ret; } /* USB Driver stuff */ static struct dvb_usb_device_properties a800_properties; static int a800_probe(struct usb_interface *intf, const struct usb_device_id *id) { return dvb_usb_device_init(intf, &a800_properties, THIS_MODULE, NULL, adapter_nr); } /* do not change the order of the ID table */ enum { AVERMEDIA_DVBT_USB2_COLD, AVERMEDIA_DVBT_USB2_WARM, }; static const struct usb_device_id a800_table[] = { DVB_USB_DEV(AVERMEDIA, AVERMEDIA_DVBT_USB2_COLD), DVB_USB_DEV(AVERMEDIA, AVERMEDIA_DVBT_USB2_WARM), { } }; MODULE_DEVICE_TABLE (usb, a800_table); static struct dvb_usb_device_properties a800_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = CYPRESS_FX2, .firmware = "dvb-usb-avertv-a800-02.fw", .num_adapters = 1, .adapter = { { .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 32, .streaming_ctrl = dibusb2_0_streaming_ctrl, .pid_filter = dibusb_pid_filter, .pid_filter_ctrl = dibusb_pid_filter_ctrl, .frontend_attach = dibusb_dib3000mc_frontend_attach, .tuner_attach = 
dibusb_dib3000mc_tuner_attach,

			/* parameter for the MPEG2-data transfer */
			.stream = {
				.type = USB_BULK,
				.count = 7,
				.endpoint = 0x06,
				.u = {
					.bulk = {
						.buffersize = 4096,
					}
				}
			},
		}},
		.size_of_priv = sizeof(struct dibusb_state),
		},
	},

	.power_ctrl = a800_power_ctrl,
	.identify_state = a800_identify_state,

	/* Remote control: polled through a800_rc_query, NEC protocol only. */
	.rc.core = {
		.rc_interval = DEFAULT_RC_INTERVAL,
		.rc_codes = RC_MAP_AVERMEDIA_M135A,
		.module_name = KBUILD_MODNAME,
		.rc_query = a800_rc_query,
		.allowed_protos = RC_PROTO_BIT_NEC,
	},

	.i2c_algo = &dibusb_i2c_algo,

	.generic_bulk_ctrl_endpoint = 0x01,
	.num_device_descs = 1,
	.devices = {
		/* Name, then the cold-state and warm-state ID table entries. */
		{ "AVerMedia AverTV DVB-T USB 2.0 (A800)",
			{ &a800_table[AVERMEDIA_DVBT_USB2_COLD], NULL },
			{ &a800_table[AVERMEDIA_DVBT_USB2_WARM], NULL },
		},
	}
};

/* USB driver glue: probe via a800_probe, teardown via the DVB-USB core. */
static struct usb_driver a800_driver = {
	.name = "dvb_usb_a800",
	.probe = a800_probe,
	.disconnect = dvb_usb_device_exit,
	.id_table = a800_table,
};

module_usb_driver(a800_driver);

MODULE_AUTHOR("Patrick Boettcher <patrick.boettcher@posteo.de>");
MODULE_DESCRIPTION("AVerMedia AverTV DVB-T USB 2.0 (A800)");
MODULE_VERSION("1.0");
MODULE_LICENSE("GPL");
2 10 1 2 2 2 2 2 2 5 1 1 1 2 3 1 2 2 175 2 173 236 236 238 171 4 152 169 1 35 237 236 150 10 1 9 7 10 136 1 1 68 52 2 3 3 7 7 162 1 6 4 1 1 130 115 10 33 22 18 18 11 24 9 18 16 2 29 30 40 54 10 3 4 34 17 6 9 12 6 3 50 74 23 23 23 18 8 21 1 23 6 20 1 14 1 10 19 7 14 7 22 1 21 5 22 22 23 23 23 12 17 8 21 2 21 2 20 1 22 3 1 2 42 15 1 31 33 21 34 15 35 12 37 10 37 8 36 10 44 3 43 3 44 23 2 11 11 11 11 13 3 11 2 8 8 8 8 30 10 38 1 1 38 33 4 1 4 1 24 15 14 2 12 5 6 7 3 2 8 10 8 2 7 3 1 3 3 4 1 2 1 2 14 25 15 1 15 15 59 60 32 15 14 1 3 14 15 8 1 2 5 4 8 1 2 5 4 3 3 3 3 3 3 5 1 1 1 2 3 10 9 6 7 7 2 4 1 4 4 2 7 7 10 9 1 3 2 4 3 1 1 1 1 1 3 3 13 1 12 11 1 4 5 3 15 1 6 2 13 50 51 51 50 50 50 50 8 43 50 16 9 15 16 15 9 9 44 7 1 40 13 2 1 1 1 9 8 1 9 5 4 8 1 8 7 1 9 1 4 8 2 1 1 4 3 1 4 3 1 3 3 1 1 7 2 5 4 1 2 1 1 1 9 2 1 1 4 1 4 3 1 4 1 3 1 3 4 1 1 2 4 1 1 3 2 4 4 4 4 5 4 3 3 172 157 156 16 1 1 1 13 3 13 13 13 13 3 3 3 3 3 3 13 13 13 1 13 3 3 3 3 3 3 3 48 46 27 16 16 6 56 56 50 48 1 5 33 32 29 4 33 25 12 13 3 25 25 25 25 12 13 3 25 32 10 8 7 7 1 1 1 1 13 3 1 2 9 9 3 8 8 10 7 3 10 7 10 10 9 9 2 9 2 4 175 1 1 174 19 172 171 9 1 1 3 5 8 1 7 7 7 2 1 2 2 2 2 2 2 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 
736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 
1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 
1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 
1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 
2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 
2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 
3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 
3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/key/af_key.c An implementation of PF_KEYv2 sockets. * * Authors: Maxim Giryaev <gem@asplinux.ru> * David S. 
Miller <davem@redhat.com> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * Kazunori MIYAZAWA / USAGI Project <miyazawa@linux-ipv6.org> * Derek Atkins <derek@ihtfp.com> */ #include <linux/capability.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/socket.h> #include <linux/pfkeyv2.h> #include <linux/ipsec.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/slab.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/xfrm.h> #include <net/sock.h> #define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x)) #define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x)) static unsigned int pfkey_net_id __read_mostly; struct netns_pfkey { /* List of all pfkey sockets. */ struct hlist_head table; atomic_t socks_nr; }; static DEFINE_MUTEX(pfkey_mutex); #define DUMMY_MARK 0 static const struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; int registered; int promisc; struct { uint8_t msg_version; uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; struct sk_buff *skb; } dump; struct mutex dump_lock; }; static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family); static inline struct pfkey_sock *pfkey_sk(struct sock *sk) { return (struct pfkey_sock *)sk; } static int pfkey_can_dump(const struct sock *sk) { if (3 * atomic_read(&sk->sk_rmem_alloc) <= 2 * sk->sk_rcvbuf) return 1; return 0; } static void pfkey_terminate_dump(struct pfkey_sock *pfk) { if (pfk->dump.dump) { if (pfk->dump.skb) { kfree_skb(pfk->dump.skb); pfk->dump.skb = NULL; } pfk->dump.done(pfk); pfk->dump.dump = NULL; pfk->dump.done = NULL; } } static void 
pfkey_sock_destruct(struct sock *sk) { struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { pr_err("Attempt to release alive pfkey socket: %p\n", sk); return; } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(refcount_read(&sk->sk_wmem_alloc)); atomic_dec(&net_pfkey->socks_nr); } static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) { struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); mutex_lock(&pfkey_mutex); sk_add_node_rcu(sk, &net_pfkey->table); mutex_unlock(&pfkey_mutex); } static void pfkey_remove(struct sock *sk) { mutex_lock(&pfkey_mutex); sk_del_node_init_rcu(sk); mutex_unlock(&pfkey_mutex); } static struct proto key_proto = { .name = "KEY", .owner = THIS_MODULE, .obj_size = sizeof(struct pfkey_sock), }; static int pfkey_create(struct net *net, struct socket *sock, int protocol, int kern) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); struct sock *sk; struct pfkey_sock *pfk; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; if (protocol != PF_KEY_V2) return -EPROTONOSUPPORT; sk = sk_alloc(net, PF_KEY, GFP_KERNEL, &key_proto, kern); if (sk == NULL) return -ENOMEM; pfk = pfkey_sk(sk); mutex_init(&pfk->dump_lock); sock->ops = &pfkey_ops; sock_init_data(sock, sk); sk->sk_family = PF_KEY; sk->sk_destruct = pfkey_sock_destruct; atomic_inc(&net_pfkey->socks_nr); pfkey_insert(sk); return 0; } static int pfkey_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return 0; pfkey_remove(sk); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_write_queue); synchronize_rcu(); sock_put(sk); return 0; } static int pfkey_broadcast_one(struct sk_buff *skb, gfp_t allocation, struct sock *sk) { int err = -ENOBUFS; if 
(atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
		return err;

	/* From here on skb refers to the clone, not the caller's buffer. */
	skb = skb_clone(skb, allocation);
	if (skb) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk);
		err = 0;
	}
	return err;
}

/* Send SKB to all pfkey sockets matching selected criteria.  */
#define BROADCAST_ALL		0
#define BROADCAST_ONE		1
#define BROADCAST_REGISTERED	2
#define BROADCAST_PROMISC_ONLY	4

/*
 * Deliver clones of @skb to the PF_KEY sockets of @net and then free @skb.
 *
 * @allocation is used only for the delivery to @one_sk; deliveries made
 * inside the RCU read-side section use GFP_ATOMIC.
 *
 * Returns -ENOMEM if @skb is NULL. Otherwise the result starts as -ESRCH;
 * with BROADCAST_REGISTERED it is replaced by each delivery result while
 * it is still non-zero, and if @one_sk is non-NULL the result of the
 * delivery to @one_sk is returned.
 */
static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
			   int broadcast_flags, struct sock *one_sk,
			   struct net *net)
{
	struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id);
	struct sock *sk;
	int err = -ESRCH;

	/* XXX Do we need something like netlink_overrun?  I think
	 * XXX PF_KEY socket apps will not mind current behavior.
	 */
	if (!skb)
		return -ENOMEM;

	rcu_read_lock();
	sk_for_each_rcu(sk, &net_pfkey->table) {
		struct pfkey_sock *pfk = pfkey_sk(sk);
		int err2;

		/* Yes, it means that if you are meant to receive this
		 * pfkey message you receive it twice as promiscuous
		 * socket.
		 */
		if (pfk->promisc)
			pfkey_broadcast_one(skb, GFP_ATOMIC, sk);

		/* the exact target will be processed later */
		if (sk == one_sk)
			continue;
		if (broadcast_flags != BROADCAST_ALL) {
			if (broadcast_flags & BROADCAST_PROMISC_ONLY)
				continue;
			if ((broadcast_flags & BROADCAST_REGISTERED) &&
			    !pfk->registered)
				continue;
			if (broadcast_flags & BROADCAST_ONE)
				continue;
		}

		err2 = pfkey_broadcast_one(skb, GFP_ATOMIC, sk);

		/* Error is cleared after successful sending to at least one
		 * registered KM */
		if ((broadcast_flags & BROADCAST_REGISTERED) && err)
			err = err2;
	}
	rcu_read_unlock();

	if (one_sk != NULL)
		err = pfkey_broadcast_one(skb, allocation, one_sk);

	/* Receivers hold clones; the original is always released here. */
	kfree_skb(skb);
	return err;
}

/*
 * Advance the dump registered on @pfk, under pfk->dump_lock.
 *
 * Returns 0 when no dump is active, when the dump callback reports
 * -ENOBUFS, or when a final message is pending but the socket cannot
 * take it yet; otherwise returns the dump callback's result after the
 * dump has been terminated.
 */
static int pfkey_do_dump(struct pfkey_sock *pfk)
{
	struct sadb_msg *hdr;
	int rc;

	mutex_lock(&pfk->dump_lock);
	if (!pfk->dump.dump) {
		rc = 0;
		goto out;
	}

	rc = pfk->dump.dump(pfk);
	if (rc == -ENOBUFS) {
		rc = 0;
		goto out;
	}

	if (pfk->dump.skb) {
		if (!pfkey_can_dump(&pfk->sk)) {
			rc = 0;
			goto out;
		}

		hdr = (struct sadb_msg *)
pfk->dump.skb->data;
		/* Last message of the dump: sequence 0, errno from the callback. */
		hdr->sadb_msg_seq = 0;
		hdr->sadb_msg_errno = rc;
		pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
				&pfk->sk, sock_net(&pfk->sk));
		/* pfkey_broadcast() freed the skb; forget the pointer. */
		pfk->dump.skb = NULL;
	}

	pfkey_terminate_dump(pfk);

out:
	mutex_unlock(&pfk->dump_lock);
	return rc;
}

/* Copy a whole PF_KEY base header by structure assignment. */
static inline void pfkey_hdr_dup(struct sadb_msg *new,
				 const struct sadb_msg *orig)
{
	*new = *orig;
}

/*
 * Send an error reply for request @orig to @sk.
 *
 * @err is a negative errno. It is negated, the ERESTART* codes are
 * mapped to EINTR, values >= 512 become EINVAL, and the result must lie
 * in 1..255 (enforced with BUG_ON). The reply is a copy of @orig's
 * header with sadb_msg_errno set and the length reduced to the bare
 * header.
 *
 * Returns 0, or -ENOBUFS if the reply skb cannot be allocated.
 */
static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
{
	struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
	struct sadb_msg *hdr;

	if (!skb)
		return -ENOBUFS;

	/* Woe be to the platform trying to support PFKEY yet
	 * having normal errnos outside the 1-255 range, inclusive.
	 */
	err = -err;
	if (err == ERESTARTSYS ||
	    err == ERESTARTNOHAND ||
	    err == ERESTARTNOINTR)
		err = EINTR;
	if (err >= 512)
		err = EINVAL;
	BUG_ON(err <= 0 || err >= 256);

	hdr = skb_put(skb, sizeof(struct sadb_msg));
	pfkey_hdr_dup(hdr, orig);
	hdr->sadb_msg_errno = (uint8_t) err;
	/* sadb_msg_len is expressed in 64-bit words. */
	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
			     sizeof(uint64_t));

	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));

	return 0;
}

/*
 * Minimum size in bytes of each known extension, indexed by extension
 * type; parse_exthdrs() rejects extensions shorter than this.
 */
static const u8 sadb_ext_min_len[] = {
	[SADB_EXT_RESERVED]		= (u8) 0,
	[SADB_EXT_SA]			= (u8) sizeof(struct sadb_sa),
	[SADB_EXT_LIFETIME_CURRENT]	= (u8) sizeof(struct sadb_lifetime),
	[SADB_EXT_LIFETIME_HARD]	= (u8) sizeof(struct sadb_lifetime),
	[SADB_EXT_LIFETIME_SOFT]	= (u8) sizeof(struct sadb_lifetime),
	[SADB_EXT_ADDRESS_SRC]		= (u8) sizeof(struct sadb_address),
	[SADB_EXT_ADDRESS_DST]		= (u8) sizeof(struct sadb_address),
	[SADB_EXT_ADDRESS_PROXY]	= (u8) sizeof(struct sadb_address),
	[SADB_EXT_KEY_AUTH]		= (u8) sizeof(struct sadb_key),
	[SADB_EXT_KEY_ENCRYPT]		= (u8) sizeof(struct sadb_key),
	[SADB_EXT_IDENTITY_SRC]		= (u8) sizeof(struct sadb_ident),
	[SADB_EXT_IDENTITY_DST]		= (u8) sizeof(struct sadb_ident),
	[SADB_EXT_SENSITIVITY]		= (u8) sizeof(struct sadb_sens),
	[SADB_EXT_PROPOSAL]		= (u8) sizeof(struct sadb_prop),
	[SADB_EXT_SUPPORTED_AUTH]	= (u8) sizeof(struct sadb_supported),
[SADB_EXT_SUPPORTED_ENCRYPT]	= (u8) sizeof(struct sadb_supported),
	[SADB_EXT_SPIRANGE]		= (u8) sizeof(struct sadb_spirange),
	[SADB_X_EXT_KMPRIVATE]		= (u8) sizeof(struct sadb_x_kmprivate),
	[SADB_X_EXT_POLICY]		= (u8) sizeof(struct sadb_x_policy),
	[SADB_X_EXT_SA2]		= (u8) sizeof(struct sadb_x_sa2),
	[SADB_X_EXT_NAT_T_TYPE]		= (u8) sizeof(struct sadb_x_nat_t_type),
	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
	[SADB_X_EXT_SEC_CTX]		= (u8) sizeof(struct sadb_x_sec_ctx),
	[SADB_X_EXT_KMADDRESS]		= (u8) sizeof(struct sadb_x_kmaddress),
	[SADB_X_EXT_FILTER]		= (u8) sizeof(struct sadb_x_filter),
};

/* Verify sadb_address_{len,prefixlen} against sa_family.
 *
 * @p points at a struct sadb_address that is immediately followed by a
 * sockaddr. Returns 0 if the extension length (in 64-bit words) exactly
 * matches the sockaddr type for AF_INET (and AF_INET6 when IPv6 is
 * enabled) and the prefix length does not exceed 32 or 128 respectively;
 * returns -EINVAL otherwise, including for every other family.
 */
static int verify_address_len(const void *p)
{
	const struct sadb_address *sp = p;
	const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
	const struct sockaddr_in *sin;
#if IS_ENABLED(CONFIG_IPV6)
	const struct sockaddr_in6 *sin6;
#endif
	int len;

	/* The extension must at least cover sa_family before it is read. */
	if (sp->sadb_address_len <
	    DIV_ROUND_UP(sizeof(*sp) + offsetofend(typeof(*addr), sa_family),
			 sizeof(uint64_t)))
		return -EINVAL;

	switch (addr->sa_family) {
	case AF_INET:
		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin), sizeof(uint64_t));
		if (sp->sadb_address_len != len ||
		    sp->sadb_address_prefixlen > 32)
			return -EINVAL;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t));
		if (sp->sadb_address_len != len ||
		    sp->sadb_address_prefixlen > 128)
			return -EINVAL;
		break;
#endif
	default:
		/* It is user using kernel to keep track of security
		 * associations for another protocol, such as
		 * OSPF/RSVP/RIPV2/MIP.  It is user's job to verify
		 * lengths.
		 *
		 * XXX Actually, association/policy database is not yet
		 * XXX able to cope with arbitrary sockaddr families.
		 * XXX When it can, remove this -EINVAL.
-DaveM */ return -EINVAL; } return 0; } static inline int sadb_key_len(const struct sadb_key *key) { int key_bytes = DIV_ROUND_UP(key->sadb_key_bits, 8); return DIV_ROUND_UP(sizeof(struct sadb_key) + key_bytes, sizeof(uint64_t)); } static int verify_key_len(const void *p) { const struct sadb_key *key = p; if (sadb_key_len(key) > key->sadb_key_len) return -EINVAL; return 0; } static inline int pfkey_sec_ctx_len(const struct sadb_x_sec_ctx *sec_ctx) { return DIV_ROUND_UP(sizeof(struct sadb_x_sec_ctx) + sec_ctx->sadb_x_ctx_len, sizeof(uint64_t)); } static inline int verify_sec_ctx_len(const void *p) { const struct sadb_x_sec_ctx *sec_ctx = p; int len = sec_ctx->sadb_x_ctx_len; if (len > PAGE_SIZE) return -EINVAL; len = pfkey_sec_ctx_len(sec_ctx); if (sec_ctx->sadb_x_sec_len != len) return -EINVAL; return 0; } static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(const struct sadb_x_sec_ctx *sec_ctx, gfp_t gfp) { struct xfrm_user_sec_ctx *uctx = NULL; int ctx_size = sec_ctx->sadb_x_ctx_len; uctx = kmalloc((sizeof(*uctx)+ctx_size), gfp); if (!uctx) return NULL; uctx->len = pfkey_sec_ctx_len(sec_ctx); uctx->exttype = sec_ctx->sadb_x_sec_exttype; uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi; uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg; uctx->ctx_len = sec_ctx->sadb_x_ctx_len; memcpy(uctx + 1, sec_ctx + 1, uctx->ctx_len); return uctx; } static int present_and_same_family(const struct sadb_address *src, const struct sadb_address *dst) { const struct sockaddr *s_addr, *d_addr; if (!src || !dst) return 0; s_addr = (const struct sockaddr *)(src + 1); d_addr = (const struct sockaddr *)(dst + 1); if (s_addr->sa_family != d_addr->sa_family) return 0; if (s_addr->sa_family != AF_INET #if IS_ENABLED(CONFIG_IPV6) && s_addr->sa_family != AF_INET6 #endif ) return 0; return 1; } static int parse_exthdrs(struct sk_buff *skb, const struct sadb_msg *hdr, void **ext_hdrs) { const char *p = (char *) hdr; int len = skb->len; len -= sizeof(*hdr); p += sizeof(*hdr); while (len > 0) { 
const struct sadb_ext *ehdr = (const struct sadb_ext *) p;
		uint16_t ext_type;
		int ext_len;

		/* The generic extension header itself must fit. */
		if (len < sizeof(*ehdr))
			return -EINVAL;

		/* sadb_ext_len counts 64-bit words; convert to bytes. */
		ext_len  = ehdr->sadb_ext_len;
		ext_len *= sizeof(uint64_t);
		ext_type = ehdr->sadb_ext_type;
		if (ext_len < sizeof(uint64_t) ||
		    ext_len > len ||
		    ext_type == SADB_EXT_RESERVED)
			return -EINVAL;

		/* Types above SADB_EXT_MAX are skipped without being recorded. */
		if (ext_type <= SADB_EXT_MAX) {
			int min = (int) sadb_ext_min_len[ext_type];
			if (ext_len < min)
				return -EINVAL;
			/* Each extension type may appear at most once. */
			if (ext_hdrs[ext_type-1] != NULL)
				return -EINVAL;
			switch (ext_type) {
			case SADB_EXT_ADDRESS_SRC:
			case SADB_EXT_ADDRESS_DST:
			case SADB_EXT_ADDRESS_PROXY:
			case SADB_X_EXT_NAT_T_OA:
				if (verify_address_len(p))
					return -EINVAL;
				break;
			case SADB_X_EXT_SEC_CTX:
				if (verify_sec_ctx_len(p))
					return -EINVAL;
				break;
			case SADB_EXT_KEY_AUTH:
			case SADB_EXT_KEY_ENCRYPT:
				if (verify_key_len(p))
					return -EINVAL;
				break;
			default:
				break;
			}
			/* Slot index is the extension type minus one. */
			ext_hdrs[ext_type-1] = (void *) p;
		}
		p   += ext_len;
		len -= ext_len;
	}

	return 0;
}

/* Map a PF_KEY SA type to an IP protocol number; 0 means unsupported. */
static uint16_t
pfkey_satype2proto(uint8_t satype)
{
	switch (satype) {
	case SADB_SATYPE_UNSPEC:
		return IPSEC_PROTO_ANY;
	case SADB_SATYPE_AH:
		return IPPROTO_AH;
	case SADB_SATYPE_ESP:
		return IPPROTO_ESP;
	case SADB_X_SATYPE_IPCOMP:
		return IPPROTO_COMP;
	default:
		return 0;
	}
	/* NOTREACHED */
}

/* Map an IP protocol number to a PF_KEY SA type; 0 means unsupported. */
static uint8_t
pfkey_proto2satype(uint16_t proto)
{
	switch (proto) {
	case IPPROTO_AH:
		return SADB_SATYPE_AH;
	case IPPROTO_ESP:
		return SADB_SATYPE_ESP;
	case IPPROTO_COMP:
		return SADB_X_SATYPE_IPCOMP;
	default:
		return 0;
	}
	/* NOTREACHED */
}

/* BTW, this scheme means that there is no way with PFKEY2 sockets to
 * say specifically 'just raw sockets' as we encode them as 255.
 */

/* PF_KEY "any protocol" (IPSEC_PROTO_ANY) is represented as 0 in xfrm. */
static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
{
	return proto == IPSEC_PROTO_ANY ? 0 : proto;
}

/* Inverse of pfkey_proto_to_xfrm(): xfrm 0 becomes IPSEC_PROTO_ANY. */
static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
{
	return proto ?
proto : IPSEC_PROTO_ANY; } static inline int pfkey_sockaddr_len(sa_family_t family) { switch (family) { case AF_INET: return sizeof(struct sockaddr_in); #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: return sizeof(struct sockaddr_in6); #endif } return 0; } static int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr) { switch (sa->sa_family) { case AF_INET: xaddr->a4 = ((struct sockaddr_in *)sa)->sin_addr.s_addr; return AF_INET; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: memcpy(xaddr->a6, &((struct sockaddr_in6 *)sa)->sin6_addr, sizeof(struct in6_addr)); return AF_INET6; #endif } return 0; } static int pfkey_sadb_addr2xfrm_addr(const struct sadb_address *addr, xfrm_address_t *xaddr) { return pfkey_sockaddr_extract((struct sockaddr *)(addr + 1), xaddr); } static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct sadb_msg *hdr, void * const *ext_hdrs) { const struct sadb_sa *sa; const struct sadb_address *addr; uint16_t proto; unsigned short family; xfrm_address_t *xaddr; sa = ext_hdrs[SADB_EXT_SA - 1]; if (sa == NULL) return NULL; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return NULL; /* sadb_address_len should be checked by caller */ addr = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; if (addr == NULL) return NULL; family = ((const struct sockaddr *)(addr + 1))->sa_family; switch (family) { case AF_INET: xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr; break; #endif default: xaddr = NULL; } if (!xaddr) return NULL; return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) static int pfkey_sockaddr_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family)); } static inline int pfkey_mode_from_xfrm(int mode) { switch(mode) { case XFRM_MODE_TRANSPORT: return 
IPSEC_MODE_TRANSPORT;
	case XFRM_MODE_TUNNEL:
		return IPSEC_MODE_TUNNEL;
	case XFRM_MODE_BEET:
		return IPSEC_MODE_BEET;
	default:
		return -1;
	}
}

/*
 * Map a PF_KEY IPSEC_MODE_* value to an xfrm mode, or -1 if unknown.
 * IPSEC_MODE_ANY is treated as transport mode.
 */
static inline int pfkey_mode_to_xfrm(int mode)
{
	switch(mode) {
	case IPSEC_MODE_ANY:	/*XXX*/
	case IPSEC_MODE_TRANSPORT:
		return XFRM_MODE_TRANSPORT;
	case IPSEC_MODE_TUNNEL:
		return XFRM_MODE_TUNNEL;
	case IPSEC_MODE_BEET:
		return XFRM_MODE_BEET;
	default:
		return -1;
	}
}

/*
 * Fill @sa with @xaddr and @port for the given @family.
 * Returns 32 for AF_INET, 128 for AF_INET6 (when IPv6 is enabled), and
 * 0 without touching @sa for any other family. Callers use the result
 * as a prefix length.
 */
static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port,
					struct sockaddr *sa,
					unsigned short family)
{
	switch (family) {
	case AF_INET:
	    {
		struct sockaddr_in *sin = (struct sockaddr_in *)sa;
		sin->sin_family = AF_INET;
		sin->sin_port = port;
		sin->sin_addr.s_addr = xaddr->a4;
		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
		return 32;
	    }
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
	    {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = port;
		sin6->sin6_flowinfo = 0;
		sin6->sin6_addr = xaddr->in6;
		sin6->sin6_scope_id = 0;
		return 128;
	    }
#endif
	}
	return 0;
}

/*
 * Build a PF_KEY message describing xfrm state @x.
 *
 * @add_keys: when non-zero, key extensions are emitted for algorithms
 *            that have a non-zero key length.
 * @hsc:      bit value 2 adds the hard lifetime extension and bit value
 *            1 the soft one; the current lifetime is always emitted.
 *
 * The base header is zeroed and only sadb_msg_len is set; the caller
 * fills in the remaining header fields.
 *
 * Returns the new skb, ERR_PTR(-EINVAL) if the state's address family
 * or mode cannot be expressed in PF_KEY, or ERR_PTR(-ENOBUFS) if the
 * skb cannot be allocated.
 */
static struct sk_buff *__pfkey_xfrm_state2msg(const struct xfrm_state *x,
					      int add_keys, int hsc)
{
	struct sk_buff *skb;
	struct sadb_msg *hdr;
	struct sadb_sa *sa;
	struct sadb_lifetime *lifetime;
	struct sadb_address *addr;
	struct sadb_key *key;
	struct sadb_x_sa2 *sa2;
	struct sadb_x_sec_ctx *sec_ctx;
	struct xfrm_sec_ctx *xfrm_ctx;
	int ctx_size = 0;
	int size;
	int auth_key_size = 0;
	int encrypt_key_size = 0;
	int sockaddr_size;
	struct xfrm_encap_tmpl *natt = NULL;
	int mode;

	/* address family check */
	sockaddr_size = pfkey_sockaddr_size(x->props.family);
	if (!sockaddr_size)
		return ERR_PTR(-EINVAL);

	/* base, SA, (lifetime (HSC),) address(SD), (address(P),)
	   key(AE), (identity(SD),) (sensitivity)> */
	/* Every extension emitted below must be accounted for in size. */
	size = sizeof(struct sadb_msg) +sizeof(struct sadb_sa) +
		sizeof(struct sadb_lifetime) +
		((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) +
		((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) +
			sizeof(struct sadb_address)*2 +
				sockaddr_size*2 +
					sizeof(struct sadb_x_sa2);

	if ((xfrm_ctx = x->security)) {
		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
		size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
	}

	/* identity & sensitivity */
	/* A proxy address is emitted only when the selector source differs. */
	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr, x->props.family))
		size += sizeof(struct sadb_address) + sockaddr_size;

	if (add_keys) {
		if (x->aalg && x->aalg->alg_key_len) {
			auth_key_size =
				PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8);
			size += sizeof(struct sadb_key) + auth_key_size;
		}
		if (x->ealg && x->ealg->alg_key_len) {
			encrypt_key_size =
				PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8);
			size += sizeof(struct sadb_key) + encrypt_key_size;
		}
	}
	if (x->encap)
		natt = x->encap;

	if (natt && natt->encap_type) {
		size += sizeof(struct sadb_x_nat_t_type);
		size += sizeof(struct sadb_x_nat_t_port);
		size += sizeof(struct sadb_x_nat_t_port);
	}

	skb =  alloc_skb(size + 16, GFP_ATOMIC);
	if (skb == NULL)
		return ERR_PTR(-ENOBUFS);

	/* call should fill header later */
	hdr = skb_put(skb, sizeof(struct sadb_msg));
	/* Zeroes the whole message area, not just the header put so far. */
	memset(hdr, 0, size);	/* XXX do we need this ? */
	hdr->sadb_msg_len = size / sizeof(uint64_t);

	/* sa */
	sa = skb_put(skb, sizeof(struct sadb_sa));
	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
	sa->sadb_sa_exttype = SADB_EXT_SA;
	sa->sadb_sa_spi = x->id.spi;
	sa->sadb_sa_replay = x->props.replay_window;
	switch (x->km.state) {
	case XFRM_STATE_VALID:
		sa->sadb_sa_state = x->km.dying ?
			SADB_SASTATE_DYING : SADB_SASTATE_MATURE;
		break;
	case XFRM_STATE_ACQ:
		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
		break;
	default:
		sa->sadb_sa_state = SADB_SASTATE_DEAD;
		break;
	}
	/* Algorithm ids are reported as 0 unless the algorithm is pfkey_supported. */
	sa->sadb_sa_auth = 0;
	if (x->aalg) {
		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name, 0);
		sa->sadb_sa_auth = (a && a->pfkey_supported) ?
					a->desc.sadb_alg_id : 0;
	}
	sa->sadb_sa_encrypt = 0;
	BUG_ON(x->ealg && x->calg);
	if (x->ealg) {
		struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name, 0);
		sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
					a->desc.sadb_alg_id : 0;
	}
	/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
	if (x->calg) {
		struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name, 0);
		sa->sadb_sa_encrypt = (a && a->pfkey_supported) ?
					a->desc.sadb_alg_id : 0;
	}

	sa->sadb_sa_flags = 0;
	if (x->props.flags & XFRM_STATE_NOECN)
		sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN;
	if (x->props.flags & XFRM_STATE_DECAP_DSCP)
		sa->sadb_sa_flags |= SADB_SAFLAGS_DECAP_DSCP;
	if (x->props.flags & XFRM_STATE_NOPMTUDISC)
		sa->sadb_sa_flags |= SADB_SAFLAGS_NOPMTUDISC;

	/* hard time */
	if (hsc & 2) {
		lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
		lifetime->sadb_lifetime_len =
			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.hard_packet_limit);
		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit);
		lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds;
		lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds;
	}
	/* soft time */
	if (hsc & 1) {
		lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
		lifetime->sadb_lifetime_len =
			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.soft_packet_limit);
		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit);
		lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds;
		lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds;
	}
	/* current time */
	lifetime = skb_put(skb, sizeof(struct sadb_lifetime));
	lifetime->sadb_lifetime_len =
		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
	lifetime->sadb_lifetime_allocations = x->curlft.packets;
	lifetime->sadb_lifetime_bytes = x->curlft.bytes;
	lifetime->sadb_lifetime_addtime = x->curlft.add_time;
	lifetime->sadb_lifetime_usetime = x->curlft.use_time;
	/* src address */
	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
	addr->sadb_address_len =
		(sizeof(struct sadb_address)+sockaddr_size)/
			sizeof(uint64_t);
	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
	/* "if the ports are non-zero, then the sadb_address_proto field,
	   normally zero, MUST be filled in with the transport
	   protocol's number." - RFC2367 */
	addr->sadb_address_proto = 0;
	addr->sadb_address_reserved = 0;

	addr->sadb_address_prefixlen =
		pfkey_sockaddr_fill(&x->props.saddr, 0,
				    (struct sockaddr *) (addr + 1),
				    x->props.family);
	/* A zero return would mean an unsupported family, excluded above. */
	BUG_ON(!addr->sadb_address_prefixlen);

	/* dst address */
	addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size);
	addr->sadb_address_len =
		(sizeof(struct sadb_address)+sockaddr_size)/
			sizeof(uint64_t);
	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
	addr->sadb_address_proto = 0;
	addr->sadb_address_reserved = 0;

	addr->sadb_address_prefixlen =
		pfkey_sockaddr_fill(&x->id.daddr, 0,
				    (struct sockaddr *) (addr + 1),
				    x->props.family);
	BUG_ON(!addr->sadb_address_prefixlen);

	/* proxy address: same condition as in the size computation */
	if (!xfrm_addr_equal(&x->sel.saddr, &x->props.saddr,
			     x->props.family)) {
		addr = skb_put(skb,
			       sizeof(struct sadb_address) + sockaddr_size);
		addr->sadb_address_len =
			(sizeof(struct sadb_address)+sockaddr_size)/
			sizeof(uint64_t);
		addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
		addr->sadb_address_proto =
			pfkey_proto_from_xfrm(x->sel.proto);
		addr->sadb_address_prefixlen = x->sel.prefixlen_s;
		addr->sadb_address_reserved = 0;

		pfkey_sockaddr_fill(&x->sel.saddr, x->sel.sport,
				    (struct sockaddr *) (addr + 1),
				    x->props.family);
	}

	/* auth key */
	if (add_keys && auth_key_size) {
		key = skb_put(skb, sizeof(struct sadb_key) + auth_key_size);
		key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) /
			sizeof(uint64_t);
		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
		key->sadb_key_bits = x->aalg->alg_key_len;
		key->sadb_key_reserved = 0;
		memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8);
	}
	/* encrypt key */
	if (add_keys && encrypt_key_size) {
		key = skb_put(skb, sizeof(struct sadb_key) + encrypt_key_size);
		key->sadb_key_len = (sizeof(struct sadb_key) +
				     encrypt_key_size) / sizeof(uint64_t);
		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
		key->sadb_key_bits = x->ealg->alg_key_len;
		key->sadb_key_reserved = 0;
		memcpy(key + 1, x->ealg->alg_key,
		       (x->ealg->alg_key_len+7)/8);
	}

	/* sa */
	sa2 = skb_put(skb, sizeof(struct sadb_x_sa2));
	sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
	sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
	if ((mode = pfkey_mode_from_xfrm(x->props.mode)) < 0) {
		kfree_skb(skb);
		return ERR_PTR(-EINVAL);
	}
	sa2->sadb_x_sa2_mode = mode;
	sa2->sadb_x_sa2_reserved1 = 0;
	sa2->sadb_x_sa2_reserved2 = 0;
	sa2->sadb_x_sa2_sequence = 0;
	sa2->sadb_x_sa2_reqid = x->props.reqid;

	if (natt && natt->encap_type) {
		struct sadb_x_nat_t_type *n_type;
		struct sadb_x_nat_t_port *n_port;

		/* type */
		n_type = skb_put(skb, sizeof(*n_type));
		n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t);
		n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
		n_type->sadb_x_nat_t_type_type = natt->encap_type;
		n_type->sadb_x_nat_t_type_reserved[0] = 0;
		n_type->sadb_x_nat_t_type_reserved[1] = 0;
		n_type->sadb_x_nat_t_type_reserved[2] = 0;

		/* source port */
		n_port = skb_put(skb, sizeof(*n_port));
		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
		n_port->sadb_x_nat_t_port_port = natt->encap_sport;
		n_port->sadb_x_nat_t_port_reserved = 0;

		/* dest port */
		n_port = skb_put(skb, sizeof(*n_port));
		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
		n_port->sadb_x_nat_t_port_port = natt->encap_dport;
		n_port->sadb_x_nat_t_port_reserved = 0;
	}

	/* security context */
	if (xfrm_ctx) {
		sec_ctx = skb_put(skb,
				  sizeof(struct sadb_x_sec_ctx) + ctx_size);
		sec_ctx->sadb_x_sec_len =
		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
		       xfrm_ctx->ctx_len);
	}

	return skb;
}

/* Full state message: keys included, both hard and soft lifetimes. */
static inline struct sk_buff *pfkey_xfrm_state2msg(const struct xfrm_state *x)
{
	struct sk_buff *skb;

	skb = __pfkey_xfrm_state2msg(x, 1, 3);

	return skb;
}

/* State message without keys; @hsc selects the lifetime extensions. */
static inline struct sk_buff *pfkey_xfrm_state2msg_expire(const struct xfrm_state *x,
							  int hsc)
{
	return __pfkey_xfrm_state2msg(x, 0, hsc);
}

/*
 * Build a new xfrm state from the extensions of a PF_KEY request.
 * Returns the state or an ERR_PTR() value.
 */
static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
						const struct sadb_msg *hdr,
						void * const *ext_hdrs)
{
	struct xfrm_state *x;
	const struct sadb_lifetime *lifetime;
	const struct sadb_sa *sa;
	const struct sadb_key *key;
	const struct sadb_x_sec_ctx *sec_ctx;
	uint16_t proto;
	int err;

	sa = ext_hdrs[SADB_EXT_SA - 1];
	if (!sa ||
	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
		return ERR_PTR(-EINVAL);
	if (hdr->sadb_msg_satype == SADB_SATYPE_ESP &&
	    !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1])
		return ERR_PTR(-EINVAL);
	if (hdr->sadb_msg_satype == SADB_SATYPE_AH &&
	    !ext_hdrs[SADB_EXT_KEY_AUTH-1])
		return ERR_PTR(-EINVAL);
	/* Hard and soft lifetimes must be given together or not at all. */
	if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] !=
	    !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1])
		return ERR_PTR(-EINVAL);

	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
	if (proto == 0)
		return ERR_PTR(-EINVAL);

	/* default error is no buffer space */
	err = -ENOBUFS;

	/* RFC2367:

   Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message.
   SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not
   sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state.
   Therefore, the sadb_sa_state field of all submitted SAs MUST be
   SADB_SASTATE_MATURE and the kernel MUST return an error if this is
   not true.

However, KAME setkey always uses SADB_SASTATE_LARVAL. Hence, we have to _ignore_ sadb_sa_state, which is also reasonable. */ if (sa->sadb_sa_auth > SADB_AALG_MAX || (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP && sa->sadb_sa_encrypt > SADB_X_CALG_MAX) || sa->sadb_sa_encrypt > SADB_EALG_MAX) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (key != NULL && sa->sadb_sa_auth != SADB_X_AALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key != NULL && sa->sadb_sa_encrypt != SADB_EALG_NULL && key->sadb_key_bits == 0) return ERR_PTR(-EINVAL); x = xfrm_state_alloc(net); if (x == NULL) return ERR_PTR(-ENOBUFS); x->id.proto = proto; x->id.spi = sa->sadb_sa_spi; x->props.replay_window = min_t(unsigned int, sa->sadb_sa_replay, (sizeof(x->replay.bitmap) * 8)); if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN) x->props.flags |= XFRM_STATE_NOECN; if (sa->sadb_sa_flags & SADB_SAFLAGS_DECAP_DSCP) x->props.flags |= XFRM_STATE_DECAP_DSCP; if (sa->sadb_sa_flags & SADB_SAFLAGS_NOPMTUDISC) x->props.flags |= XFRM_STATE_NOPMTUDISC; lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD - 1]; if (lifetime != NULL) { x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT - 1]; if (lifetime != NULL) { x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) goto out; err = security_xfrm_state_alloc(x, uctx); 
kfree(uctx); if (err) goto out; } err = -ENOBUFS; key = ext_hdrs[SADB_EXT_KEY_AUTH - 1]; if (sa->sadb_sa_auth) { int keysize = 0; struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } if (key) keysize = (key->sadb_key_bits + 7) / 8; x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL); if (!x->aalg) { err = -ENOMEM; goto out; } strcpy(x->aalg->alg_name, a->name); x->aalg->alg_key_len = 0; if (key) { x->aalg->alg_key_len = key->sadb_key_bits; memcpy(x->aalg->alg_key, key+1, keysize); } x->aalg->alg_trunc_len = a->uinfo.auth.icv_truncbits; x->props.aalgo = sa->sadb_sa_auth; /* x->algo.flags = sa->sadb_sa_flags; */ } if (sa->sadb_sa_encrypt) { if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) { struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL); if (!x->calg) { err = -ENOMEM; goto out; } strcpy(x->calg->alg_name, a->name); x->props.calgo = sa->sadb_sa_encrypt; } else { int keysize = 0; struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt); if (!a || !a->pfkey_supported) { err = -ENOSYS; goto out; } key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1]; if (key) keysize = (key->sadb_key_bits + 7) / 8; x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL); if (!x->ealg) { err = -ENOMEM; goto out; } strcpy(x->ealg->alg_name, a->name); x->ealg->alg_key_len = 0; if (key) { x->ealg->alg_key_len = key->sadb_key_bits; memcpy(x->ealg->alg_key, key+1, keysize); } x->props.ealgo = sa->sadb_sa_encrypt; x->geniv = a->uinfo.encr.geniv; } } /* x->algo.flags = sa->sadb_sa_flags; */ x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], &x->props.saddr); pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], &x->id.daddr); if (ext_hdrs[SADB_X_EXT_SA2-1]) { const struct sadb_x_sa2 *sa2 = 
ext_hdrs[SADB_X_EXT_SA2-1]; int mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode); if (mode < 0) { err = -EINVAL; goto out; } x->props.mode = mode; x->props.reqid = sa2->sadb_x_sa2_reqid; } if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) { const struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]; /* Nobody uses this, but we try. */ x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr); x->sel.prefixlen_s = addr->sadb_address_prefixlen; } if (!x->sel.family) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { const struct sadb_x_nat_t_type* n_type; struct xfrm_encap_tmpl *natt; x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL); if (!x->encap) { err = -ENOMEM; goto out; } natt = x->encap; n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]; natt->encap_type = n_type->sadb_x_nat_t_type_type; if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) { const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]; natt->encap_sport = n_port->sadb_x_nat_t_port_port; } if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) { const struct sadb_x_nat_t_port *n_port = ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]; natt->encap_dport = n_port->sadb_x_nat_t_port_port; } } err = xfrm_init_state(x); if (err) goto out; x->km.seq = hdr->sadb_msg_seq; return x; out: x->km.state = XFRM_STATE_DEAD; xfrm_state_put(x); return ERR_PTR(err); } static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -EOPNOTSUPP; } static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct sk_buff *resp_skb; struct sadb_x_sa2 *sa2; struct sadb_address *saddr, *daddr; struct sadb_msg *out_hdr; struct sadb_spirange *range; struct xfrm_state *x = NULL; int mode; int err; u32 min_spi, max_spi; u32 reqid; u8 proto; unsigned short family; xfrm_address_t *xsaddr = NULL, *xdaddr = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 
ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
		return -EINVAL;

	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
	if (proto == 0)
		return -EINVAL;

	/* Mode and reqid default to 0 when no SA2 extension is supplied. */
	if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
		mode = pfkey_mode_to_xfrm(sa2->sadb_x_sa2_mode);
		if (mode < 0)
			return -EINVAL;
		reqid = sa2->sadb_x_sa2_reqid;
	} else {
		mode = 0;
		reqid = 0;
	}

	saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
	daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1];

	/* Both addresses share this family (checked at function entry). */
	family = ((struct sockaddr *)(saddr + 1))->sa_family;
	switch (family) {
	case AF_INET:
		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
		break;
#endif
	}

	/* Prefer an acquire state matching the request's sequence number,
	 * but only if its destination address also matches.
	 */
	if (hdr->sadb_msg_seq) {
		x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
		if (x && !xfrm_addr_equal(&x->id.daddr, xdaddr, family)) {
			xfrm_state_put(x);
			x = NULL;
		}
	}

	if (!x)
		x = xfrm_find_acq(net, &dummy_mark, mode, reqid, 0, UINT_MAX,
				  proto, xdaddr, xsaddr, 1, family);

	if (x == NULL)
		return -ENOENT;

	/* Default SPI range, overridden by an SPIRANGE extension. */
	min_spi = 0x100;
	max_spi = 0x0fffffff;

	range = ext_hdrs[SADB_EXT_SPIRANGE-1];
	if (range) {
		min_spi = range->sadb_spirange_min;
		max_spi = range->sadb_spirange_max;
	}

	err = verify_spi_info(x->id.proto, min_spi, max_spi, NULL);
	if (err) {
		xfrm_state_put(x);
		return err;
	}

	err = xfrm_alloc_spi(x, min_spi, max_spi, NULL);
	/* Fold the SPI allocation error and message build error together. */
	resp_skb = err ? ERR_PTR(err) : pfkey_xfrm_state2msg(x);

	if (IS_ERR(resp_skb)) {
		xfrm_state_put(x);
		return  PTR_ERR(resp_skb);
	}

	out_hdr = (struct sadb_msg *) resp_skb->data;
	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
	out_hdr->sadb_msg_type = SADB_GETSPI;
	out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
	out_hdr->sadb_msg_errno = 0;
	out_hdr->sadb_msg_reserved = 0;
	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;

	xfrm_state_put(x);

	pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);

	return 0;
}

/*
 * SADB_ACQUIRE handler. Only a bare header is accepted (-EOPNOTSUPP
 * otherwise). When both the sequence number and errno are non-zero, the
 * matching state is moved from XFRM_STATE_ACQ to XFRM_STATE_ERROR.
 * Always returns 0 for a bare header.
 */
static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs)
{
	struct net *net = sock_net(sk);
	struct xfrm_state *x;

	if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8)
		return -EOPNOTSUPP;

	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
		return 0;

	x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq, UINT_MAX);
	if (x == NULL)
		return 0;

	spin_lock_bh(&x->lock);
	if (x->km.state == XFRM_STATE_ACQ)
		x->km.state = XFRM_STATE_ERROR;

	spin_unlock_bh(&x->lock);
	xfrm_state_put(x);
	return 0;
}

/*
 * Map an xfrm policy event to a PF_KEY message type.
 * Returns 0 (after logging) for events without a mapping, which
 * includes XFRM_MSG_POLEXPIRE.
 */
static inline int event2poltype(int event)
{
	switch (event) {
	case XFRM_MSG_DELPOLICY:
		return SADB_X_SPDDELETE;
	case XFRM_MSG_NEWPOLICY:
		return SADB_X_SPDADD;
	case XFRM_MSG_UPDPOLICY:
		return SADB_X_SPDUPDATE;
	case XFRM_MSG_POLEXPIRE:
		/* return SADB_X_SPDEXPIRE; -- disabled, falls through to default */
	default:
		pr_err("pfkey: Unknown policy event %d\n", event);
		break;
	}

	return 0;
}

/*
 * Map an xfrm SA event to a PF_KEY message type.
 * Returns 0 (after logging) for unknown events.
 */
static inline int event2keytype(int event)
{
	switch (event) {
	case XFRM_MSG_DELSA:
		return SADB_DELETE;
	case XFRM_MSG_NEWSA:
		return SADB_ADD;
	case XFRM_MSG_UPDSA:
		return SADB_UPDATE;
	case XFRM_MSG_EXPIRE:
		return SADB_EXPIRE;
	default:
		pr_err("pfkey: Unknown SA event %d\n", event);
		break;
	}

	return 0;
}

/* ADD/UPD/DEL */
/*
 * Broadcast a state change for @x to all PF_KEY sockets in its
 * namespace. Returns 0, or the error from building the message.
 */
static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
{
	struct sk_buff *skb;
	struct sadb_msg *hdr;

	skb = pfkey_xfrm_state2msg(x);

	if (IS_ERR(skb))
		return PTR_ERR(skb);

	hdr = (struct sadb_msg *) skb->data;
hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = event2keytype(c->event); hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; int err; struct km_event c; x = pfkey_msg2xfrm_state(net, hdr, ext_hdrs); if (IS_ERR(x)) return PTR_ERR(x); xfrm_state_hold(x); if (hdr->sadb_msg_type == SADB_ADD) err = xfrm_state_add(x); else err = xfrm_state_update(x); xfrm_audit_state_add(x, err ? 0 : 1, true); if (err < 0) { x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); goto out; } if (hdr->sadb_msg_type == SADB_ADD) c.event = XFRM_MSG_NEWSA; else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); return err; } static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct xfrm_state *x; struct km_event c; int err; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; if ((err = security_xfrm_state_delete(x))) goto out; if (xfrm_state_kern(x)) { err = -EPERM; goto out; } err = xfrm_state_delete(x); if (err < 0) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: xfrm_audit_state_delete(x, err ? 
0 : 1, true); xfrm_state_put(x); return err; } static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); __u8 proto; struct sk_buff *out_skb; struct sadb_msg *out_hdr; struct xfrm_state *x; if (!ext_hdrs[SADB_EXT_SA-1] || !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1])) return -EINVAL; x = pfkey_xfrm_state_lookup(net, hdr, ext_hdrs); if (x == NULL) return -ESRCH; out_skb = pfkey_xfrm_state2msg(x); proto = x->id.proto; xfrm_state_put(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = SADB_GET; out_hdr->sadb_msg_satype = pfkey_proto2satype(proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } static struct sk_buff *compose_sadb_supported(const struct sadb_msg *orig, gfp_t allocation) { struct sk_buff *skb; struct sadb_msg *hdr; int len, auth_len, enc_len, i; auth_len = xfrm_count_pfkey_auth_supported(); if (auth_len) { auth_len *= sizeof(struct sadb_alg); auth_len += sizeof(struct sadb_supported); } enc_len = xfrm_count_pfkey_enc_supported(); if (enc_len) { enc_len *= sizeof(struct sadb_alg); enc_len += sizeof(struct sadb_supported); } len = enc_len + auth_len + sizeof(struct sadb_msg); skb = alloc_skb(len + 16, allocation); if (!skb) goto out_put_algs; hdr = skb_put(skb, sizeof(*hdr)); pfkey_hdr_dup(hdr, orig); hdr->sadb_msg_errno = 0; hdr->sadb_msg_len = len / sizeof(uint64_t); if (auth_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, auth_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = auth_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH; for (i = 0; ; i++) { struct 
xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg->available) *ap++ = aalg->desc; } } if (enc_len) { struct sadb_supported *sp; struct sadb_alg *ap; sp = skb_put(skb, enc_len); ap = (struct sadb_alg *) (sp + 1); sp->sadb_supported_len = enc_len / sizeof(uint64_t); sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT; for (i = 0; ; i++) { struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (ealg->available) *ap++ = ealg->desc; } } out_put_algs: return skb; } static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *supp_skb; if (hdr->sadb_msg_satype > SADB_SATYPE_MAX) return -EINVAL; if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) { if (pfk->registered&(1<<hdr->sadb_msg_satype)) return -EEXIST; pfk->registered |= (1<<hdr->sadb_msg_satype); } mutex_lock(&pfkey_mutex); xfrm_probe_algs(); supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); mutex_unlock(&pfkey_mutex); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<<hdr->sadb_msg_satype); return -ENOBUFS; } pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, sock_net(sk)); return 0; } static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put_data(skb, ihdr, sizeof(struct sadb_msg)); hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) { struct sk_buff *skb; struct sadb_msg *hdr; skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb) return -ENOBUFS; hdr = skb_put(skb, 
sizeof(struct sadb_msg)); hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int proto; struct km_event c; int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) return -EINVAL; err = xfrm_state_flush(net, proto, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - go quietly */ err = 0; return err ? err : err2; } c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); return 0; } static int dump_sa(struct xfrm_state *x, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_state2msg(x); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_DUMP; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sa(struct pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_state_walk(net, &pfk->dump.u.state, dump_sa, (void *) pfk); } static void pfkey_dump_sa_done(struct 
pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); xfrm_state_walk_done(&pfk->dump.u.state, net); } static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { u8 proto; struct xfrm_address_filter *filter = NULL; struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; if ((xfilter->sadb_x_filter_splen > (sizeof(xfrm_address_t) << 3)) || (xfilter->sadb_x_filter_dplen > (sizeof(xfrm_address_t) << 3))) { mutex_unlock(&pfk->dump_lock); return -EINVAL; } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); return -ENOMEM; } memcpy(&filter->saddr, &xfilter->sadb_x_filter_saddr, sizeof(xfrm_address_t)); memcpy(&filter->daddr, &xfilter->sadb_x_filter_daddr, sizeof(xfrm_address_t)); filter->family = xfilter->sadb_x_filter_family; filter->splen = xfilter->sadb_x_filter_splen; filter->dplen = xfilter->sadb_x_filter_dplen; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto, filter); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); int satype = hdr->sadb_msg_satype; bool reset_errno = false; if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) { reset_errno = true; if (satype != 0 && satype != 1) return -EINVAL; pfk->promisc = satype; } if (reset_errno && skb_cloned(skb)) skb = skb_copy(skb, GFP_KERNEL); else skb = skb_clone(skb, GFP_KERNEL); 
if (reset_errno && skb) { struct sadb_msg *new_hdr = (struct sadb_msg *) skb->data; new_hdr->sadb_msg_errno = 0; } pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr) { int i; u32 reqid = *(u32*)ptr; for (i=0; i<xp->xfrm_nr; i++) { if (xp->xfrm_vec[i].reqid == reqid) return -EEXIST; } return 0; } static u32 gen_reqid(struct net *net) { struct xfrm_policy_walk walk; u32 start; int rc; static u32 reqid = IPSEC_MANUAL_REQID_MAX; start = reqid; do { ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; xfrm_policy_walk_init(&walk, XFRM_POLICY_TYPE_MAIN); rc = xfrm_policy_walk(net, &walk, check_reqid, (void*)&reqid); xfrm_policy_walk_done(&walk, net); if (rc != -EEXIST) return reqid; } while (reqid != start); return 0; } static int parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_policy *pol, struct sadb_x_ipsecrequest *rq) { struct net *net = xp_net(xp); struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr; int mode; if (xp->xfrm_nr >= XFRM_MAX_DEPTH) return -ELOOP; if (rq->sadb_x_ipsecrequest_mode == 0) return -EINVAL; if (!xfrm_id_proto_valid(rq->sadb_x_ipsecrequest_proto)) return -EINVAL; t->id.proto = rq->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; t->mode = mode; if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE) { if ((mode == XFRM_MODE_TUNNEL || mode == XFRM_MODE_BEET) && pol->sadb_x_policy_dir == IPSEC_DIR_OUTBOUND) return -EINVAL; t->optional = 1; } else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) { t->reqid = rq->sadb_x_ipsecrequest_reqid; if (t->reqid > IPSEC_MANUAL_REQID_MAX) t->reqid = 0; if (!t->reqid && !(t->reqid = gen_reqid(net))) return -ENOBUFS; } /* addresses present only in tunnel mode */ if (t->mode == XFRM_MODE_TUNNEL) { int err; err = parse_sockaddr_pair( (struct sockaddr *)(rq + 1), rq->sadb_x_ipsecrequest_len - sizeof(*rq), &t->saddr, &t->id.daddr, 
&t->encap_family); if (err) return err; } else t->encap_family = xp->family; /* No way to set this via kame pfkey */ t->allalgs = 1; xp->xfrm_nr++; return 0; } static int parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol) { int err; int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy); struct sadb_x_ipsecrequest *rq = (void*)(pol+1); if (pol->sadb_x_policy_len * 8 < sizeof(struct sadb_x_policy)) return -EINVAL; while (len >= sizeof(*rq)) { if (len < rq->sadb_x_ipsecrequest_len || rq->sadb_x_ipsecrequest_len < sizeof(*rq)) return -EINVAL; if ((err = parse_ipsecrequest(xp, pol, rq)) < 0) return err; len -= rq->sadb_x_ipsecrequest_len; rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len); } return 0; } static inline int pfkey_xfrm_policy2sec_ctx_size(const struct xfrm_policy *xp) { struct xfrm_sec_ctx *xfrm_ctx = xp->security; if (xfrm_ctx) { int len = sizeof(struct sadb_x_sec_ctx); len += xfrm_ctx->ctx_len; return PFKEY_ALIGN8(len); } return 0; } static int pfkey_xfrm_policy2msg_size(const struct xfrm_policy *xp) { const struct xfrm_tmpl *t; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = 0; int i; for (i=0; i<xp->xfrm_nr; i++) { t = xp->xfrm_vec + i; socklen += pfkey_sockaddr_len(t->encap_family); } return sizeof(struct sadb_msg) + (sizeof(struct sadb_lifetime) * 3) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + sizeof(struct sadb_x_policy) + (xp->xfrm_nr * sizeof(struct sadb_x_ipsecrequest)) + (socklen * 2) + pfkey_xfrm_policy2sec_ctx_size(xp); } static struct sk_buff * pfkey_xfrm_policy2msg_prep(const struct xfrm_policy *xp) { struct sk_buff *skb; int size; size = pfkey_xfrm_policy2msg_size(xp); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return ERR_PTR(-ENOBUFS); return skb; } static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *xp, int dir) { struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_lifetime *lifetime; struct sadb_x_policy *pol; struct 
sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int i; int size; int sockaddr_size = pfkey_sockaddr_size(xp->family); int socklen = pfkey_sockaddr_len(xp->family); size = pfkey_xfrm_policy2msg_size(xp); /* call should fill header later */ hdr = skb_put(skb, sizeof(struct sadb_msg)); memset(hdr, 0, size); /* XXX do we need this ? */ /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_s; addr->sadb_address_reserved = 0; if (!pfkey_sockaddr_fill(&xp->selector.saddr, xp->selector.sport, (struct sockaddr *) (addr + 1), xp->family)) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto); addr->sadb_address_prefixlen = xp->selector.prefixlen_d; addr->sadb_address_reserved = 0; pfkey_sockaddr_fill(&xp->selector.daddr, xp->selector.dport, (struct sockaddr *) (addr + 1), xp->family); /* hard time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD; lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.hard_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit); lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds; /* soft time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = 
SADB_EXT_LIFETIME_SOFT; lifetime->sadb_lifetime_allocations = _X2KEY(xp->lft.soft_packet_limit); lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit); lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds; lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds; /* current time */ lifetime = skb_put(skb, sizeof(struct sadb_lifetime)); lifetime->sadb_lifetime_len = sizeof(struct sadb_lifetime)/sizeof(uint64_t); lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT; lifetime->sadb_lifetime_allocations = xp->curlft.packets; lifetime->sadb_lifetime_bytes = xp->curlft.bytes; lifetime->sadb_lifetime_addtime = xp->curlft.add_time; lifetime->sadb_lifetime_usetime = xp->curlft.use_time; pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD; if (xp->action == XFRM_POLICY_ALLOW) { if (xp->xfrm_nr) pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; else pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; for (i=0; i<xp->xfrm_nr; i++) { const struct xfrm_tmpl *t = xp->xfrm_vec + i; struct sadb_x_ipsecrequest *rq; int req_size; int mode; req_size = sizeof(struct sadb_x_ipsecrequest); if (t->mode == XFRM_MODE_TUNNEL) { socklen = pfkey_sockaddr_len(t->encap_family); req_size += socklen * 2; } else { size -= 2*socklen; } rq = skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; memset(rq, 0, sizeof(*rq)); rq->sadb_x_ipsecrequest_len = req_size; rq->sadb_x_ipsecrequest_proto = t->id.proto; if ((mode = pfkey_mode_from_xfrm(t->mode)) < 0) return -EINVAL; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE; if (t->reqid) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE; if (t->optional) 
rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; if (t->mode == XFRM_MODE_TUNNEL) { u8 *sa = (void *)(rq + 1); pfkey_sockaddr_fill(&t->saddr, 0, (struct sockaddr *)sa, t->encap_family); pfkey_sockaddr_fill(&t->id.daddr, 0, (struct sockaddr *) (sa + socklen), t->encap_family); } } /* security context */ if ((xfrm_ctx = xp->security)) { int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp); sec_ctx = skb_put(skb, ctx_size); sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_reserved = refcount_read(&xp->refcnt); return 0; } static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; if (c->data.byid && c->event == XFRM_MSG_DELPOLICY) out_hdr->sadb_msg_type = SADB_X_SPDDELETE2; else out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err = 0; struct sadb_lifetime *lifetime; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; if 
(!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC) return -EINVAL; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; xp = xfrm_policy_alloc(net, GFP_KERNEL); if (xp == NULL) return -ENOBUFS; xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->priority = pol->sadb_x_policy_priority; sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr); xp->selector.family = xp->family; xp->selector.prefixlen_s = sa->sadb_address_prefixlen; xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.sport) xp->selector.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr); xp->selector.prefixlen_d = sa->sadb_address_prefixlen; /* Amusing, we set this twice. KAME apps appear to set same value * in both addresses. 
*/ xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (xp->selector.dport) xp->selector.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) { err = -ENOBUFS; goto out; } err = security_xfrm_policy_alloc(&xp->security, uctx, GFP_KERNEL); kfree(uctx); if (err) goto out; } xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD-1]) != NULL) { xp->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime; } if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) != NULL) { xp->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations); xp->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes); xp->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime; xp->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime; } xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (err = parse_ipsecrequests(xp, pol)) < 0) goto out; err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp, hdr->sadb_msg_type != SADB_X_SPDUPDATE); xfrm_audit_policy_add(xp, err ? 
0 : 1, true); if (err) goto out; if (hdr->sadb_msg_type == SADB_X_SPDUPDATE) c.event = XFRM_MSG_UPDPOLICY; else c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); return 0; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); int err; struct sadb_address *sa; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct xfrm_selector sel; struct km_event c; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *pol_ctx = NULL; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1], ext_hdrs[SADB_EXT_ADDRESS_DST-1]) || !ext_hdrs[SADB_X_EXT_POLICY-1]) return -EINVAL; pol = ext_hdrs[SADB_X_EXT_POLICY-1]; if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) return -EINVAL; memset(&sel, 0, sizeof(sel)); sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); sec_ctx = ext_hdrs[SADB_X_EXT_SEC_CTX - 1]; if (sec_ctx != NULL) { struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_KERNEL); if (!uctx) return -ENOMEM; err = security_xfrm_policy_alloc(&pol_ctx, uctx, GFP_KERNEL); kfree(uctx); if (err) return err; } xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); 
security_xfrm_policy_free(pol_ctx); if (xp == NULL) return -ENOENT; xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); out: xfrm_pol_put(xp); return err; } static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struct sadb_msg *hdr, int dir) { int err; struct sk_buff *out_skb; struct sadb_msg *out_hdr; err = 0; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) { err = PTR_ERR(out_skb); goto out; } err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); goto out; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = hdr->sadb_msg_version; out_hdr->sadb_msg_type = hdr->sadb_msg_type; out_hdr->sadb_msg_satype = 0; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: return err; } static int pfkey_sockaddr_pair_size(sa_family_t family) { return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { int af, socklen; if (ext_len < 2 || ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; *family = af; return 0; } #ifdef CONFIG_NET_KEY_MIGRATE static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, struct xfrm_migrate *m) { int err; struct sadb_x_ipsecrequest *rq2; int mode; if (len < sizeof(*rq1) || len < rq1->sadb_x_ipsecrequest_len || rq1->sadb_x_ipsecrequest_len < sizeof(*rq1)) return -EINVAL; /* old endoints */ err = 
parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), rq1->sadb_x_ipsecrequest_len - sizeof(*rq1), &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; rq2 = (struct sadb_x_ipsecrequest *)((u8 *)rq1 + rq1->sadb_x_ipsecrequest_len); len -= rq1->sadb_x_ipsecrequest_len; if (len <= sizeof(*rq2) || len < rq2->sadb_x_ipsecrequest_len || rq2->sadb_x_ipsecrequest_len < sizeof(*rq2)) return -EINVAL; /* new endpoints */ err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), rq2->sadb_x_ipsecrequest_len - sizeof(*rq2), &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; if (rq1->sadb_x_ipsecrequest_proto != rq2->sadb_x_ipsecrequest_proto || rq1->sadb_x_ipsecrequest_mode != rq2->sadb_x_ipsecrequest_mode || rq1->sadb_x_ipsecrequest_reqid != rq2->sadb_x_ipsecrequest_reqid) return -EINVAL; m->proto = rq1->sadb_x_ipsecrequest_proto; if ((mode = pfkey_mode_to_xfrm(rq1->sadb_x_ipsecrequest_mode)) < 0) return -EINVAL; m->mode = mode; m->reqid = rq1->sadb_x_ipsecrequest_reqid; return ((int)(rq1->sadb_x_ipsecrequest_len + rq2->sadb_x_ipsecrequest_len)); } static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; struct xfrm_kmaddress k; struct net *net = sock_net(sk); if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } if (kma) { /* convert sadb_x_kmaddress to xfrm_kmaddress */ k.reserved = kma->sadb_x_kmaddress_reserved; ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), 8*(kma->sadb_x_kmaddress_len) - 
sizeof(*kma), &k.local, &k.remote, &k.family); if (ret < 0) { err = ret; goto out; } } dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); /* set source address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_SRC - 1]; sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr); sel.prefixlen_s = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.sport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.sport) sel.sport_mask = htons(0xffff); /* set destination address info of selector */ sa = ext_hdrs[SADB_EXT_ADDRESS_DST - 1]; pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr); sel.prefixlen_d = sa->sadb_address_prefixlen; sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto); sel.dport = ((struct sockaddr_in *)(sa + 1))->sin_port; if (sel.dport) sel.dport_mask = htons(0xffff); rq = (struct sadb_x_ipsecrequest *)(pol + 1); /* extract ipsecrequests */ i = 0; len = pol->sadb_x_policy_len * 8 - sizeof(struct sadb_x_policy); while (len > 0 && i < XFRM_MAX_DEPTH) { ret = ipsecrequests_to_migrate(rq, len, &m[i]); if (ret < 0) { err = ret; goto out; } else { rq = (struct sadb_x_ipsecrequest *)((u8 *)rq + ret); len -= ret; i++; } } if (!i || len > 0) { err = -EINVAL; goto out; } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, kma ? 
&k : NULL, net, NULL, 0, NULL, NULL); out: return err; } #else static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { return -ENOPROTOOPT; } #endif static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); unsigned int dir; int err = 0, delete; struct sadb_x_policy *pol; struct xfrm_policy *xp; struct km_event c; if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL) return -EINVAL; dir = xfrm_policy_id2dir(pol->sadb_x_policy_id); if (dir >= XFRM_POLICY_MAX) return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; if (delete) { xfrm_audit_policy_delete(xp, err ? 0 : 1, true); if (err) goto out; c.seq = hdr->sadb_msg_seq; c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); } else { err = key_pol_get_resp(sk, xp, hdr, dir); } out: xfrm_pol_put(xp); return err; } static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct pfkey_sock *pfk = ptr; struct sk_buff *out_skb; struct sadb_msg *out_hdr; int err; if (!pfkey_can_dump(&pfk->sk)) return -ENOBUFS; out_skb = pfkey_xfrm_policy2msg_prep(xp); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); err = pfkey_xfrm_policy2msg(out_skb, xp, dir); if (err < 0) { kfree_skb(out_skb); return err; } out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = pfk->dump.msg_version; out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; return 0; } static int pfkey_dump_sp(struct 
pfkey_sock *pfk) { struct net *net = sock_net(&pfk->sk); return xfrm_policy_walk(net, &pfk->dump.u.policy, dump_sp, (void *) pfk); } static void pfkey_dump_sp_done(struct pfkey_sock *pfk) { struct net *net = sock_net((struct sock *)pfk); xfrm_policy_walk_done(&pfk->dump.u.policy, net); } static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct pfkey_sock *pfk = pfkey_sk(sk); mutex_lock(&pfk->dump_lock); if (pfk->dump.dump != NULL) { mutex_unlock(&pfk->dump_lock); return -EBUSY; } pfk->dump.msg_version = hdr->sadb_msg_version; pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); mutex_unlock(&pfk->dump_lock); return pfkey_do_dump(pfk); } static int key_notify_policy_flush(const struct km_event *c) { struct sk_buff *skb_out; struct sadb_msg *hdr; skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); if (!skb_out) return -ENOBUFS; hdr = skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs) { struct net *net = sock_net(sk); struct km_event c; int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ return 0; return err; } c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; 
km_policy_notify(NULL, 0, &c); return 0; } typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr, void * const *ext_hdrs); static const pfkey_handler pfkey_funcs[SADB_MAX + 1] = { [SADB_RESERVED] = pfkey_reserved, [SADB_GETSPI] = pfkey_getspi, [SADB_UPDATE] = pfkey_add, [SADB_ADD] = pfkey_add, [SADB_DELETE] = pfkey_delete, [SADB_GET] = pfkey_get, [SADB_ACQUIRE] = pfkey_acquire, [SADB_REGISTER] = pfkey_register, [SADB_EXPIRE] = NULL, [SADB_FLUSH] = pfkey_flush, [SADB_DUMP] = pfkey_dump, [SADB_X_PROMISC] = pfkey_promisc, [SADB_X_PCHANGE] = NULL, [SADB_X_SPDUPDATE] = pfkey_spdadd, [SADB_X_SPDADD] = pfkey_spdadd, [SADB_X_SPDDELETE] = pfkey_spddelete, [SADB_X_SPDGET] = pfkey_spdget, [SADB_X_SPDACQUIRE] = NULL, [SADB_X_SPDDUMP] = pfkey_spddump, [SADB_X_SPDFLUSH] = pfkey_spdflush, [SADB_X_SPDSETIDX] = pfkey_spdadd, [SADB_X_SPDDELETE2] = pfkey_spdget, [SADB_X_MIGRATE] = pfkey_migrate, }; static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb_msg *hdr) { void *ext_hdrs[SADB_EXT_MAX]; int err; /* Non-zero return value of pfkey_broadcast() does not always signal * an error and even on an actual error we may still want to process * the message so rather ignore the return value. 
*/ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); err = parse_exthdrs(skb, hdr, ext_hdrs); if (!err) { err = -EOPNOTSUPP; if (pfkey_funcs[hdr->sadb_msg_type]) err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs); } return err; } static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp) { struct sadb_msg *hdr = NULL; if (skb->len < sizeof(*hdr)) { *errp = -EMSGSIZE; } else { hdr = (struct sadb_msg *) skb->data; if (hdr->sadb_msg_version != PF_KEY_V2 || hdr->sadb_msg_reserved != 0 || (hdr->sadb_msg_type <= SADB_RESERVED || hdr->sadb_msg_type > SADB_MAX)) { hdr = NULL; *errp = -EINVAL; } else if (hdr->sadb_msg_len != (skb->len / sizeof(uint64_t)) || hdr->sadb_msg_len < (sizeof(struct sadb_msg) / sizeof(uint64_t))) { hdr = NULL; *errp = -EMSGSIZE; } else { *errp = 0; } } return hdr; } static inline int aalg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->aalgos) * 8) return 0; return (t->aalgos >> id) & 1; } static inline int ealg_tmpl_set(const struct xfrm_tmpl *t, const struct xfrm_algo_desc *d) { unsigned int id = d->desc.sadb_alg_id; if (id >= sizeof(t->ealgos) * 8) return 0; return (t->ealgos >> id) & 1; } static int count_ah_combs(const struct xfrm_tmpl *t) { int i, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } return sz + sizeof(struct sadb_prop); } static int count_esp_combs(const struct xfrm_tmpl *t) { int i, k, sz = 0; for (i = 0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg))) continue; for (k = 1; ; k++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if 
(!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg)) sz += sizeof(struct sadb_comb); } } return sz + sizeof(struct sadb_prop); } static int dump_ah_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i = 0; ; i++) { const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (aalg_tmpl_set(t, aalg) && aalg->available) { struct sadb_comb *c; c = skb_put_zero(skb, sizeof(struct sadb_comb)); p->sadb_prop_len += sizeof(struct sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int dump_esp_combs(struct sk_buff *skb, const struct xfrm_tmpl *t) { struct sadb_prop *p; int sz = 0; int i, k; p = skb_put(skb, sizeof(struct sadb_prop)); p->sadb_prop_len = sizeof(struct sadb_prop)/8; p->sadb_prop_exttype = SADB_EXT_PROPOSAL; p->sadb_prop_replay = 32; memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved)); for (i=0; ; i++) { const struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i); if (!ealg) break; if (!ealg->pfkey_supported) continue; if (!(ealg_tmpl_set(t, ealg) && ealg->available)) continue; for (k = 1; ; k++) { struct sadb_comb *c; const struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k); if (!aalg) break; if (!aalg->pfkey_supported) continue; if (!(aalg_tmpl_set(t, aalg) && aalg->available)) continue; c = skb_put(skb, sizeof(struct sadb_comb)); memset(c, 0, sizeof(*c)); p->sadb_prop_len += sizeof(struct 
sadb_comb)/8; c->sadb_comb_auth = aalg->desc.sadb_alg_id; c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits; c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits; c->sadb_comb_encrypt = ealg->desc.sadb_alg_id; c->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits; c->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits; c->sadb_comb_hard_addtime = 24*60*60; c->sadb_comb_soft_addtime = 20*60*60; c->sadb_comb_hard_usetime = 8*60*60; c->sadb_comb_soft_usetime = 7*60*60; sz += sizeof(*c); } } return sz + sizeof(*p); } static int key_notify_policy_expire(struct xfrm_policy *xp, const struct km_event *c) { return 0; } static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) { struct sk_buff *out_skb; struct sadb_msg *out_hdr; int hard; int hsc; hard = c->data.hard; if (hard) hsc = 2; else hsc = 1; out_skb = pfkey_xfrm_state2msg_expire(x, hsc); if (IS_ERR(out_skb)) return PTR_ERR(out_skb); out_hdr = (struct sadb_msg *) out_skb->data; out_hdr->sadb_msg_version = PF_KEY_V2; out_hdr->sadb_msg_type = SADB_EXPIRE; out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); return 0; } static int pfkey_send_notify(struct xfrm_state *x, const struct km_event *c) { struct net *net = x ? 
xs_net(x) : c->net; struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); if (atomic_read(&net_pfkey->socks_nr) == 0) return 0; switch (c->event) { case XFRM_MSG_EXPIRE: return key_notify_sa_expire(x, c); case XFRM_MSG_DELSA: case XFRM_MSG_NEWSA: case XFRM_MSG_UPDSA: return key_notify_sa(x, c); case XFRM_MSG_FLUSHSA: return key_notify_sa_flush(c); case XFRM_MSG_NEWAE: /* not yet supported */ break; default: pr_err("pfkey: Unknown SA event %d\n", c->event); break; } return 0; } static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) { if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) return 0; switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); case XFRM_MSG_DELPOLICY: case XFRM_MSG_NEWPOLICY: case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: if (c->data.type != XFRM_POLICY_TYPE_MAIN) break; return key_notify_policy_flush(c); default: pr_err("pfkey: Unknown policy event %d\n", c->event); break; } return 0; } static u32 get_acqseq(void) { u32 res; static atomic_t acqseq; do { res = atomic_inc_return(&acqseq); } while (!res); return res; } static bool pfkey_is_alive(const struct km_event *c) { struct netns_pfkey *net_pfkey = net_generic(c->net, pfkey_net_id); struct sock *sk; bool is_alive = false; rcu_read_lock(); sk_for_each_rcu(sk, &net_pfkey->table) { if (pfkey_sk(sk)->registered) { is_alive = true; break; } } rcu_read_unlock(); return is_alive; } static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_address *addr; struct sadb_x_policy *pol; int sockaddr_size; int size; struct sadb_x_sec_ctx *sec_ctx; struct xfrm_sec_ctx *xfrm_ctx; int ctx_size = 0; int alg_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; size = sizeof(struct sadb_msg) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + 
sizeof(struct sadb_x_policy); if (x->id.proto == IPPROTO_AH) alg_size = count_ah_combs(t); else if (x->id.proto == IPPROTO_ESP) alg_size = count_esp_combs(t); if ((xfrm_ctx = x->security)) { ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); size += sizeof(struct sadb_x_sec_ctx) + ctx_size; } skb = alloc_skb(size + alg_size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_ACQUIRE; hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq = get_acqseq(); hdr->sadb_msg_pid = 0; /* src address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* dst address */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->id.daddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t); pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. 
*/ alg_size = 0; if (x->id.proto == IPPROTO_AH) alg_size = dump_ah_combs(skb, t); else if (x->id.proto == IPPROTO_ESP) alg_size = dump_esp_combs(skb, t); hdr->sadb_msg_len += alg_size / 8; /* security context */ if (xfrm_ctx) { sec_ctx = skb_put(skb, sizeof(struct sadb_x_sec_ctx) + ctx_size); sec_ctx->sadb_x_sec_len = (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, xfrm_ctx->ctx_len); } return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct net *net = sock_net(sk); struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (opt != IPV6_IPSEC_POLICY) { *dir = -EOPNOTSUPP; return NULL; } break; #endif default: *dir = -EINVAL; return NULL; } *dir = -EINVAL; if (len < sizeof(struct sadb_x_policy) || pol->sadb_x_policy_len*8 > len || pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS || (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND)) return NULL; xp = xfrm_policy_alloc(net, GFP_ATOMIC); if (xp == NULL) { *dir = -ENOBUFS; return NULL; } xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ? 
XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW); xp->lft.soft_byte_limit = XFRM_INF; xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && (*dir = parse_ipsecrequests(xp, pol)) < 0) goto out; /* security context too */ if (len >= (pol->sadb_x_policy_len*8 + sizeof(struct sadb_x_sec_ctx))) { char *p = (char *)pol; struct xfrm_user_sec_ctx *uctx; p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + sec_ctx->sadb_x_sec_len*8) { *dir = -EINVAL; goto out; } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx, GFP_ATOMIC); *dir = security_xfrm_policy_alloc(&xp->security, uctx, GFP_ATOMIC); kfree(uctx); if (*dir) goto out; } *dir = pol->sadb_x_policy_dir-1; return xp; out: xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_sa *sa; struct sadb_address *addr; struct sadb_x_nat_t_port *n_port; int sockaddr_size; int size; __u8 satype = (x->id.proto == IPPROTO_ESP ? 
SADB_SATYPE_ESP : 0); struct xfrm_encap_tmpl *natt = NULL; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) return -EINVAL; if (!satype) return -EINVAL; if (!x->encap) return -EINVAL; natt = x->encap; /* Build an SADB_X_NAT_T_NEW_MAPPING message: * * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) | * ADDRESS_DST (new addr) | NAT_T_DPORT (new port) */ size = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) + (sizeof(struct sadb_address) * 2) + (sockaddr_size * 2) + (sizeof(struct sadb_x_nat_t_port) * 2); skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING; hdr->sadb_msg_satype = satype; hdr->sadb_msg_len = size / sizeof(uint64_t); hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = x->km.seq; hdr->sadb_msg_pid = 0; /* SA */ sa = skb_put(skb, sizeof(struct sadb_sa)); sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t); sa->sadb_sa_exttype = SADB_EXT_SA; sa->sadb_sa_spi = x->id.spi; sa->sadb_sa_replay = 0; sa->sadb_sa_state = 0; sa->sadb_sa_auth = 0; sa->sadb_sa_encrypt = 0; sa->sadb_sa_flags = 0; /* ADDRESS_SRC (old addr) */ addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(&x->props.saddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_SPORT (old port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT; n_port->sadb_x_nat_t_port_port = natt->encap_sport; n_port->sadb_x_nat_t_port_reserved = 0; /* ADDRESS_DST (new addr) */ 
addr = skb_put(skb, sizeof(struct sadb_address) + sockaddr_size); addr->sadb_address_len = (sizeof(struct sadb_address)+sockaddr_size)/ sizeof(uint64_t); addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST; addr->sadb_address_proto = 0; addr->sadb_address_reserved = 0; addr->sadb_address_prefixlen = pfkey_sockaddr_fill(ipaddr, 0, (struct sockaddr *) (addr + 1), x->props.family); if (!addr->sadb_address_prefixlen) BUG(); /* NAT_T_DPORT (new port) */ n_port = skb_put(skb, sizeof(*n_port)); n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t); n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT; n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE static int set_sadb_address(struct sk_buff *skb, int sasize, int type, const struct xfrm_selector *sel) { struct sadb_address *addr; addr = skb_put(skb, sizeof(struct sadb_address) + sasize); addr->sadb_address_len = (sizeof(struct sadb_address) + sasize)/8; addr->sadb_address_exttype = type; addr->sadb_address_proto = sel->proto; addr->sadb_address_reserved = 0; switch (type) { case SADB_EXT_ADDRESS_SRC: addr->sadb_address_prefixlen = sel->prefixlen_s; pfkey_sockaddr_fill(&sel->saddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; case SADB_EXT_ADDRESS_DST: addr->sadb_address_prefixlen = sel->prefixlen_d; pfkey_sockaddr_fill(&sel->daddr, 0, (struct sockaddr *)(addr + 1), sel->family); break; default: return -EINVAL; } return 0; } static int set_sadb_kmaddress(struct sk_buff *skb, const struct xfrm_kmaddress *k) { struct sadb_x_kmaddress *kma; u8 *sa; int family = k->family; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = (sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(family)); kma = skb_put_zero(skb, size_req); kma->sadb_x_kmaddress_len = size_req / 8; kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; 
kma->sadb_x_kmaddress_reserved = k->reserved; sa = (u8 *)(kma + 1); if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) return -EINVAL; return 0; } static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, const xfrm_address_t *src, const xfrm_address_t *dst) { struct sadb_x_ipsecrequest *rq; u8 *sa; int socklen = pfkey_sockaddr_len(family); int size_req; size_req = sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(family); rq = skb_put_zero(skb, size_req); rq->sadb_x_ipsecrequest_len = size_req; rq->sadb_x_ipsecrequest_proto = proto; rq->sadb_x_ipsecrequest_mode = mode; rq->sadb_x_ipsecrequest_level = level; rq->sadb_x_ipsecrequest_reqid = reqid; sa = (u8 *) (rq + 1); if (!pfkey_sockaddr_fill(src, 0, (struct sockaddr *)sa, family) || !pfkey_sockaddr_fill(dst, 0, (struct sockaddr *)(sa + socklen), family)) return -EINVAL; return 0; } #endif #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { int i; int sasize_sel; int size = 0; int size_pol = 0; struct sk_buff *skb; struct sadb_msg *hdr; struct sadb_x_policy *pol; const struct xfrm_migrate *mp; if (type != XFRM_POLICY_TYPE_MAIN) return 0; if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; if (k != NULL) { /* addresses for KM */ size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + pfkey_sockaddr_pair_size(k->family)); } /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) return -EINVAL; size += (sizeof(struct sadb_address) + sasize_sel) * 2; /* policy info */ size_pol += sizeof(struct sadb_x_policy); /* ipsecrequests */ for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old locator pair */ size_pol += sizeof(struct 
sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->old_family); /* new locator pair */ size_pol += sizeof(struct sadb_x_ipsecrequest) + pfkey_sockaddr_pair_size(mp->new_family); } size += sizeof(struct sadb_msg) + size_pol; /* alloc buffer */ skb = alloc_skb(size, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; hdr = skb_put(skb, sizeof(struct sadb_msg)); hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_type = SADB_X_MIGRATE; hdr->sadb_msg_satype = pfkey_proto2satype(m->proto); hdr->sadb_msg_len = size / 8; hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; /* Addresses to be used by KM for negotiation, if ext is available */ if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) goto err; /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); /* selector dst */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_DST, sel); /* policy information */ pol = skb_put(skb, sizeof(struct sadb_x_policy)); pol->sadb_x_policy_len = size_pol / 8; pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; for (i = 0, mp = m; i < num_bundles; i++, mp++) { /* old ipsecrequest */ int mode = pfkey_mode_from_xfrm(mp->mode); if (mode < 0) goto err; if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->old_family, &mp->old_saddr, &mp->old_daddr) < 0) goto err; /* new ipsecrequest */ if (set_ipsecrequest(skb, mp->proto, mode, (mp->reqid ? 
IPSEC_LEVEL_UNIQUE : IPSEC_LEVEL_REQUIRE), mp->reqid, mp->new_family, &mp->new_saddr, &mp->new_daddr) < 0) goto err; } /* broadcast migrate message to sockets */ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; err: kfree_skb(skb); return -EINVAL; } #else static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap) { return -ENOPROTOOPT; } #endif static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct sk_buff *skb = NULL; struct sadb_msg *hdr = NULL; int err; struct net *net = sock_net(sk); err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) goto out; err = -EMSGSIZE; if ((unsigned int)len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; skb = alloc_skb(len, GFP_KERNEL); if (skb == NULL) goto out; err = -EFAULT; if (memcpy_from_msg(skb_put(skb,len), msg, len)) goto out; hdr = pfkey_get_base_msg(skb, &err); if (!hdr) goto out; mutex_lock(&net->xfrm.xfrm_cfg_mutex); err = pfkey_process(sk, skb, hdr); mutex_unlock(&net->xfrm.xfrm_cfg_mutex); out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) err = 0; kfree_skb(skb); return err ? : len; } static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct pfkey_sock *pfk = pfkey_sk(sk); struct sk_buff *skb; int copied, err; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; skb = skb_recv_datagram(sk, flags, &err); if (skb == NULL) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free; sock_recv_cmsgs(msg, sk, skb); err = (flags & MSG_TRUNC) ? 
skb->len : copied; if (pfk->dump.dump != NULL && 3 * atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) pfkey_do_dump(pfk); out_free: skb_free_datagram(sk, skb); out: return err; } static const struct proto_ops pfkey_ops = { .family = PF_KEY, .owner = THIS_MODULE, /* Operations that make no sense on pfkey sockets. */ .bind = sock_no_bind, .connect = sock_no_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .mmap = sock_no_mmap, /* Now the operations that really occur. */ .release = pfkey_release, .poll = datagram_poll, .sendmsg = pfkey_sendmsg, .recvmsg = pfkey_recvmsg, }; static const struct net_proto_family pfkey_family_ops = { .family = PF_KEY, .create = pfkey_create, .owner = THIS_MODULE, }; #ifdef CONFIG_PROC_FS static int pfkey_seq_show(struct seq_file *f, void *v) { struct sock *s = sk_entry(v); if (v == SEQ_START_TOKEN) seq_printf(f ,"sk RefCnt Rmem Wmem User Inode\n"); else seq_printf(f, "%pK %-6d %-6u %-6u %-6u %-6lu\n", s, refcount_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), from_kuid_munged(seq_user_ns(f), sk_uid(s)), sock_i_ino(s) ); return 0; } static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) __acquires(rcu) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); rcu_read_lock(); return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) { struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); return seq_hlist_next_rcu(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) __releases(rcu) { rcu_read_unlock(); } static const struct seq_operations pfkey_seq_ops = { .start = pfkey_seq_start, .next = pfkey_seq_next, .stop = pfkey_seq_stop, .show = pfkey_seq_show, }; static int __net_init 
pfkey_init_proc(struct net *net) { struct proc_dir_entry *e; e = proc_create_net("pfkey", 0, net->proc_net, &pfkey_seq_ops, sizeof(struct seq_net_private)); if (e == NULL) return -ENOMEM; return 0; } static void __net_exit pfkey_exit_proc(struct net *net) { remove_proc_entry("pfkey", net->proc_net); } #else static inline int pfkey_init_proc(struct net *net) { return 0; } static inline void pfkey_exit_proc(struct net *net) { } #endif static struct xfrm_mgr pfkeyv2_mgr = { .notify = pfkey_send_notify, .acquire = pfkey_send_acquire, .compile_policy = pfkey_compile_policy, .new_mapping = pfkey_send_new_mapping, .notify_policy = pfkey_send_policy_notify, .migrate = pfkey_send_migrate, .is_alive = pfkey_is_alive, }; static int __net_init pfkey_net_init(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); int rv; INIT_HLIST_HEAD(&net_pfkey->table); atomic_set(&net_pfkey->socks_nr, 0); rv = pfkey_init_proc(net); return rv; } static void __net_exit pfkey_net_exit(struct net *net) { struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_exit_proc(net); WARN_ON(!hlist_empty(&net_pfkey->table)); } static struct pernet_operations pfkey_net_ops = { .init = pfkey_net_init, .exit = pfkey_net_exit, .id = &pfkey_net_id, .size = sizeof(struct netns_pfkey), }; static void __exit ipsec_pfkey_exit(void) { xfrm_unregister_km(&pfkeyv2_mgr); sock_unregister(PF_KEY); unregister_pernet_subsys(&pfkey_net_ops); proto_unregister(&key_proto); } static int __init ipsec_pfkey_init(void) { int err = proto_register(&key_proto, 0); if (err != 0) goto out; err = register_pernet_subsys(&pfkey_net_ops); if (err != 0) goto out_unregister_key_proto; err = sock_register(&pfkey_family_ops); if (err != 0) goto out_unregister_pernet; xfrm_register_km(&pfkeyv2_mgr); out: return err; out_unregister_pernet: unregister_pernet_subsys(&pfkey_net_ops); out_unregister_key_proto: proto_unregister(&key_proto); goto out; } module_init(ipsec_pfkey_init); 
/* Module unload hook (ipsec_pfkey_exit) followed by module metadata: description, GPL license and the PF_KEY net-protocol alias. */
module_exit(ipsec_pfkey_exit); MODULE_DESCRIPTION("PF_KEY socket helpers"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_KEY);
10 125 16 88 103 78 78 68 103 180 23 103 103 2 55 81 55 26 1 79 54 26 69 1 6 4 57 1 55 1 57 5 5 31 89 57 32 6 2 4 91 55 36 6 1 84 21 21 27 27 13 13 13 2 1 36 36 2 31 1 34 27 27 27 27 27 27 7 27 27 20 1 7 20 3 21 58 58 56 13 13 1 13 13 167 134 9 134 2 165 168 51 50 92 93 93 93 25 65 60 7 93 82 84 29 32 33 33 33 33 33 30 4 29 30 30 4 58 3 54 53 54 1 51 53 53 53 53 54 54 52 53 54 23 22 23 23 20 6 1 2 2 2 4 2 20 117 119 112 53 55 56 49 12 12 12 12 67 3 64 65 65 64 65 66 64 65 65 3 75 76 77 26 69 68 3 66 20 1 66 17 5 3 11 10 7 12 20 16 4 20 2 12 88 87 3 5 86 7 12 80 6 74 46 18 18 59 11 88 59 55 21 17 59 58 3 3 4 1 1 2 1 1 1 1 1 1 2 2 24 24 24 22 24 24 22 3 23 24 3 74 70 24 10 10 1 1 3 3 3 5 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 
1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 
2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 
2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 // SPDX-License-Identifier: GPL-2.0-only /* Connection state tracking for netfilter. This is separated from, but required by, the NAT layer; it can also be used by an iptables extension. 
*/ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org> * (C) 2005-2012 Patrick McHardy <kaber@trash.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/sched.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/vmalloc.h> #include <linux/stddef.h> #include <linux/slab.h> #include <linux/random.h> #include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> #include <linux/moduleparam.h> #include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/mm.h> #include <linux/nsproxy.h> #include <linux/rculist_nulls.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_bpf.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_timestamp.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_labels.h> #include <net/netfilter/nf_conntrack_synproxy.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_helper.h> #include <net/netns/hash.h> #include <net/ip.h> #include "nf_internals.h" __cacheline_aligned_in_smp spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; EXPORT_SYMBOL_GPL(nf_conntrack_locks); __cacheline_aligned_in_smp DEFINE_SPINLOCK(nf_conntrack_expect_lock); EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); struct hlist_nulls_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); struct conntrack_gc_work { struct delayed_work dwork; 
u32 next_bucket; u32 avg_timeout; u32 count; u32 start_time; bool exiting; bool early_drop; };

static __read_mostly struct kmem_cache *nf_conntrack_cachep;

/* Global lock: held from nf_conntrack_all_lock() until
 * nf_conntrack_all_unlock(), and taken briefly on the slow path of
 * nf_conntrack_lock().
 */
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);

/* True while nf_conntrack_all_lock() is in effect; nf_conntrack_lock()
 * reads it after taking an individual lock.
 */
static __read_mostly bool nf_conntrack_locks_all;

/* serialize hash resizes and nf_ct_iterate_cleanup */
static DEFINE_MUTEX(nf_conntrack_mutex);

#define GC_SCAN_INTERVAL_MAX (60ul * HZ)
#define GC_SCAN_INTERVAL_MIN (1ul * HZ)

/* clamp timeouts to this value (TCP unacked) */
#define GC_SCAN_INTERVAL_CLAMP (300ul * HZ)

/* Initial bias pretending we have 100 entries at the upper bound so we don't
 * wake up often just because we have three entries with a 1s timeout while
 * still allowing non-idle machines to wake up more often when needed.
 */
#define GC_SCAN_INITIAL_COUNT 100
#define GC_SCAN_INTERVAL_INIT GC_SCAN_INTERVAL_MAX

#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
#define GC_SCAN_EXPIRED_MAX (64000u / HZ)

/* Bounds of the randomised chain-length limit applied on insert:
 * nf_conntrack_hash_check_insert() computes its limit as
 * MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN).
 */
#define MIN_CHAINLEN 50u
#define MAX_CHAINLEN (80u - MIN_CHAINLEN)

static struct conntrack_gc_work conntrack_gc_work;

/*
 * nf_conntrack_lock - take one of the conntrack spinlocks
 * @lock: the spinlock to acquire
 *
 * Fast path: take @lock and return with it held if nf_conntrack_locks_all
 * is false.  Slow path (flag is set): drop @lock again, take
 * nf_conntrack_locks_all_lock, re-take @lock under it and then release the
 * global lock, so the caller only proceeds once the holder of the global
 * lock has let go of it.  In both cases the function returns with @lock held.
 */
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
	/* 1) Acquire the lock */
	spin_lock(lock);

	/* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
	 * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
	 */
	if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
		return;

	/* fast path failed, unlock */
	spin_unlock(lock);

	/* Slow path 1) get global lock */
	spin_lock(&nf_conntrack_locks_all_lock);

	/* Slow path 2) get the lock we want */
	spin_lock(lock);

	/* Slow path 3) release the global lock */
	spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);

/*
 * Release the lock(s) selected by hashes @h1 and @h2.  Both values are
 * reduced modulo CONNTRACK_LOCKS first; when they select the same lock it
 * is released only once.
 */
static void nf_conntrack_double_unlock(unsigned int h1, unsigned int h2)
{
	h1 %= CONNTRACK_LOCKS;
	h2 %= CONNTRACK_LOCKS;
	spin_unlock(&nf_conntrack_locks[h1]);
	if (h1 != h2)
		spin_unlock(&nf_conntrack_locks[h2]);
}

/* return true if we need to recompute hashes (in case hash table was resized) */
static bool
nf_conntrack_double_lock(unsigned int h1, unsigned int h2,
			 unsigned int sequence)
{
	/* Map both hashes onto the lock array. */
	h1 %= CONNTRACK_LOCKS;
	h2 %= CONNTRACK_LOCKS;

	/* Always acquire the lower-indexed lock first, whatever the argument
	 * order, and take the second lock only if it is a different one.
	 */
	if (h1 <= h2) {
		nf_conntrack_lock(&nf_conntrack_locks[h1]);
		if (h1 != h2)
			spin_lock_nested(&nf_conntrack_locks[h2],
					 SINGLE_DEPTH_NESTING);
	} else {
		nf_conntrack_lock(&nf_conntrack_locks[h2]);
		spin_lock_nested(&nf_conntrack_locks[h1],
				 SINGLE_DEPTH_NESTING);
	}

	/* nf_conntrack_generation moved on since @sequence was sampled:
	 * drop both locks again and report that a retry is needed.
	 */
	if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
		nf_conntrack_double_unlock(h1, h2);
		return true;
	}
	return false;
}

/*
 * Take nf_conntrack_locks_all_lock, set nf_conntrack_locks_all and then
 * acquire and immediately release every lock in nf_conntrack_locks[].
 * Returns with nf_conntrack_locks_all_lock still held; it is released by
 * nf_conntrack_all_unlock().
 */
static void nf_conntrack_all_lock(void)
	__acquires(&nf_conntrack_locks_all_lock)
{
	int i;

	spin_lock(&nf_conntrack_locks_all_lock);

	/* For nf_conntrack_locks_all, only the latest time when another
	 * CPU will see an update is controlled, by the "release" of the
	 * spin_lock below.
	 * The earliest time is not controlled, and thus KCSAN could detect
	 * a race when nf_conntrack_lock() reads the variable.
	 * WRITE_ONCE() is used to ensure the compiler will not
	 * optimize the write.
	 */
	WRITE_ONCE(nf_conntrack_locks_all, true);

	for (i = 0; i < CONNTRACK_LOCKS; i++) {
		spin_lock(&nf_conntrack_locks[i]);

		/* This spin_unlock provides the "release" to ensure that
		 * nf_conntrack_locks_all==true is visible to everyone that
		 * acquired spin_lock(&nf_conntrack_locks[]).
		 */
		spin_unlock(&nf_conntrack_locks[i]);
	}
}

/*
 * Counterpart of nf_conntrack_all_lock(): clear nf_conntrack_locks_all and
 * release nf_conntrack_locks_all_lock.
 */
static void nf_conntrack_all_unlock(void)
	__releases(&nf_conntrack_locks_all_lock)
{
	/* All prior stores must be complete before we clear
	 * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
	 * might observe the false value but not the entire
	 * critical section.
* It pairs with the smp_load_acquire() in nf_conntrack_lock() */
	smp_store_release(&nf_conntrack_locks_all, false);
	spin_unlock(&nf_conntrack_locks_all_lock);
}

/* Size used by scale_hash() to map a raw hash to a table index. */
unsigned int nf_conntrack_htable_size __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);

unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);

/* Sequence counter sampled by lockers and re-checked in
 * nf_conntrack_double_lock() to detect that hashes must be recomputed.
 */
seqcount_spinlock_t nf_conntrack_generation __read_mostly;

/* Random siphash key, initialised on first use in hash_conntrack_raw(). */
static siphash_aligned_key_t nf_conntrack_hash_rnd;

/*
 * Compute the unscaled 32-bit hash of @tuple.
 *
 * A private copy of the random key is perturbed with @zoneid and
 * net_hash_mix(@net) before hashing.  Only the leading part of the tuple,
 * up to the end of dst.__nfct_hash_offsetend, is fed to siphash().
 */
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
			      unsigned int zoneid,
			      const struct net *net)
{
	siphash_key_t key;

	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));

	/* work on a copy so the shared key itself is never modified */
	key = nf_conntrack_hash_rnd;

	key.key[0] ^= zoneid;
	key.key[1] ^= net_hash_mix(net);

	return siphash((void *)tuple,
			offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
			&key);
}

/* Scale a raw hash against the current nf_conntrack_htable_size. */
static u32 scale_hash(u32 hash)
{
	return reciprocal_scale(hash, nf_conntrack_htable_size);
}

/* Hash @tuple and scale the result against an explicit table @size. */
static u32 __hash_conntrack(const struct net *net,
			    const struct nf_conntrack_tuple *tuple,
			    unsigned int zoneid,
			    unsigned int size)
{
	return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
}

/* Hash @tuple and scale the result against nf_conntrack_htable_size. */
static u32 hash_conntrack(const struct net *net,
			  const struct nf_conntrack_tuple *tuple,
			  unsigned int zoneid)
{
	return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
}

/*
 * Copy the two 16-bit port fields found at @dataoff in @skb into
 * tuple->src.u.udp.port and tuple->dst.u.udp.port.  Returns false when the
 * bytes cannot be obtained via skb_header_pointer(), true otherwise.
 */
static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
				  unsigned int dataoff,
				  struct nf_conntrack_tuple *tuple)
{
	struct {
		__be16 sport;
		__be16 dport;
	} _inet_hdr, *inet_hdr;

	/* Actually only need first 4 bytes to get ports.
*/
	inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
	if (!inet_hdr)
		return false;

	tuple->src.u.udp.port = inet_hdr->sport;
	tuple->dst.u.udp.port = inet_hdr->dport;
	return true;
}

/*
 * Build @tuple for the packet in @skb.
 *
 * @nhoff:    offset of the network header
 * @dataoff:  offset of the layer-4 header
 * @l3num:    layer-3 family (NFPROTO_IPV4 / NFPROTO_IPV6 handled here)
 * @protonum: layer-4 protocol number
 *
 * The tuple is zeroed, the addresses are copied from the network header,
 * the direction is set to IP_CT_DIR_ORIGINAL and the layer-4 part is filled
 * by the per-protocol helper.  For any other @l3num the function returns
 * true with only src.l3num set.  Returns false when the address bytes
 * cannot be read or when the per-protocol helper reports failure.
 */
static bool
nf_ct_get_tuple(const struct sk_buff *skb,
		unsigned int nhoff,
		unsigned int dataoff,
		u_int16_t l3num,
		u_int8_t protonum,
		struct net *net,
		struct nf_conntrack_tuple *tuple)
{
	unsigned int size;
	const __be32 *ap;
	__be32 _addrs[8];	/* room for two IPv6 addresses (2 x 4 words) */

	memset(tuple, 0, sizeof(*tuple));

	tuple->src.l3num = l3num;
	/* Point nhoff at the source address and work out how much to read. */
	switch (l3num) {
	case NFPROTO_IPV4:
		nhoff += offsetof(struct iphdr, saddr);
		size = 2 * sizeof(__be32);
		break;
	case NFPROTO_IPV6:
		nhoff += offsetof(struct ipv6hdr, saddr);
		size = sizeof(_addrs);
		break;
	default:
		return true;
	}

	ap = skb_header_pointer(skb, nhoff, size, _addrs);
	if (!ap)
		return false;

	/* Source address comes first, destination address follows it. */
	switch (l3num) {
	case NFPROTO_IPV4:
		tuple->src.u3.ip = ap[0];
		tuple->dst.u3.ip = ap[1];
		break;
	case NFPROTO_IPV6:
		memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6));
		memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6));
		break;
	}

	tuple->dst.protonum = protonum;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	/* Protocols not listed below keep the zeroed layer-4 fields. */
	switch (protonum) {
#if IS_ENABLED(CONFIG_IPV6)
	case IPPROTO_ICMPV6:
		return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple);
#endif
	case IPPROTO_ICMP:
		return icmp_pkt_to_tuple(skb, dataoff, net, tuple);
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		return gre_pkt_to_tuple(skb, dataoff, net, tuple);
#endif
	case IPPROTO_TCP:
	case IPPROTO_UDP:
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
	case IPPROTO_UDPLITE:
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
	case IPPROTO_SCTP:
#endif
		/* fallthrough */
		return nf_ct_get_tuple_ports(skb, dataoff, tuple);
	default:
		break;
	}

	return true;
}

/*
 * Read the IPv4 header at @nhoff.  On success the layer-4 protocol is
 * stored in *@protonum and the offset of the layer-4 header is returned;
 * -1 is returned on failure.
 */
static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    u_int8_t *protonum)
{
	int dataoff = -1;
	const struct iphdr *iph;
	struct iphdr _iph;

	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
	if (!iph)
		return -1;

	/* Conntrack defragments packets, we might still see fragments
	 * inside ICMP
packets though. */
	if (iph->frag_off & htons(IP_OFFSET))
		return -1;

	/* ihl counts 32-bit words, hence the shift by two. */
	dataoff = nhoff + (iph->ihl << 2);
	*protonum = iph->protocol;

	/* Check bogus IP headers */
	if (dataoff > skb->len) {
		pr_debug("bogus IPv4 packet: nhoff %u, ihl %u, skblen %u\n",
			 nhoff, iph->ihl << 2, skb->len);
		return -1;
	}
	return dataoff;
}

#if IS_ENABLED(CONFIG_IPV6)
/*
 * Locate the layer-4 header of the IPv6 packet whose header starts at
 * @nhoff.  On success the protocol found by ipv6_skip_exthdr() is stored
 * in *@protonum and its offset is returned.  Returns -1 when the nexthdr
 * byte cannot be copied, when ipv6_skip_exthdr() fails, or when frag_off
 * has any bit selected by htons(~0x7) set.
 */
static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
			    u8 *protonum)
{
	int protoff = -1;
	/* extension headers, if any, start right after the fixed header */
	unsigned int extoff = nhoff + sizeof(struct ipv6hdr);
	__be16 frag_off;
	u8 nexthdr;

	if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr),
			  &nexthdr, sizeof(nexthdr)) != 0) {
		pr_debug("can't get nexthdr\n");
		return -1;
	}
	protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
	/*
	 * (protoff == skb->len) means the packet has not data, just
	 * IPv6 and possibly extensions headers, but it is tracked anyway
	 */
	if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
		pr_debug("can't find proto in pkt\n");
		return -1;
	}

	*protonum = nexthdr;
	return protoff;
}
#endif

/*
 * Dispatch to the family-specific layer-4 lookup for @pf.  For an
 * unhandled family *@l4num is set to 0 and -1 is returned.
 */
static int get_l4proto(const struct sk_buff *skb,
		       unsigned int nhoff, u8 pf, u8 *l4num)
{
	switch (pf) {
	case NFPROTO_IPV4:
		return ipv4_get_l4proto(skb, nhoff, l4num);
#if IS_ENABLED(CONFIG_IPV6)
	case NFPROTO_IPV6:
		return ipv6_get_l4proto(skb, nhoff, l4num);
#endif
	default:
		*l4num = 0;
		break;
	}
	return -1;
}

/*
 * Build @tuple for @skb, determining the layer-4 protocol and its offset
 * from the packet itself.  Returns false when get_l4proto() yields an
 * offset <= 0, otherwise the result of nf_ct_get_tuple().
 */
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
		       u_int16_t l3num,
		       struct net *net, struct nf_conntrack_tuple *tuple)
{
	u8 protonum;
	int protoff;

	protoff = get_l4proto(skb, nhoff, l3num, &protonum);
	if (protoff <= 0)
		return false;

	return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);

/*
 * Fill @inverse with the tuple for the opposite direction of @orig:
 * source and destination addresses are swapped and dst.dir is negated.
 * ICMP and ICMPv6 are inverted by their dedicated helpers; for every
 * other protocol the layer-4 identifiers are simply swapped.
 */
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
		   const struct nf_conntrack_tuple *orig)
{
	memset(inverse, 0, sizeof(*inverse));

	inverse->src.l3num = orig->src.l3num;

	switch (orig->src.l3num) {
	case NFPROTO_IPV4:
		inverse->src.u3.ip = orig->dst.u3.ip;
		inverse->dst.u3.ip = orig->src.u3.ip;
		break;
case NFPROTO_IPV6:
		inverse->src.u3.in6 = orig->dst.u3.in6;
		inverse->dst.u3.in6 = orig->src.u3.in6;
		break;
	default:
		break;
	}

	inverse->dst.dir = !orig->dst.dir;

	inverse->dst.protonum = orig->dst.protonum;

	/* ICMP/ICMPv6 have their own inversion helpers. */
	switch (orig->dst.protonum) {
	case IPPROTO_ICMP:
		return nf_conntrack_invert_icmp_tuple(inverse, orig);
#if IS_ENABLED(CONFIG_IPV6)
	case IPPROTO_ICMPV6:
		return nf_conntrack_invert_icmpv6_tuple(inverse, orig);
#endif
	}

	/* Everything else: swap the layer-4 identifiers. */
	inverse->src.u.all = orig->dst.u.all;
	inverse->dst.u.all = orig->src.u.all;
	return true;
}
EXPORT_SYMBOL_GPL(nf_ct_invert_tuple);

/* Generate an almost-unique pseudo-id for a given conntrack.
 *
 * intentionally doesn't re-use any of the seeds used for hash
 * table location, we assume id gets exposed to userspace.
 *
 * Following nf_conn items do not change throughout lifetime
 * of the nf_conn:
 *
 * 1. nf_conn address
 * 2. nf_conn->master address (normally NULL)
 * 3. the associated net namespace
 * 4. the original direction tuple
 */
u32 nf_ct_get_id(const struct nf_conn *ct)
{
	static siphash_aligned_key_t ct_id_seed;
	unsigned long a, b, c, d;

	net_get_random_once(&ct_id_seed, sizeof(ct_id_seed));

	/* a..d correspond to items 1..4 of the list above; the tuple is
	 * reduced to a single word by hashing it with the same seed.
	 */
	a = (unsigned long)ct;
	b = (unsigned long)ct->master;
	c = (unsigned long)nf_ct_net(ct);
	d = (unsigned long)siphash(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				   sizeof(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple),
				   &ct_id_seed);
#ifdef CONFIG_64BIT
	return siphash_4u64((u64)a, (u64)b, (u64)c, (u64)d, &ct_id_seed);
#else
	return siphash_4u32((u32)a, (u32)b, (u32)c, (u32)d, &ct_id_seed);
#endif
}
EXPORT_SYMBOL_GPL(nf_ct_get_id);

/* Same as nf_ct_get_id(), taking the generic struct nf_conntrack pointer. */
static u32 nf_conntrack_get_id(const struct nf_conntrack *nfct)
{
	return nf_ct_get_id(nf_ct_to_nf_conn(nfct));
}

/*
 * Unlink both tuplehash nodes of @ct (original and reply direction) from
 * their nulls lists and remove the expectations attached to @ct.
 */
static void
clean_from_lists(struct nf_conn *ct)
{
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);

	/* Destroy all pending expectations */
	nf_ct_remove_expectations(ct);
}

/* Round @len up so that the bits in NFCT_INFOMASK are clear (assumes
 * NFCT_INFOMASK has the form 2^n - 1 -- defined outside this file).
 */
#define NFCT_ALIGN(len) (((len) + NFCT_INFOMASK) & ~NFCT_INFOMASK)

/* Released via nf_ct_destroy()
*/
/*
 * nf_ct_tmpl_alloc - allocate a conntrack template
 * @net:   network namespace stored in the template
 * @zone:  zone passed to nf_ct_zone_add()
 * @flags: allocation flags
 *
 * Returns a zeroed nf_conn with status IPS_TEMPLATE and a reference count
 * of 1, or NULL if the allocation fails.
 *
 * When ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK the object is over-allocated
 * by NFCT_INFOMASK bytes and the returned pointer is rounded up with
 * NFCT_ALIGN(); the distance to the start of the allocation is stored in
 * proto.tmpl_padto so that nf_ct_tmpl_free() can recover the pointer to
 * hand to kfree().
 */
struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
				 const struct nf_conntrack_zone *zone,
				 gfp_t flags)
{
	struct nf_conn *tmpl, *p;

	if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK) {
		tmpl = kzalloc(sizeof(*tmpl) + NFCT_INFOMASK, flags);
		if (!tmpl)
			return NULL;

		p = tmpl;
		tmpl = (struct nf_conn *)NFCT_ALIGN((unsigned long)p);
		/* if no adjustment was needed tmpl_padto stays 0 (kzalloc) */
		if (tmpl != p)
			tmpl->proto.tmpl_padto = (char *)tmpl - (char *)p;
	} else {
		tmpl = kzalloc(sizeof(*tmpl), flags);
		if (!tmpl)
			return NULL;
	}

	tmpl->status = IPS_TEMPLATE;
	write_pnet(&tmpl->ct_net, net);
	nf_ct_zone_add(tmpl, zone);
	refcount_set(&tmpl->ct_general.use, 1);

	return tmpl;
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);

/*
 * Free a template obtained from nf_ct_tmpl_alloc(): release tmpl->ext,
 * then the allocation itself, undoing the alignment offset recorded in
 * proto.tmpl_padto when the over-allocating path was used.
 */
void nf_ct_tmpl_free(struct nf_conn *tmpl)
{
	kfree(tmpl->ext);

	if (ARCH_KMALLOC_MINALIGN <= NFCT_INFOMASK)
		kfree((char *)tmpl - tmpl->proto.tmpl_padto);
	else
		kfree(tmpl);
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);

/*
 * With CONFIG_NF_CT_PROTO_GRE: if @ct has a master, call
 * nf_ct_gre_keymap_destroy() on it.  Otherwise a no-op.
 */
static void destroy_gre_conntrack(struct nf_conn *ct)
{
#ifdef CONFIG_NF_CT_PROTO_GRE
	struct nf_conn *master = ct->master;

	if (master)
		nf_ct_gre_keymap_destroy(master);
#endif
}

/*
 * nf_ct_destroy - final teardown of a conntrack object
 * @nfct: object to destroy; its use count is expected to be zero
 *
 * Templates are handed to nf_ct_tmpl_free().  For regular entries the GRE
 * hook is run if applicable, remaining expectations are removed, the
 * reference on ct->master (if any) is dropped and the entry is freed.
 */
void nf_ct_destroy(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = (struct nf_conn *)nfct;

	WARN_ON(refcount_read(&nfct->use) != 0);

	if (unlikely(nf_ct_is_template(ct))) {
		nf_ct_tmpl_free(ct);
		return;
	}

	if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
		destroy_gre_conntrack(ct);

	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too.
*/
	nf_ct_remove_expectations(ct);

	/* drop the reference held on the master, if there is one */
	if (ct->master)
		nf_ct_put(ct->master);

	nf_conntrack_free(ct);
}
EXPORT_SYMBOL(nf_ct_destroy);

/*
 * Unlink @ct from the hash table.
 *
 * Both bucket hashes are computed under a nf_conntrack_generation read
 * section and the matching locks are taken; the loop repeats for as long
 * as nf_conntrack_double_lock() reports that the generation changed.
 * Callers in this file invoke it with bottom halves disabled.
 */
static void __nf_ct_delete_from_lists(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	unsigned int sequence;

	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
					   nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
	} while (nf_conntrack_double_lock(hash, reply_hash, sequence));

	clean_from_lists(ct);
	nf_conntrack_double_unlock(hash, reply_hash);
}

/*
 * Run nf_ct_helper_destroy() on @ct, then unlink it from the hash table
 * with bottom halves disabled.
 */
static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
	nf_ct_helper_destroy(ct);
	local_bh_disable();

	__nf_ct_delete_from_lists(ct);

	local_bh_enable();
}

/*
 * With CONFIG_NF_CONNTRACK_EVENTS: add the original-direction node of @ct
 * to the per-netns ecache.dying_list under ecache.dying_lock.  Otherwise a
 * no-op.
 */
static void nf_ct_add_to_ecache_list(struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_EVENTS
	struct nf_conntrack_net *cnet = nf_ct_pernet(nf_ct_net(ct));

	spin_lock(&cnet->ecache.dying_lock);
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
				 &cnet->ecache.dying_list);
	spin_unlock(&cnet->ecache.dying_lock);
#endif
}

/*
 * nf_ct_delete - mark a conntrack as dying and remove it from the table
 * @ct:     entry to delete
 * @portid: passed through to nf_conntrack_event_report()
 * @report: passed through to nf_conntrack_event_report()
 *
 * Returns false if IPS_DYING_BIT was already set, or if the destroy event
 * could not be delivered (the entry is then moved to the ecache dying list
 * instead of being released here).  Returns true once the entry has been
 * unlinked and a reference has been dropped with nf_ct_put().
 */
bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report)
{
	struct nf_conn_tstamp *tstamp;
	struct net *net;

	/* only the first caller to set the dying bit proceeds */
	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
		return false;

	tstamp = nf_conn_tstamp_find(ct);
	if (tstamp) {
		s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp;

		tstamp->stop = ktime_get_real_ns();
		/* ct->timeout already lies in the past: back-date the stop
		 * time by the amount it is overdue.
		 */
		if (timeout < 0)
			tstamp->stop -= jiffies_to_nsecs(-timeout);
	}

	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
				    portid, report) < 0) {
		/* destroy event was not delivered. nf_ct_put will
		 * be done by event cache worker on redelivery.
*/
		nf_ct_helper_destroy(ct);
		local_bh_disable();
		__nf_ct_delete_from_lists(ct);
		nf_ct_add_to_ecache_list(ct);
		local_bh_enable();

		nf_conntrack_ecache_work(nf_ct_net(ct), NFCT_ECACHE_DESTROY_FAIL);
		return false;
	}

	net = nf_ct_net(ct);
	if (nf_conntrack_ecache_dwork_pending(net))
		nf_conntrack_ecache_work(net, NFCT_ECACHE_DESTROY_SENT);
	nf_ct_delete_from_lists(ct);
	nf_ct_put(ct);
	return true;
}
EXPORT_SYMBOL_GPL(nf_ct_delete);

/*
 * Check whether hash entry @h is the confirmed conntrack for @tuple in
 * @zone and @net: tuple, zone (for the direction of @h), confirmed state
 * and network namespace must all match.
 */
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
		const struct nf_conntrack_tuple *tuple,
		const struct nf_conntrack_zone *zone,
		const struct net *net)
{
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);

	/* A conntrack can be recreated with the equal tuple,
	 * so we need to check that the conntrack is confirmed
	 */
	return nf_ct_tuple_equal(tuple, &h->tuple) &&
	       nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
	       nf_ct_is_confirmed(ct) &&
	       net_eq(net, nf_ct_net(ct));
}

/*
 * True when @ct1 and @ct2 have equal tuples and zones in both directions
 * and belong to the same network namespace.
 */
static inline bool
nf_ct_match(const struct nf_conn *ct1, const struct nf_conn *ct2)
{
	return nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				 &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
	       nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple,
				 &ct2->tuplehash[IP_CT_DIR_REPLY].tuple) &&
	       nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL) &&
	       nf_ct_zone_equal(ct1, nf_ct_zone(ct2), IP_CT_DIR_REPLY) &&
	       net_eq(nf_ct_net(ct1), nf_ct_net(ct2));
}

/* caller must hold rcu readlock and none of the nf_conntrack_locks */
/* Takes a temporary reference on @ct (bails out if the use count is already
 * zero), kills the entry if nf_ct_should_gc() agrees, then drops the
 * reference again.
 */
static void nf_ct_gc_expired(struct nf_conn *ct)
{
	if (!refcount_inc_not_zero(&ct->ct_general.use))
		return;

	/* load ->status after refcount increase */
	smp_acquire__after_ctrl_dep();

	if (nf_ct_should_gc(ct))
		nf_ct_kill(ct);

	nf_ct_put(ct);
}

/*
 * Warning :
 * - Caller must take a reference on returned object
 *   and recheck nf_ct_tuple_equal(tuple, &h->tuple)
 */
static struct nf_conntrack_tuple_hash *
____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct
nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	struct hlist_nulls_node *n;
	unsigned int bucket, hsize;

begin:
	/* fetch the current table and size, then scale @hash to a bucket */
	nf_conntrack_get_ht(&ct_hash, &hsize);
	bucket = reciprocal_scale(hash, hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
		struct nf_conn *ct;

		ct = nf_ct_tuplehash_to_ctrack(h);
		/* expired entries are handed to the gc helper and skipped */
		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_key_equal(h, tuple, zone, net))
			return h;
	}
	/*
	 * if the nulls value we got at the end of this lookup is
	 * not the expected one, we must restart lookup.
	 * We probably met an item that was moved to another chain.
	 */
	if (get_nulls_value(n) != bucket) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	return NULL;
}

/* Find a connection corresponding to a tuple. */
/* On success the returned entry carries an extra reference taken here;
 * NULL is returned when nothing matches, when the reference cannot be
 * taken, or when the key no longer matches after taking it.
 */
static struct nf_conntrack_tuple_hash *
__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
			const struct nf_conntrack_tuple *tuple, u32 hash)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	h = ____nf_conntrack_find(net, zone, tuple, hash);
	if (h) {
		/* We have a candidate that matches the tuple we're interested
		 * in, try to obtain a reference and re-check tuple
		 */
		ct = nf_ct_tuplehash_to_ctrack(h);
		if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
			/* re-check key after refcount */
			smp_acquire__after_ctrl_dep();

			if (likely(nf_ct_key_equal(h, tuple, zone, net)))
				return h;

			/* TYPESAFE_BY_RCU recycled the candidate */
			nf_ct_put(ct);
		}

		h = NULL;
	}

	return h;
}

/*
 * nf_conntrack_find_get - look up @tuple and take a reference on the match
 *
 * The lookup runs inside an RCU read-side section.  It is first attempted
 * with the hash derived from the zone id for IP_CT_DIR_ORIGINAL; if that
 * finds nothing and the zone id for IP_CT_DIR_REPLY differs, a second
 * attempt is made with the hash derived from that id.  Returns the matching
 * hash entry or NULL.
 */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
		      const struct nf_conntrack_tuple *tuple)
{
	unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
	struct nf_conntrack_tuple_hash *thash;

	rcu_read_lock();

	thash = __nf_conntrack_find_get(net, zone, tuple,
					hash_conntrack_raw(tuple, zone_id, net));

	if (thash)
		goto out_unlock;

	rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
	if (rid != zone_id)
		thash = __nf_conntrack_find_get(net, zone, tuple,
hash_conntrack_raw(tuple, rid, net));

out_unlock:
	rcu_read_unlock();
	return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);

/*
 * Link both tuplehash nodes of @ct at the head of their buckets in
 * nf_conntrack_hash.  @hash and @reply_hash are used directly as bucket
 * indexes.  No locking is done here; the callers visible in this file hold
 * the bucket locks obtained via nf_conntrack_double_lock().
 */
static void __nf_conntrack_hash_insert(struct nf_conn *ct,
				       unsigned int hash,
				       unsigned int reply_hash)
{
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
				 &nf_conntrack_hash[hash]);
	hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
				 &nf_conntrack_hash[reply_hash]);
}

/*
 * Check done before taking the bucket locks: true when there is no
 * extension area or its gen_id equals the current nf_conntrack_ext_genid.
 */
static bool nf_ct_ext_valid_pre(const struct nf_ct_ext *ext)
{
	/* if ext->gen_id is not equal to nf_conntrack_ext_genid, some extensions
	 * may contain stale pointers to e.g. helper that has been removed.
	 *
	 * The helper can't clear this because the nf_conn object isn't in
	 * any hash and synchronize_rcu() isn't enough because associated skb
	 * might sit in a queue.
	 */
	return !ext || ext->gen_id == atomic_read(&nf_conntrack_ext_genid);
}

/*
 * Check done right before insertion: returns false when @ext exists and its
 * gen_id differs from nf_conntrack_ext_genid.  Otherwise returns true; for
 * a non-NULL @ext the gen_id is additionally reset to 0.
 */
static bool nf_ct_ext_valid_post(struct nf_ct_ext *ext)
{
	if (!ext)
		return true;

	if (ext->gen_id != atomic_read(&nf_conntrack_ext_genid))
		return false;

	/* inserted into conntrack table, nf_ct_iterate_cleanup()
	 * will find it. Disable nf_ct_ext_find() id check.
*/
	WRITE_ONCE(ext->gen_id, 0);
	return true;
}

/*
 * nf_conntrack_hash_check_insert - insert @ct unless it clashes
 * @ct: conntrack to insert; the caller holds a reference on it
 *
 * Returns 0 on success, in which case the use count is set to 2.
 * Error returns:
 *   -EAGAIN  the extension area of @ct is stale (checked before locking
 *            and again just before insertion)
 *   -EEXIST  an entry with the same original or reply tuple is already
 *            present in the corresponding bucket
 *   -ENOSPC  one of the two buckets holds more entries than the randomised
 *            limit MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN)
 *
 * Bottom halves are disabled and both bucket locks are held for the
 * duration of the check and the insertion.
 */
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
	const struct nf_conntrack_zone *zone;
	struct net *net = nf_ct_net(ct);
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int max_chainlen;
	unsigned int chainlen = 0;
	unsigned int sequence;
	int err = -EEXIST;	/* result of the "goto out" paths in the loops */

	zone = nf_ct_zone(ct);

	if (!nf_ct_ext_valid_pre(ct->ext))
		return -EAGAIN;

	local_bh_disable();
	/* compute both bucket hashes and lock them; retry if the table
	 * generation changed in between
	 */
	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		hash = hash_conntrack(net,
				      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
					   nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
	} while (nf_conntrack_double_lock(hash, reply_hash, sequence));

	max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN);

	/* See if there's one in the list already, including reverse */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;

		if (chainlen++ > max_chainlen)
			goto chaintoolong;
	}

	/* the limit applies to each bucket separately */
	chainlen = 0;

	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;
		if (chainlen++ > max_chainlen)
			goto chaintoolong;
	}

	/* If genid has changed, we can't insert anymore because ct
	 * extensions could have stale pointers and nf_ct_iterate_destroy
	 * might have completed its table scan already.
	 *
	 * Increment of the ext genid right after this check is fine:
	 * nf_ct_iterate_destroy blocks until locks are released.
*/ if (!nf_ct_ext_valid_post(ct->ext)) { err = -EAGAIN; goto out; } smp_wmb(); /* The caller holds a reference to this object */ refcount_set(&ct->ct_general.use, 2); __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); NF_CT_STAT_INC(net, insert); local_bh_enable(); return 0; chaintoolong: NF_CT_STAT_INC(net, chaintoolong); err = -ENOSPC; out: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); return err; } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets, unsigned int bytes) { struct nf_conn_acct *acct; acct = nf_conn_acct_find(ct); if (acct) { struct nf_conn_counter *counter = acct->counter; atomic64_add(packets, &counter[dir].packets); atomic64_add(bytes, &counter[dir].bytes); } } EXPORT_SYMBOL_GPL(nf_ct_acct_add); static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo, const struct nf_conn *loser_ct) { struct nf_conn_acct *acct; acct = nf_conn_acct_find(loser_ct); if (acct) { struct nf_conn_counter *counter = acct->counter; unsigned int bytes; /* u32 should be fine since we must have seen one packet. */ bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes); nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes); } } static void __nf_conntrack_insert_prepare(struct nf_conn *ct) { struct nf_conn_tstamp *tstamp; refcount_inc(&ct->ct_general.use); /* set conntrack timestamp, if enabled. */ tstamp = nf_conn_tstamp_find(ct); if (tstamp) tstamp->start = ktime_get_real_ns(); } /** * nf_ct_match_reverse - check if ct1 and ct2 refer to identical flow * @ct1: conntrack in hash table to check against * @ct2: merge candidate * * returns true if ct1 and ct2 happen to refer to the same flow, but * in opposing directions, i.e. * ct1: a:b -> c:d * ct2: c:d -> a:b * for both directions. If so, @ct2 should not have been created * as the skb should have been picked up as ESTABLISHED flow. 
* But ct1 was not yet committed to hash table before skb that created * ct2 had arrived. * * Note we don't compare netns because ct entries in different net * namespace cannot clash to begin with. * * @return: true if ct1 and ct2 are identical when swapping origin/reply. */ static bool nf_ct_match_reverse(const struct nf_conn *ct1, const struct nf_conn *ct2) { u16 id1, id2; if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &ct2->tuplehash[IP_CT_DIR_REPLY].tuple)) return false; if (!nf_ct_tuple_equal(&ct1->tuplehash[IP_CT_DIR_REPLY].tuple, &ct2->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) return false; id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_ORIGINAL); id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_REPLY); if (id1 != id2) return false; id1 = nf_ct_zone_id(nf_ct_zone(ct1), IP_CT_DIR_REPLY); id2 = nf_ct_zone_id(nf_ct_zone(ct2), IP_CT_DIR_ORIGINAL); return id1 == id2; } static int nf_ct_can_merge(const struct nf_conn *ct, const struct nf_conn *loser_ct) { return nf_ct_match(ct, loser_ct) || nf_ct_match_reverse(ct, loser_ct); } /* caller must hold locks to prevent concurrent changes */ static int __nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h) { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); enum ip_conntrack_info ctinfo; struct nf_conn *loser_ct; loser_ct = nf_ct_get(skb, &ctinfo); if (nf_ct_can_merge(ct, loser_ct)) { struct net *net = nf_ct_net(ct); nf_conntrack_get(&ct->ct_general); nf_ct_acct_merge(ct, ctinfo, loser_ct); nf_ct_put(loser_ct); nf_ct_set(skb, ct, ctinfo); NF_CT_STAT_INC(net, clash_resolve); return NF_ACCEPT; } return NF_DROP; } /** * nf_ct_resolve_clash_harder - attempt to insert clashing conntrack entry * * @skb: skb that causes the collision * @repl_idx: hash slot for reply direction * * Called when origin or reply direction had a clash. 
* The skb can be handled without packet drop provided the reply direction
 * is unique or the existing entry has the identical tuple in both
 * directions.
 *
 * Caller must hold conntrack table locks to prevent concurrent updates.
 *
 * Returns NF_DROP if the clash could not be handled.
 */
static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx)
{
	struct nf_conn *loser_ct = (struct nf_conn *)skb_nfct(skb);
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	struct net *net;

	zone = nf_ct_zone(loser_ct);
	net = nf_ct_net(loser_ct);

	/* Reply direction must never result in a clash, unless both origin
	 * and reply tuples are identical.
	 */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[repl_idx], hnnode) {
		if (nf_ct_key_equal(h,
				    &loser_ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			return __nf_ct_resolve_clash(skb, h);
	}

	/* We want the clashing entry to go away real soon: 1 second timeout. */
	WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ);

	/* IPS_NAT_CLASH removes the entry automatically on the first
	 * reply.  Also prevents UDP tracker from moving the entry to
	 * ASSURED state, i.e. the entry can always be evicted under
	 * pressure.
	 */
	loser_ct->status |= IPS_FIXED_TIMEOUT | IPS_NAT_CLASH;

	__nf_conntrack_insert_prepare(loser_ct);

	/* fake add for ORIGINAL dir: we want lookups to only find the entry
	 * already in the table.  This also hides the clashing entry from
	 * ctnetlink iteration, i.e. conntrack -L won't show them.
	 */
	hlist_nulls_add_fake(&loser_ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);

	hlist_nulls_add_head_rcu(&loser_ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
				 &nf_conntrack_hash[repl_idx]);

	/* confirmed bit must be set after hlist add, not before:
	 * loser_ct can still be visible to other cpu due to
	 * SLAB_TYPESAFE_BY_RCU.
	 */
	smp_mb__before_atomic();
	set_bit(IPS_CONFIRMED_BIT, &loser_ct->status);

	NF_CT_STAT_INC(net, clash_resolve);
	return NF_ACCEPT;
}

/**
 * nf_ct_resolve_clash - attempt to handle clash without packet drop
 *
 * @skb: skb that causes the clash
 * @h: tuplehash of the clashing entry already in table
 * @reply_hash: hash slot for reply direction
 *
 * A conntrack entry can be inserted to the connection tracking table
 * if there is no existing entry with an identical tuple.
 *
 * If there is one, @skb (and the associated, unconfirmed conntrack) has
 * to be dropped.  In case @skb is retransmitted, next conntrack lookup
 * will find the already-existing entry.
 *
 * The major problem with such packet drop is the extra delay added by
 * the packet loss -- it will take some time for a retransmit to occur
 * (or the sender to time out when waiting for a reply).
 *
 * This function attempts to handle the situation without packet drop.
 *
 * If @skb has no NAT transformation or if the colliding entries are
 * exactly the same, only the to-be-confirmed conntrack entry is discarded
 * and @skb is associated with the conntrack entry already in the table.
 *
 * Failing that, the new, unconfirmed conntrack is still added to the table
 * provided that the collision only occurs in the ORIGINAL direction.
 * The new entry will be added only in the non-clashing REPLY direction,
 * so packets in the ORIGINAL direction will continue to match the existing
 * entry.  The new entry will also have a fixed timeout so it expires --
 * due to the collision, it will only see reply traffic.
 *
 * Returns NF_DROP if the clash could not be resolved.
 */
static __cold noinline int
nf_ct_resolve_clash(struct sk_buff *skb, struct nf_conntrack_tuple_hash *h,
		    u32 reply_hash)
{
	/* This is the conntrack entry already in hashes that won race. */
	struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
	const struct nf_conntrack_l4proto *l4proto;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *loser_ct;
	struct net *net;
	int ret;

	loser_ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(loser_ct);

	/* Clash resolution is only attempted when the l4 protocol of the
	 * existing entry has allow_clash set.
	 */
	l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
	if (!l4proto->allow_clash)
		goto drop;

	/* First try to merge into the existing entry ... */
	ret = __nf_ct_resolve_clash(skb, h);
	if (ret == NF_ACCEPT)
		return ret;

	/* ... then try the reply-direction-only insertion. */
	ret = nf_ct_resolve_clash_harder(skb, reply_hash);
	if (ret == NF_ACCEPT)
		return ret;

drop:
	NF_CT_STAT_INC(net, drop);
	NF_CT_STAT_INC(net, insert_failed);
	return NF_DROP;
}

/* Confirm a connection given skb; places it in hash table.
 *
 * Returns NF_ACCEPT when the entry was inserted, or immediately when the
 * skb's ctinfo is not in the ORIGINAL direction.  Returns NF_DROP when the
 * conntrack is already confirmed, dying, fails an extension gen_id check or
 * hits the chain length limit.  On a tuple clash the verdict of
 * nf_ct_resolve_clash() is returned.
 */
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
	unsigned int chainlen = 0, sequence, max_chainlen;
	const struct nf_conntrack_zone *zone;
	unsigned int hash, reply_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct hlist_nulls_node *n;
	enum ip_conntrack_info ctinfo;
	struct net *net;
	int ret = NF_DROP;

	ct = nf_ct_get(skb, &ctinfo);
	net = nf_ct_net(ct);

	/* ipt_REJECT uses nf_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	zone = nf_ct_zone(ct);
	local_bh_disable();

	do {
		sequence = read_seqcount_begin(&nf_conntrack_generation);
		/* reuse the hash saved before */
		hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
		hash = scale_hash(hash);
		reply_hash = hash_conntrack(net,
					   &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
					   nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
	} while (nf_conntrack_double_lock(hash, reply_hash, sequence));

	/* We're not in hash table, and we refuse to set up related
	 * connections for unconfirmed conns.  But packet copies and
	 * REJECT will give spurious warnings here.
	 */

	/* Another skb with the same unconfirmed conntrack may
	 * win the race.  This may happen when bridge (br_flood)
	 * or broadcast/multicast handling clones an skb (skb_clone)
	 * that carries an unconfirmed conntrack.
	 */
	if (unlikely(nf_ct_is_confirmed(ct))) {
		WARN_ON_ONCE(1);
		nf_conntrack_double_unlock(hash, reply_hash);
		local_bh_enable();
		return NF_DROP;
	}

	if (!nf_ct_ext_valid_pre(ct->ext)) {
		NF_CT_STAT_INC(net, insert_failed);
		goto dying;
	}

	/* We have to check the DYING flag after unlink to prevent
	 * a race against nf_ct_get_next_corpse() possibly called from
	 * user context, else we insert an already 'dead' hash, blocking
	 * further use of that particular connection -JM.
	 */
	if (unlikely(nf_ct_is_dying(ct))) {
		NF_CT_STAT_INC(net, insert_failed);
		goto dying;
	}

	/* Per-call randomized upper bound for the chain walks below. */
	max_chainlen = MIN_CHAINLEN + get_random_u32_below(MAX_CHAINLEN);
	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				    zone, net))
			goto out;
		if (chainlen++ > max_chainlen)
			goto chaintoolong;
	}

	chainlen = 0;
	hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
		if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				    zone, net))
			goto out;
		if (chainlen++ > max_chainlen) {
			/* Also the jump target of the ORIGINAL-chain walk above. */
chaintoolong:
			NF_CT_STAT_INC(net, chaintoolong);
			NF_CT_STAT_INC(net, insert_failed);
			ret = NF_DROP;
			goto dying;
		}
	}

	/* Timeout is relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout += nfct_time_stamp;

	__nf_conntrack_insert_prepare(ct);

	/* Since the lookup is lockless, hash insertion must be done after
	 * setting ct->timeout.  The RCU barriers guarantee that no other CPU
	 * can find the conntrack before the above stores are visible.
	 */
	__nf_conntrack_hash_insert(ct, hash, reply_hash);

	/* IPS_CONFIRMED unset means 'ct not (yet) in hash', conntrack lookups
	 * skip entries that lack this bit.  This happens when a CPU is looking
	 * at a stale entry that is being recycled due to SLAB_TYPESAFE_BY_RCU
	 * or when another CPU encounters this entry right after the insertion
	 * but before the set-confirm-bit below.  This bit must not be set until
	 * after __nf_conntrack_hash_insert().
	 */
	smp_mb__before_atomic();
	set_bit(IPS_CONFIRMED_BIT, &ct->status);

	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();

	/* ext area is still valid (rcu read lock is held),
	 * but will go out of scope soon; we need to remove
	 * this conntrack again.
	 */
	if (!nf_ct_ext_valid_post(ct->ext)) {
		nf_ct_kill(ct);
		NF_CT_STAT_INC_ATOMIC(net, drop);
		return NF_DROP;
	}

	help = nfct_help(ct);
	if (help && help->helper)
		nf_conntrack_event_cache(IPCT_HELPER, ct);

	nf_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, ct);
	return NF_ACCEPT;

out:
	/* Still holding both bucket locks here; they are released at 'dying'. */
	ret = nf_ct_resolve_clash(skb, h, reply_hash);
dying:
	nf_conntrack_double_unlock(hash, reply_hash);
	local_bh_enable();
	return ret;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_confirm);

/* Returns true if a connection corresponds to the tuple (required
   for NAT).  @ignored_conntrack itself is skipped during the walk, and
   expired entries met on the way are passed to nf_ct_gc_expired(). */
int
nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
			 const struct nf_conn *ignored_conntrack)
{
	struct net *net = nf_ct_net(ignored_conntrack);
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_head *ct_hash;
	unsigned int hash, hsize;
	struct hlist_nulls_node *n;
	struct nf_conn *ct;

	zone = nf_ct_zone(ignored_conntrack);

	rcu_read_lock();
 begin:
	nf_conntrack_get_ht(&ct_hash, &hsize);
	hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);

	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
		ct = nf_ct_tuplehash_to_ctrack(h);

		if (ct == ignored_conntrack)
			continue;

		if (nf_ct_is_expired(ct)) {
			nf_ct_gc_expired(ct);
			continue;
		}

		if (nf_ct_key_equal(h, tuple, zone, net)) {
			/* Tuple is taken already, so caller will need to find
			 * a new source port to use.
			 *
			 * Only exception:
			 * If the *original tuples* are identical, then both
			 * conntracks refer to the same flow.
			 * This is a rare situation, it can occur e.g. when
			 * more than one UDP packet is sent from same socket
			 * in different threads.
			 *
			 * Let nf_ct_resolve_clash() deal with this later.
			 */
			if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
					      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple) &&
					      nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL))
				continue;

			NF_CT_STAT_INC_ATOMIC(net, found);
			rcu_read_unlock();
			return 1;
		}
	}

	/* The nulls value at the end of the walk does not match the bucket
	 * we started in: count a search_restart and walk again.
	 */
	if (get_nulls_value(n) != hash) {
		NF_CT_STAT_INC_ATOMIC(net, search_restart);
		goto begin;
	}

	rcu_read_unlock();

	return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);

/* Number of consecutive buckets early_drop() inspects. */
#define NF_CT_EVICTION_RANGE	8

/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway.

   Returns the number of entries on @head for which nf_ct_delete()
   succeeded. */
static unsigned int early_drop_list(struct net *net,
				    struct hlist_nulls_head *head)
{
	struct nf_conntrack_tuple_hash *h;
	struct hlist_nulls_node *n;
	unsigned int drops = 0;
	struct nf_conn *tmp;

	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
		tmp = nf_ct_tuplehash_to_ctrack(h);

		if (nf_ct_is_expired(tmp)) {
			nf_ct_gc_expired(tmp);
			continue;
		}

		/* Skip assured entries, entries of other netns and dying ones. */
		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
		    !net_eq(nf_ct_net(tmp), net) ||
		    nf_ct_is_dying(tmp))
			continue;

		if (!refcount_inc_not_zero(&tmp->ct_general.use))
			continue;

		/* load ->ct_net and ->status after refcount increase */
		smp_acquire__after_ctrl_dep();

		/* kill only if still in same netns -- might have moved due to
		 * SLAB_TYPESAFE_BY_RCU rules.
		 *
		 * We steal the timer reference.  If that fails timer has
		 * already fired or someone else deleted it. Just drop ref
		 * and move to next entry.
		 */
		if (net_eq(nf_ct_net(tmp), net) &&
		    nf_ct_is_confirmed(tmp) &&
		    nf_ct_delete(tmp, 0, 0))
			drops++;

		nf_ct_put(tmp);
	}

	return drops;
}

/* Try to evict entries of @net from up to NF_CT_EVICTION_RANGE consecutive
 * buckets, starting with the bucket @hash scales to.  Returns true as soon
 * as one bucket yielded at least one drop, false if none did.
 */
static noinline int early_drop(struct net *net, unsigned int hash)
{
	unsigned int i, bucket;

	for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
		struct hlist_nulls_head *ct_hash;
		unsigned int hsize, drops;

		rcu_read_lock();
		/* Table pointer and size are re-read on every iteration. */
		nf_conntrack_get_ht(&ct_hash, &hsize);
		if (!i)
			bucket = reciprocal_scale(hash, hsize);
		else
			bucket = (bucket + 1) % hsize;

		drops = early_drop_list(net, &ct_hash[bucket]);
		rcu_read_unlock();

		if (drops) {
			NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
			return true;
		}
	}

	return false;
}

/* True for entries the gc worker must not early-drop: not confirmed, or dying. */
static bool gc_worker_skip_ct(const struct nf_conn *ct)
{
	return !nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct);
}

/* True when @ct is not assured, or when its l4 protocol provides a
 * can_early_drop() callback that returns true for it.
 */
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
	const struct nf_conntrack_l4proto *l4proto;
	u8 protonum = nf_ct_protonum(ct);

	if (!test_bit(IPS_ASSURED_BIT, &ct->status))
		return true;

	l4proto = nf_ct_l4proto_find(protonum);
	if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
		return true;

	return false;
}

/* Delayed-work handler that walks the hash table bucket by bucket starting
 * at gc_work->next_bucket, reaps expired entries and, when
 * gc_work->early_drop is set, kills droppable entries of namespaces whose
 * entry count is at or above 95% of nf_conntrack_max.  It re-queues itself
 * unless gc_work->exiting is set.
 */
static void gc_worker(struct work_struct *work)
{
	unsigned int i, hashsz, nf_conntrack_max95 = 0;
	u32 end_time, start_time = nfct_time_stamp;
	struct conntrack_gc_work *gc_work;
	unsigned int expired_count = 0;
	unsigned long next_run;
	s32 delta_time;
	long count;

	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);

	i = gc_work->next_bucket;
	if (gc_work->early_drop)
		nf_conntrack_max95 = nf_conntrack_max / 100u * 95u;

	/* Starting a new cycle at bucket 0: reset the running average state. */
	if (i == 0) {
		gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT;
		gc_work->count = GC_SCAN_INITIAL_COUNT;
		gc_work->start_time = start_time;
	}

	next_run = gc_work->avg_timeout;
	count = gc_work->count;

	end_time = start_time + GC_SCAN_MAX_DURATION;

	do {
		struct nf_conntrack_tuple_hash *h;
		struct hlist_nulls_head *ct_hash;
		struct hlist_nulls_node *n;
		struct nf_conn *tmp;

		rcu_read_lock();

		nf_conntrack_get_ht(&ct_hash, &hashsz);
		if (i >= hashsz) {
			rcu_read_unlock();
			break;
		}

		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
			struct nf_conntrack_net *cnet;
			struct net *net;
			long expires;

			tmp = nf_ct_tuplehash_to_ctrack(h);

			/* Too many evictions in this run: save progress and stop. */
			if (expired_count > GC_SCAN_EXPIRED_MAX) {
				rcu_read_unlock();

				gc_work->next_bucket = i;
				gc_work->avg_timeout = next_run;
				gc_work->count = count;

				delta_time = nfct_time_stamp - gc_work->start_time;

				/* re-sched immediately if total cycle time is exceeded */
				next_run = delta_time < (s32)GC_SCAN_INTERVAL_MAX;
				goto early_exit;
			}

			if (nf_ct_is_expired(tmp)) {
				nf_ct_gc_expired(tmp);
				expired_count++;
				continue;
			}

			/* Fold the clamped remaining lifetime of this entry into
			 * the running average kept in next_run.
			 */
			expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP);
			expires = (expires - (long)next_run) / ++count;
			next_run += expires;

			if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
				continue;

			net = nf_ct_net(tmp);
			cnet = nf_ct_pernet(net);
			if (atomic_read(&cnet->count) < nf_conntrack_max95)
				continue;

			/* need to take reference to avoid possible races */
			if (!refcount_inc_not_zero(&tmp->ct_general.use))
				continue;

			/* load ->status after refcount increase */
			smp_acquire__after_ctrl_dep();

			if (gc_worker_skip_ct(tmp)) {
				nf_ct_put(tmp);
				continue;
			}

			if (gc_worker_can_early_drop(tmp)) {
				nf_ct_kill(tmp);
				expired_count++;
			}

			nf_ct_put(tmp);
		}

		/* could check get_nulls_value() here and restart if ct
		 * was moved to another chain.  But given gc is best-effort
		 * we will just continue with next hash slot.
		 */
		rcu_read_unlock();
		cond_resched();
		i++;

		/* Past end_time with buckets left: save progress and
		 * reschedule with no delay.
		 */
		delta_time = nfct_time_stamp - end_time;
		if (delta_time > 0 && i < hashsz) {
			gc_work->avg_timeout = next_run;
			gc_work->count = count;
			gc_work->next_bucket = i;
			next_run = 0;
			goto early_exit;
		}
	} while (i < hashsz);

	/* Full cycle completed: restart from bucket 0 on the next run. */
	gc_work->next_bucket = 0;

	next_run = clamp(next_run, GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_MAX);

	/* Subtract the time this cycle took, but wait at least 1 jiffy. */
	delta_time = max_t(s32, nfct_time_stamp - gc_work->start_time, 1);
	if (next_run > (unsigned long)delta_time)
		next_run -= delta_time;
	else
		next_run = 1;

early_exit:
	if (gc_work->exiting)
		return;

	/* early_drop stays set only when rescheduling with zero delay. */
	if (next_run)
		gc_work->early_drop = false;

	queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run);
}

/* Bind gc_worker() to @gc_work's delayed work item and clear its exiting flag. */
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
	INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
	gc_work->exiting = false;
}

/* Allocate and initialize a conntrack for @net with the given tuples.
 *
 * The per-netns count is incremented first; if it exceeds a non-zero
 * nf_conntrack_max and early_drop() frees nothing, the allocation fails.
 * @hash is stored in the REPLY hnnode.pprev slot for reuse when confirming.
 *
 * Returns the new entry with a refcount of 0, or ERR_PTR(-ENOMEM).
 */
static struct nf_conn *
__nf_conntrack_alloc(struct net *net,
		     const struct nf_conntrack_zone *zone,
		     const struct nf_conntrack_tuple *orig,
		     const struct nf_conntrack_tuple *repl,
		     gfp_t gfp, u32 hash)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
	unsigned int ct_count;
	struct nf_conn *ct;

	/* We don't want any race condition at early drop stage */
	ct_count = atomic_inc_return(&cnet->count);

	if (nf_conntrack_max && unlikely(ct_count > nf_conntrack_max)) {
		if (!early_drop(net, hash)) {
			/* Ask the gc worker to early-drop on its next run. */
			if (!conntrack_gc_work.early_drop)
				conntrack_gc_work.early_drop = true;
			atomic_dec(&cnet->count);
			if (net == &init_net)
				net_warn_ratelimited("nf_conntrack: table full, dropping packet\n");
			else
				net_warn_ratelimited("nf_conntrack: table full in netns %u, dropping packet\n",
						     net->ns.inum);
			return ERR_PTR(-ENOMEM);
		}
	}

	/*
	 * Do not use kmem_cache_zalloc(), as this cache uses
	 * SLAB_TYPESAFE_BY_RCU.
	 */
	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
	if (ct == NULL)
		goto out;

	spin_lock_init(&ct->lock);
	ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
	ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* save hash for reusing when confirming */
	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
	ct->status = 0;
	WRITE_ONCE(ct->timeout, 0);
	write_pnet(&ct->ct_net, net);
	memset_after(ct, 0, __nfct_init_offset);

	nf_ct_zone_add(ct, zone);

	/* Because we use RCU lookups, we set ct_general.use to zero before
	 * this is inserted in any list.
	 */
	refcount_set(&ct->ct_general.use, 0);
	return ct;
out:
	atomic_dec(&cnet->count);
	return ERR_PTR(-ENOMEM);
}

/* Exported wrapper around __nf_conntrack_alloc() that passes 0 as the hash. */
struct nf_conn *nf_conntrack_alloc(struct net *net,
				   const struct nf_conntrack_zone *zone,
				   const struct nf_conntrack_tuple *orig,
				   const struct nf_conntrack_tuple *repl,
				   gfp_t gfp)
{
	return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);

/* Release @ct: run the NAT hook's remove_nat_bysrc() if IPS_SRC_NAT_DONE is
 * set, free the extension area and the object, then decrement the per-netns
 * count.  Warns if the refcount is not 0 on entry.
 */
void nf_conntrack_free(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	struct nf_conntrack_net *cnet;

	/* A freed object has refcnt == 0, that's
	 * the golden rule for SLAB_TYPESAFE_BY_RCU
	 */
	WARN_ON(refcount_read(&ct->ct_general.use) != 0);

	if (ct->status & IPS_SRC_NAT_DONE) {
		const struct nf_nat_hook *nat_hook;

		rcu_read_lock();
		nat_hook = rcu_dereference(nf_nat_hook);
		if (nat_hook)
			nat_hook->remove_nat_bysrc(ct);
		rcu_read_unlock();
	}

	kfree(ct->ext);
	kmem_cache_free(nf_conntrack_cachep, ct);
	/* @net was read before the object was freed. */
	cnet = nf_ct_pernet(net);

	smp_mb__before_atomic();
	atomic_dec(&cnet->count);
}
EXPORT_SYMBOL_GPL(nf_conntrack_free);


/* Allocate a new conntrack: we return -ENOMEM if classification
   failed due to stress.  Otherwise it really is unclassifiable.
*/
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
	       const struct nf_conntrack_tuple *tuple,
	       struct sk_buff *skb,
	       unsigned int dataoff, u32 hash)
{
	struct nf_conn *ct;
	struct nf_conn_help *help;
	struct nf_conntrack_tuple repl_tuple;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
	struct nf_conntrack_ecache *ecache;
#endif
	struct nf_conntrack_expect *exp = NULL;
	const struct nf_conntrack_zone *zone;
	struct nf_conn_timeout *timeout_ext;
	struct nf_conntrack_zone tmp;
	struct nf_conntrack_net *cnet;

	/* NULL (not an ERR_PTR) when the tuple cannot be inverted. */
	if (!nf_ct_invert_tuple(&repl_tuple, tuple))
		return NULL;

	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
	ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
				  hash);
	if (IS_ERR(ct))
		return ERR_CAST(ct);

	if (!nf_ct_add_synproxy(ct, tmpl)) {
		nf_conntrack_free(ct);
		return ERR_PTR(-ENOMEM);
	}

	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;

	if (timeout_ext)
		nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
				      GFP_ATOMIC);

	/* Return values of these extension adds are not checked here. */
	nf_ct_acct_ext_add(ct, GFP_ATOMIC);
	nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
	nf_ct_labels_ext_add(ct);

#ifdef CONFIG_NF_CONNTRACK_EVENTS
	ecache = tmpl ? nf_ct_ecache_find(tmpl) : NULL;

	/* Event cache masks come from the template when it has one, else 0. */
	if ((ecache || net->ct.sysctl_events) &&
	    !nf_ct_ecache_ext_add(ct, ecache ? ecache->ctmask : 0,
				  ecache ? ecache->expmask : 0,
				  GFP_ATOMIC)) {
		nf_conntrack_free(ct);
		return ERR_PTR(-ENOMEM);
	}
#endif

	cnet = nf_ct_pernet(net);
	if (cnet->expect_count) {
		spin_lock_bh(&nf_conntrack_expect_lock);
		exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
		if (exp) {
			/* Welcome, Mr. Bond.  We've been expecting you... */
			__set_bit(IPS_EXPECTED_BIT, &ct->status);
			/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
			ct->master = exp->master;
			if (exp->helper) {
				help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
				if (help)
					rcu_assign_pointer(help->helper, exp->helper);
			}

#ifdef CONFIG_NF_CONNTRACK_MARK
			ct->mark = READ_ONCE(exp->master->mark);
#endif
#ifdef CONFIG_NF_CONNTRACK_SECMARK
			ct->secmark = exp->master->secmark;
#endif
			NF_CT_STAT_INC(net, expect_new);
		}
		spin_unlock_bh(&nf_conntrack_expect_lock);
	}
	/* No expectation matched: let the template pick a helper, if any. */
	if (!exp && tmpl)
		__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);

	/* Other CPU might have obtained a pointer to this object before it was
	 * released.  Because refcount is 0, refcount_inc_not_zero() will fail.
	 *
	 * After refcount_set(1) it will succeed; ensure that zeroing of
	 * ct->status and the correct ct->net pointer are visible; else other
	 * core might observe CONFIRMED bit which means the entry is valid and
	 * in the hash table, but it's not (anymore).
	 */
	smp_wmb();

	/* Now it is going to be associated with an sk_buff, set refcount to 1. */
	refcount_set(&ct->ct_general.use, 1);

	/* The expectation callback runs only after the refcount is set. */
	if (exp) {
		if (exp->expectfn)
			exp->expectfn(ct, exp);
		nf_ct_expect_put(exp);
	}

	return &ct->tuplehash[IP_CT_DIR_ORIGINAL];
}

/* On success, returns 0, sets skb->_nfct | ctinfo.
 *
 * Also returns 0 without attaching anything when no tuple can be extracted
 * from the packet or init_conntrack() returns NULL.  A negative errno from
 * init_conntrack() is passed through.
 */
static int
resolve_normal_ct(struct nf_conn *tmpl,
		  struct sk_buff *skb,
		  unsigned int dataoff,
		  u_int8_t protonum,
		  const struct nf_hook_state *state)
{
	const struct nf_conntrack_zone *zone;
	struct nf_conntrack_tuple tuple;
	struct nf_conntrack_tuple_hash *h;
	enum ip_conntrack_info ctinfo;
	struct nf_conntrack_zone tmp;
	u32 hash, zone_id, rid;
	struct nf_conn *ct;

	if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
			     dataoff, state->pf, protonum, state->net,
			     &tuple))
		return 0;

	/* look for tuple match */
	zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);

	zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
	hash = hash_conntrack_raw(&tuple, zone_id, state->net);
	h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);

	/* Not found with the ORIGINAL-direction zone id: retry with the
	 * REPLY-direction id when the two differ.
	 */
	if (!h) {
		rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
		if (zone_id != rid) {
			u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);

			h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
		}
	}

	/* Still nothing: create a new entry using the ORIGINAL-direction hash. */
	if (!h) {
		h = init_conntrack(state->net, tmpl, &tuple,
				   skb, dataoff, hash);
		if (!h)
			return 0;
		if (IS_ERR(h))
			return PTR_ERR(h);
	}
	ct = nf_ct_tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
		ctinfo = IP_CT_ESTABLISHED_REPLY;
	} else {
		unsigned long status = READ_ONCE(ct->status);

		/* Once we've had two way comms, always ESTABLISHED. */
		if (likely(status & IPS_SEEN_REPLY))
			ctinfo = IP_CT_ESTABLISHED;
		else if (status & IPS_EXPECTED)
			ctinfo = IP_CT_RELATED;
		else
			ctinfo = IP_CT_NEW;
	}
	nf_ct_set(skb, ct, ctinfo);
	return 0;
}

/*
 * icmp packets need special treatment to handle error messages that are
 * related to a connection.
 *
 * Callers need to check if skb has a conntrack assigned when this
 * helper returns; in such case skb belongs to an already known connection.
*/
static unsigned int __cold
nf_conntrack_handle_icmp(struct nf_conn *tmpl,
			 struct sk_buff *skb,
			 unsigned int dataoff,
			 u8 protonum,
			 const struct nf_hook_state *state)
{
	int ret;

	/* Only ICMP over IPv4 and ICMPv6 over IPv6 go to an error handler;
	 * any other combination is accepted as is.
	 */
	if (state->pf == NFPROTO_IPV4 && protonum == IPPROTO_ICMP)
		ret = nf_conntrack_icmpv4_error(tmpl, skb, dataoff, state);
#if IS_ENABLED(CONFIG_IPV6)
	else if (state->pf == NFPROTO_IPV6 && protonum == IPPROTO_ICMPV6)
		ret = nf_conntrack_icmpv6_error(tmpl, skb, dataoff, state);
#endif
	else
		return NF_ACCEPT;

	if (ret <= 0)
		NF_CT_STAT_INC_ATOMIC(state->net, error);

	return ret;
}

/* Fallback packet handler: refresh @ct with its timeout policy, or with the
 * generic per-netns timeout when nf_ct_timeout_lookup() returns NULL.
 * Always returns NF_ACCEPT.
 */
static int generic_packet(struct nf_conn *ct, struct sk_buff *skb,
			  enum ip_conntrack_info ctinfo)
{
	const unsigned int *timeout = nf_ct_timeout_lookup(ct);

	if (!timeout)
		timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;

	nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
	return NF_ACCEPT;
}

/* Returns verdict for packet, or -1 for invalid.
 *
 * Dispatches on the l4 protocol number of @ct; protocols without a case
 * below (or compiled out) fall through to generic_packet().
 */
static int nf_conntrack_handle_packet(struct nf_conn *ct,
				      struct sk_buff *skb,
				      unsigned int dataoff,
				      enum ip_conntrack_info ctinfo,
				      const struct nf_hook_state *state)
{
	switch (nf_ct_protonum(ct)) {
	case IPPROTO_TCP:
		return nf_conntrack_tcp_packet(ct, skb, dataoff,
					       ctinfo, state);
	case IPPROTO_UDP:
		return nf_conntrack_udp_packet(ct, skb, dataoff,
					       ctinfo, state);
	case IPPROTO_ICMP:
		return nf_conntrack_icmp_packet(ct, skb, ctinfo, state);
#if IS_ENABLED(CONFIG_IPV6)
	case IPPROTO_ICMPV6:
		return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state);
#endif
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
	case IPPROTO_UDPLITE:
		return nf_conntrack_udplite_packet(ct, skb, dataoff,
						   ctinfo, state);
#endif
#ifdef CONFIG_NF_CT_PROTO_SCTP
	case IPPROTO_SCTP:
		return nf_conntrack_sctp_packet(ct, skb, dataoff,
						ctinfo, state);
#endif
#ifdef CONFIG_NF_CT_PROTO_GRE
	case IPPROTO_GRE:
		return nf_conntrack_gre_packet(ct, skb, dataoff,
					       ctinfo, state);
#endif
	}

	return generic_packet(ct, skb, ctinfo);
}

/* Conntrack entry point for a packet: finds or creates the conntrack for
 * @skb, runs the l4 protocol handler and returns a netfilter verdict.
 * A template attached to the skb on entry is released before returning.
 */
unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct, *tmpl;
	u_int8_t protonum;
	int dataoff, ret;

	tmpl = nf_ct_get(skb, &ctinfo);
	if (tmpl || ctinfo == IP_CT_UNTRACKED) {
		/* Previously seen (loopback or untracked)?  Ignore. */
		if ((tmpl && !nf_ct_is_template(tmpl)) ||
		     ctinfo == IP_CT_UNTRACKED)
			return NF_ACCEPT;
		/* Only a template was attached: detach it, keep it in tmpl. */
		skb->_nfct = 0;
	}

	/* rcu_read_lock()ed by nf_hook_thresh */
	dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
	if (dataoff <= 0) {
		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
		ret = NF_ACCEPT;
		goto out;
	}

	if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
		ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
					       protonum, state);
		/* Non-positive results carry the negated verdict. */
		if (ret <= 0) {
			ret = -ret;
			goto out;
		}
		/* ICMP[v6] protocol trackers may assign one conntrack. */
		if (skb->_nfct)
			goto out;
	}
repeat:
	ret = resolve_normal_ct(tmpl, skb, dataoff,
				protonum, state);
	if (ret < 0) {
		/* Too stressed to deal. */
		NF_CT_STAT_INC_ATOMIC(state->net, drop);
		ret = NF_DROP;
		goto out;
	}

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct) {
		/* Not valid part of a connection */
		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
		ret = NF_ACCEPT;
		goto out;
	}

	ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
	if (ret <= 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		nf_ct_put(ct);
		skb->_nfct = 0;
		/* Special case: TCP tracker reports an attempt to reopen a
		 * closed/aborted connection.  We have to go back and create a
		 * fresh conntrack.
		 */
		if (ret == -NF_REPEAT)
			goto repeat;

		NF_CT_STAT_INC_ATOMIC(state->net, invalid);
		if (ret == NF_DROP)
			NF_CT_STAT_INC_ATOMIC(state->net, drop);

		ret = -ret;
		goto out;
	}

	/* First packet seen in the reply direction: set SEEN_REPLY and
	 * cache an IPCT_REPLY event.
	 */
	if (ctinfo == IP_CT_ESTABLISHED_REPLY &&
	    !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		nf_conntrack_event_cache(IPCT_REPLY, ct);
out:
	if (tmpl)
		nf_ct_put(tmpl);

	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);

/* Refresh conntrack for this many jiffies and do accounting when @bytes is
 * non-zero.
 *
 * The timeout is left untouched when IPS_FIXED_TIMEOUT is set.  For a
 * confirmed entry @extra_jiffies is taken relative to nfct_time_stamp; for
 * an unconfirmed one it is stored as given.
 */
void __nf_ct_refresh_acct(struct nf_conn *ct,
			  enum ip_conntrack_info ctinfo,
			  u32 extra_jiffies,
			  unsigned int bytes)
{
	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
		goto acct;

	/* If not in hash table, timer will not be active yet */
	if (nf_ct_is_confirmed(ct))
		extra_jiffies += nfct_time_stamp;

	/* Store only when the value actually changes. */
	if (READ_ONCE(ct->timeout) != extra_jiffies)
		WRITE_ONCE(ct->timeout, extra_jiffies);
acct:
	if (bytes)
		nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);

/* Account skb->len for the direction of @ctinfo, then delete @ct.
 * Returns the result of nf_ct_delete().
 */
bool nf_ct_kill_acct(struct nf_conn *ct,
		     enum ip_conntrack_info ctinfo,
		     const struct sk_buff *skb)
{
	nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);

	return nf_ct_delete(ct, 0, 0);
}
EXPORT_SYMBOL_GPL(nf_ct_kill_acct);

#if IS_ENABLED(CONFIG_NF_CT_NETLINK)

#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>

/* Generic function for tcp/udp/sctp/dccp and alike.
*/ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { if (nla_put_be16(skb, CTA_PROTO_SRC_PORT, tuple->src.u.tcp.port) || nla_put_be16(skb, CTA_PROTO_DST_PORT, tuple->dst.u.tcp.port)) goto nla_put_failure; return 0; nla_put_failure: return -1; } EXPORT_SYMBOL_GPL(nf_ct_port_tuple_to_nlattr); const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = { [CTA_PROTO_SRC_PORT] = { .type = NLA_U16 }, [CTA_PROTO_DST_PORT] = { .type = NLA_U16 }, }; EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy); int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t, u_int32_t flags) { if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) { if (!tb[CTA_PROTO_SRC_PORT]) return -EINVAL; t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]); } if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) { if (!tb[CTA_PROTO_DST_PORT]) return -EINVAL; t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]); } return 0; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); unsigned int nf_ct_port_nlattr_tuple_size(void) { static unsigned int size __read_mostly; if (!size) size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); return size; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; /* This ICMP is in reverse direction to the packet which caused it */ ct = nf_ct_get(skb, &ctinfo); if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) ctinfo = IP_CT_RELATED_REPLY; else ctinfo = IP_CT_RELATED; /* Attach to new skbuff, and increment count */ nf_ct_set(nskb, ct, ctinfo); nf_conntrack_get(skb_nfct(nskb)); } /* This packet is coming from userspace via nf_queue, complete the packet * processing after the helper invocation in nf_confirm(). 
*/
static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
			       enum ip_conntrack_info ctinfo)
{
	const struct nf_conntrack_helper *helper;
	const struct nf_conn_help *help;
	int protoff;

	/* Nothing to do unless the conntrack has a helper extension ... */
	help = nfct_help(ct);
	if (!help)
		return NF_ACCEPT;

	/* ... with a helper assigned ... */
	helper = rcu_dereference(help->helper);
	if (!helper)
		return NF_ACCEPT;

	/* ... and that helper is flagged NF_CT_HELPER_F_USERSPACE. */
	if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
		return NF_ACCEPT;

	/* Compute the offset that is handed to nf_ct_seq_adjust() below. */
	switch (nf_ct_l3num(ct)) {
	case NFPROTO_IPV4:
		protoff = skb_network_offset(skb) + ip_hdrlen(skb);
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case NFPROTO_IPV6: {
		__be16 frag_off;
		u8 pnum;

		pnum = ipv6_hdr(skb)->nexthdr;
		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
					   &frag_off);
		/* Accept without adjustment if the extension headers could
		 * not be skipped or the fragment offset is non-zero.
		 */
		if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
			return NF_ACCEPT;
		break;
	}
#endif
	default:
		return NF_ACCEPT;
	}

	/* Apply sequence adjustment when requested and the packet is not a
	 * loopback packet; a failed adjustment drops the packet and bumps
	 * the per-net drop counter.
	 */
	if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
	    !nf_is_loopback_packet(skb)) {
		if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
			NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
			return NF_DROP;
		}
	}

	/* We've seen it coming out the other side: confirm it */
	return nf_conntrack_confirm(skb);
}

/*
 * nf_ct_hook ->update callback: returns NF_ACCEPT when @skb carries no
 * conntrack, otherwise the verdict of nf_confirm_cthelper().  @net is
 * not used here.
 */
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (!ct)
		return NF_ACCEPT;

	return nf_confirm_cthelper(skb, ct, ctinfo);
}

/*
 * Copy a conntrack tuple for @skb into @dst_tuple.
 *
 * If @skb already has a conntrack attached, the tuple for the packet's
 * direction is copied.  Otherwise a tuple is parsed from the packet as
 * NFPROTO_IPV4 and looked up in the default zone of the namespace of
 * skb->dev.  Returns false when parsing or the lookup fails.
 */
static bool nf_conntrack_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
				       const struct sk_buff *skb)
{
	const struct nf_conntrack_tuple *src_tuple;
	const struct nf_conntrack_tuple_hash *hash;
	struct nf_conntrack_tuple srctuple;
	enum ip_conntrack_info ctinfo;
	struct nf_conn *ct;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct) {
		src_tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
		memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
		return true;
	}

	if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
			       NFPROTO_IPV4, dev_net(skb->dev),
			       &srctuple))
		return false;

	hash = nf_conntrack_find_get(dev_net(skb->dev),
				     &nf_ct_zone_dflt,
				     &srctuple);
	if (!hash)
		return false;

	ct = nf_ct_tuplehash_to_ctrack(hash);
	src_tuple =
nf_ct_tuple(ct, !hash->tuple.dst.dir);
	memcpy(dst_tuple, src_tuple, sizeof(*dst_tuple));
	nf_ct_put(ct);

	return true;
}

/* Bring out ya dead! */
/*
 * Scan the hash table starting at *@bucket and return the first
 * conntrack for which @iter returns non-zero, or NULL once every bucket
 * up to nf_conntrack_htable_size has been scanned.
 *
 * When iter_data->net is set, entries of other namespaces are skipped.
 * A returned conntrack has had its use count incremented.  *@bucket is
 * left at the bucket of the match, so a later call resumes there.
 */
static struct nf_conn *
get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
		const struct nf_ct_iter_data *iter_data, unsigned int *bucket)
{
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;
	struct hlist_nulls_node *n;
	spinlock_t *lockp;

	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
		struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];

		/* Unlocked peek: skip empty buckets without taking the lock. */
		if (hlist_nulls_empty(hslot))
			continue;

		lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
		local_bh_disable();
		nf_conntrack_lock(lockp);
		hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
			if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
				continue;
			/* All nf_conn objects are added to hash table twice, one
			 * for original direction tuple, once for the reply tuple.
			 *
			 * Exception: In the IPS_NAT_CLASH case, only the reply
			 * tuple is added (the original tuple already existed for
			 * a different object).
			 *
			 * We only need to call the iterator once for each
			 * conntrack, so we just use the 'reply' direction
			 * tuple while iterating.
			 */
			ct = nf_ct_tuplehash_to_ctrack(h);

			if (iter_data->net &&
			    !net_eq(iter_data->net, nf_ct_net(ct)))
				continue;

			if (iter(ct, iter_data->data))
				goto found;
		}
		spin_unlock(lockp);
		local_bh_enable();
		cond_resched();
	}

	return NULL;
found:
	/* Take the reference before the bucket lock is released. */
	refcount_inc(&ct->ct_general.use);
	spin_unlock(lockp);
	local_bh_enable();
	return ct;
}

/*
 * Delete every conntrack selected by @iter.  Runs with
 * nf_conntrack_mutex held and may sleep.
 */
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
				  const struct nf_ct_iter_data *iter_data)
{
	unsigned int bucket = 0;
	struct nf_conn *ct;

	might_sleep();

	mutex_lock(&nf_conntrack_mutex);
	while ((ct = get_next_corpse(iter, iter_data, &bucket)) != NULL) {
		/* Time to push up daisies...
*/
		nf_ct_delete(ct, iter_data->portid, iter_data->report);
		/* Drop the reference obtained from get_next_corpse(). */
		nf_ct_put(ct);
		cond_resched();
	}
	mutex_unlock(&nf_conntrack_mutex);
}

/*
 * Per-namespace wrapper around nf_ct_iterate_cleanup(): returns
 * immediately when the conntrack count of iter_data->net is zero.
 * May sleep.
 */
void nf_ct_iterate_cleanup_net(int (*iter)(struct nf_conn *i, void *data),
			       const struct nf_ct_iter_data *iter_data)
{
	struct net *net = iter_data->net;
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);

	might_sleep();

	if (atomic_read(&cnet->count) == 0)
		return;

	nf_ct_iterate_cleanup(iter, iter_data);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);

/**
 * nf_ct_iterate_destroy - flush queued packets and iterate the table
 * @iter: callback to invoke for each conntrack
 * @data: data to pass to @iter
 *
 * Like nf_ct_iterate_cleanup, but not limited to one namespace, and
 * preceded by these steps: nf_queue_nf_hook_drop() is called for every
 * namespace with a non-zero conntrack count, then net_ns_barrier() and
 * synchronize_net() are awaited and the extension genid is bumped.
 *
 * Can only be called in module exit path.
 */
void
nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
{
	struct nf_ct_iter_data iter_data = {};
	struct net *net;

	down_read(&net_rwsem);
	for_each_net(net) {
		struct nf_conntrack_net *cnet = nf_ct_pernet(net);

		if (atomic_read(&cnet->count) == 0)
			continue;
		nf_queue_nf_hook_drop(net);
	}
	up_read(&net_rwsem);

	/* Need to wait for netns cleanup worker to finish, if it's
	 * running -- it might have deleted a net namespace from
	 * the global list, so hook drop above might not have
	 * affected all namespaces.
	 */
	net_ns_barrier();

	/* An skb with an unconfirmed conntrack could have been reinjected
	 * just before we called nf_queue_nf_hook_drop().
	 *
	 * This makes sure it is inserted into the conntrack table.
	 */
	synchronize_net();

	nf_ct_ext_bump_genid();
	/* iter_data.net stays NULL, so the walk is not limited to one netns. */
	iter_data.data = data;
	nf_ct_iterate_cleanup(iter, &iter_data);

	/* Another cpu might be in a rcu read section with
	 * rcu protected pointer cleared in iter callback
	 * or hidden via nf_ct_ext_bump_genid() above.
	 *
	 * Wait until those are done.
*/
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_destroy);

/* Iterator callback that selects every conntrack: always returns 1. */
static int kill_all(struct nf_conn *i, void *data)
{
	return 1;
}

/*
 * First teardown stage: run the BPF cleanup and set the exiting flag
 * of the gc work item.
 */
void nf_conntrack_cleanup_start(void)
{
	cleanup_nf_conntrack_bpf();
	conntrack_gc_work.exiting = true;
}

/*
 * Final teardown stage: unpublish nf_ct_hook, stop the gc work, free
 * the hash table, shut down the proto/helper/expect subsystems and
 * destroy the conntrack slab cache.
 */
void nf_conntrack_cleanup_end(void)
{
	RCU_INIT_POINTER(nf_ct_hook, NULL);
	/* Waits for a gc run that is already executing. */
	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
	kvfree(nf_conntrack_hash);

	nf_conntrack_proto_fini();
	nf_conntrack_helper_fini();
	nf_conntrack_expect_fini();

	kmem_cache_destroy(nf_conntrack_cachep);
}

/*
 * Mishearing the voices in his head, our hero wonders how he's
 * supposed to kill the mall.
 */
/* Single-namespace wrapper: puts @net on a one-entry list and cleans that. */
void nf_conntrack_cleanup_net(struct net *net)
{
	LIST_HEAD(single);

	list_add(&net->exit_list, &single);
	nf_conntrack_cleanup_net_list(&single);
}

/*
 * Delete all conntracks of every namespace on @net_exit_list (linked
 * through net->exit_list).
 */
void nf_conntrack_cleanup_net_list(struct list_head *net_exit_list)
{
	struct nf_ct_iter_data iter_data = {};
	struct net *net;
	int busy;

	/*
	 *  This makes sure all current packets have passed through
	 *  netfilter framework.  Roll on, two-stage module
	 *  delete...
*/
	synchronize_rcu_expedited();
i_see_dead_people:
	busy = 0;
	list_for_each_entry(net, net_exit_list, exit_list) {
		struct nf_conntrack_net *cnet = nf_ct_pernet(net);

		iter_data.net = net;
		nf_ct_iterate_cleanup_net(kill_all, &iter_data);
		/* Entries remain: another pass is needed. */
		if (atomic_read(&cnet->count) != 0)
			busy = 1;
	}
	if (busy) {
		/* Yield, then rescan every namespace on the list. */
		schedule();
		goto i_see_dead_people;
	}

	/* All counts are zero: release the per-namespace state. */
	list_for_each_entry(net, net_exit_list, exit_list) {
		nf_conntrack_ecache_pernet_fini(net);
		nf_conntrack_expect_pernet_fini(net);
		free_percpu(net->ct.stat);
	}
}

/*
 * Allocate a zeroed hash table.
 *
 * @sizep: in: requested slot count; out: the count rounded up to a
 *         multiple of PAGE_SIZE / sizeof(struct hlist_nulls_head).
 *         The rounded value is written back even when the second size
 *         check or the allocation then fails.
 * @nulls: when non-zero, each slot is initialised as a nulls list head
 *         carrying its own index as nulls value.
 *
 * Returns the table, or NULL when the size exceeds INT_MAX bytes or the
 * allocation fails.
 */
void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
{
	struct hlist_nulls_head *hash;
	unsigned int nr_slots, i;

	if (*sizep > (INT_MAX / sizeof(struct hlist_nulls_head)))
		return NULL;

	BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
	nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));

	/* Re-check: rounding up may have pushed the size over the limit. */
	if (nr_slots > (INT_MAX / sizeof(struct hlist_nulls_head)))
		return NULL;

	hash = kvcalloc(nr_slots, sizeof(struct hlist_nulls_head), GFP_KERNEL);

	if (hash && nulls)
		for (i = 0; i < nr_slots; i++)
			INIT_HLIST_NULLS_HEAD(&hash[i], i);

	return hash;
}
EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);

/*
 * Resize the global conntrack hash table to @hashsize slots (rounded up
 * by nf_ct_alloc_hashtable()).
 *
 * Returns -EINVAL for a zero size, -ENOMEM when the new table cannot be
 * allocated, and 0 otherwise, including when the rounded size equals
 * the current one.
 */
int nf_conntrack_hash_resize(unsigned int hashsize)
{
	int i, bucket;
	unsigned int old_size;
	struct hlist_nulls_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;
	struct nf_conn *ct;

	if (!hashsize)
		return -EINVAL;

	/* Allocate before taking any lock; hashsize now holds the rounded size. */
	hash = nf_ct_alloc_hashtable(&hashsize, 1);
	if (!hash)
		return -ENOMEM;

	mutex_lock(&nf_conntrack_mutex);
	old_size = nf_conntrack_htable_size;
	if (old_size == hashsize) {
		mutex_unlock(&nf_conntrack_mutex);
		kvfree(hash);
		return 0;
	}

	local_bh_disable();
	nf_conntrack_all_lock();
	write_seqcount_begin(&nf_conntrack_generation);

	/* Lookups in the old hash might happen in parallel, which means we
	 * might get false negatives during connection lookup. New connections
	 * created because of a false negative won't make it into the hash
	 * though since that required taking the locks.
*/

	/* Move every entry of the old table into its bucket of the new one. */
	for (i = 0; i < nf_conntrack_htable_size; i++) {
		while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
			unsigned int zone_id;

			h = hlist_nulls_entry(nf_conntrack_hash[i].first,
					      struct nf_conntrack_tuple_hash, hnnode);
			ct = nf_ct_tuplehash_to_ctrack(h);
			hlist_nulls_del_rcu(&h->hnnode);

			/* Rehash against the new table size. */
			zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
			bucket = __hash_conntrack(nf_ct_net(ct),
						  &h->tuple, zone_id, hashsize);
			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
		}
	}
	old_hash = nf_conntrack_hash;

	/* Publish the new table and size inside the seqcount write section. */
	nf_conntrack_hash = hash;
	nf_conntrack_htable_size = hashsize;

	write_seqcount_end(&nf_conntrack_generation);
	nf_conntrack_all_unlock();
	local_bh_enable();

	mutex_unlock(&nf_conntrack_mutex);

	/* Free the old table only after synchronize_net() has returned. */
	synchronize_net();
	kvfree(old_hash);
	return 0;
}

/*
 * Parameter setter for the hash size.
 *
 * Returns -EOPNOTSUPP when the caller is not in init_net.  While no
 * hash table exists yet the value is only stored through
 * param_set_uint(); afterwards it is parsed and passed to
 * nf_conntrack_hash_resize().
 */
int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
{
	unsigned int hashsize;
	int rc;

	if (current->nsproxy->net_ns != &init_net)
		return -EOPNOTSUPP;

	/* On boot, we can set this without any fancy locking. */
	if (!nf_conntrack_hash)
		return param_set_uint(val, kp);

	rc = kstrtouint(val, 0, &hashsize);
	if (rc)
		return rc;

	return nf_conntrack_hash_resize(hashsize);
}

/*
 * Global initialisation: set up locks, pick a default hash size when
 * none was given, allocate the hash table and slab cache, and start
 * the expect/helper/proto subsystems, the gc work and BPF support.
 */
int nf_conntrack_init_start(void)
{
	unsigned long nr_pages = totalram_pages();
	int max_factor = 8;
	int ret = -ENOMEM;
	int i;

	seqcount_spinlock_init(&nf_conntrack_generation,
			       &nf_conntrack_locks_all_lock);

	for (i = 0; i < CONNTRACK_LOCKS; i++)
		spin_lock_init(&nf_conntrack_locks[i]);

	if (!nf_conntrack_htable_size) {
		/* Default: one slot per 16 KiB of RAM, replaced by 65536
		 * above 1 GiB, or by 262144 above 4 GiB on 64-bit; never
		 * below 1024.
		 */
		nf_conntrack_htable_size
			= (((nr_pages << PAGE_SHIFT) / 16384)
			   / sizeof(struct hlist_head));
		if (BITS_PER_LONG >= 64 &&
		    nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
			nf_conntrack_htable_size = 262144;
		else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
			nf_conntrack_htable_size = 65536;

		if (nf_conntrack_htable_size < 1024)
			nf_conntrack_htable_size = 1024;
		/* Use a max. factor of one by default to keep the average
		 * hash chain length at 2 entries.  Each entry has to be added
		 * twice (once for original direction, once for reply).
* When a table size is given we use the old value of 8 to
		 * avoid implicit reduction of the max entries setting. */
		max_factor = 1;
	}

	nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
	if (!nf_conntrack_hash)
		return -ENOMEM;

	nf_conntrack_max = max_factor * nf_conntrack_htable_size;

	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
						sizeof(struct nf_conn),
						NFCT_INFOMASK + 1,
						SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL);
	/* ret still holds its initial -ENOMEM on this path. */
	if (!nf_conntrack_cachep)
		goto err_cachep;

	ret = nf_conntrack_expect_init();
	if (ret < 0)
		goto err_expect;

	ret = nf_conntrack_helper_init();
	if (ret < 0)
		goto err_helper;

	ret = nf_conntrack_proto_init();
	if (ret < 0)
		goto err_proto;

	/* Queue the first gc run HZ jiffies from now. */
	conntrack_gc_work_init(&conntrack_gc_work);
	queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ);

	ret = register_nf_conntrack_bpf();
	if (ret < 0)
		goto err_kfunc;

	return 0;

	/* Unwind in reverse order of initialisation. */
err_kfunc:
	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
	nf_conntrack_proto_fini();
err_proto:
	nf_conntrack_helper_fini();
err_helper:
	nf_conntrack_expect_fini();
err_expect:
	kmem_cache_destroy(nf_conntrack_cachep);
err_cachep:
	kvfree(nf_conntrack_hash);
	return ret;
}

/*
 * nf_ct_hook ->set_closing callback.  Only TCP conntracks are handled,
 * through nf_conntrack_tcp_set_closing(); other protocols are left
 * untouched.
 */
static void nf_conntrack_set_closing(struct nf_conntrack *nfct)
{
	struct nf_conn *ct = nf_ct_to_nf_conn(nfct);

	switch (nf_ct_protonum(ct)) {
	case IPPROTO_TCP:
		nf_conntrack_tcp_set_closing(ct);
		break;
	}
}

/* Callback table published through nf_ct_hook by nf_conntrack_init_end(). */
static const struct nf_ct_hook nf_conntrack_hook = {
	.update		= nf_conntrack_update,
	.destroy	= nf_ct_destroy,
	.get_tuple_skb  = nf_conntrack_get_tuple_skb,
	.attach		= nf_conntrack_attach,
	.set_closing	= nf_conntrack_set_closing,
	.confirm	= __nf_conntrack_confirm,
	.get_id		= nf_conntrack_get_id,
};

/* Last global init step: make nf_conntrack_hook visible via nf_ct_hook. */
void nf_conntrack_init_end(void)
{
	RCU_INIT_POINTER(nf_ct_hook, &nf_conntrack_hook);
}

/*
 * We need to use special "null" values, not used in hash table
 */
#define UNCONFIRMED_NULLS_VAL	((1<<30)+0)

/*
 * Per-namespace initialisation: reset the conntrack count, allocate the
 * per-cpu statistics and initialise the per-net sub-systems.
 */
int nf_conntrack_init_net(struct net *net)
{
	struct nf_conntrack_net *cnet = nf_ct_pernet(net);
	int ret = -ENOMEM;

	BUILD_BUG_ON(IP_CT_UNTRACKED
== IP_CT_NUMBER); BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS); atomic_set(&cnet->count, 0); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) return ret; ret = nf_conntrack_expect_pernet_init(net); if (ret < 0) goto err_expect; nf_conntrack_acct_pernet_init(net); nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); nf_conntrack_proto_pernet_init(net); return 0; err_expect: free_percpu(net->ct.stat); return ret; } /* ctnetlink code shared by both ctnetlink and nf_conntrack_bpf */ int __nf_ct_change_timeout(struct nf_conn *ct, u64 timeout) { if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) return -EPERM; __nf_ct_set_timeout(ct, timeout); if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; return 0; } EXPORT_SYMBOL_GPL(__nf_ct_change_timeout); void __nf_ct_change_status(struct nf_conn *ct, unsigned long on, unsigned long off) { unsigned int bit; /* Ignore these unchangable bits */ on &= ~IPS_UNCHANGEABLE_MASK; off &= ~IPS_UNCHANGEABLE_MASK; for (bit = 0; bit < __IPS_MAX_BIT; bit++) { if (on & (1 << bit)) set_bit(bit, &ct->status); else if (off & (1 << bit)) clear_bit(bit, &ct->status); } } EXPORT_SYMBOL_GPL(__nf_ct_change_status); int nf_ct_change_status_common(struct nf_conn *ct, unsigned int status) { unsigned long d; d = ct->status ^ status; if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) /* unchangeable */ return -EBUSY; if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) /* SEEN_REPLY bit can only be set */ return -EBUSY; if (d & IPS_ASSURED && !(status & IPS_ASSURED)) /* ASSURED bit can only be set */ return -EBUSY; __nf_ct_change_status(ct, status, 0); return 0; } EXPORT_SYMBOL_GPL(nf_ct_change_status_common);
10 10 10 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2019 Hammerspace Inc */ #include <linux/module.h> #include <linux/kobject.h> #include <linux/sysfs.h> #include <linux/fs.h> #include <linux/slab.h> #include 
<linux/netdevice.h> #include <linux/string.h> #include <linux/nfs_fs.h> #include <linux/rcupdate.h> #include <linux/lockd/lockd.h> #include "internal.h" #include "nfs4_fs.h" #include "netns.h" #include "sysfs.h" static struct kset *nfs_kset; static void nfs_kset_release(struct kobject *kobj) { struct kset *kset = container_of(kobj, struct kset, kobj); kfree(kset); } static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type( const struct kobject *kobj) { return &net_ns_type_operations; } static struct kobj_type nfs_kset_type = { .release = nfs_kset_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = nfs_netns_object_child_ns_type, }; int nfs_sysfs_init(void) { int ret; nfs_kset = kzalloc(sizeof(*nfs_kset), GFP_KERNEL); if (!nfs_kset) return -ENOMEM; ret = kobject_set_name(&nfs_kset->kobj, "nfs"); if (ret) { kfree(nfs_kset); return ret; } nfs_kset->kobj.parent = fs_kobj; nfs_kset->kobj.ktype = &nfs_kset_type; nfs_kset->kobj.kset = NULL; ret = kset_register(nfs_kset); if (ret) { kfree(nfs_kset); return ret; } return 0; } void nfs_sysfs_exit(void) { kset_unregister(nfs_kset); } static ssize_t nfs_netns_identifier_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, kobject); ssize_t ret; rcu_read_lock(); ret = sysfs_emit(buf, "%s\n", rcu_dereference(c->identifier)); rcu_read_unlock(); return ret; } /* Strip trailing '\n' */ static size_t nfs_string_strip(const char *c, size_t len) { while (len > 0 && c[len-1] == '\n') --len; return len; } static ssize_t nfs_netns_identifier_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, kobject); const char *old; char *p; size_t len; len = nfs_string_strip(buf, min_t(size_t, count, CONTAINER_ID_MAXLEN)); if (!len) return 0; p = kmemdup_nul(buf, len, GFP_KERNEL); if (!p) return -ENOMEM; old = 
rcu_dereference_protected(xchg(&c->identifier, (char __rcu *)p), 1); if (old) { synchronize_rcu(); kfree(old); } return count; } static void nfs_netns_client_release(struct kobject *kobj) { struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, kobject); kfree(rcu_dereference_raw(c->identifier)); } static const void *nfs_netns_client_namespace(const struct kobject *kobj) { return container_of(kobj, struct nfs_netns_client, kobject)->net; } static struct kobj_attribute nfs_netns_client_id = __ATTR(identifier, 0644, nfs_netns_identifier_show, nfs_netns_identifier_store); static struct attribute *nfs_netns_client_attrs[] = { &nfs_netns_client_id.attr, NULL, }; ATTRIBUTE_GROUPS(nfs_netns_client); static struct kobj_type nfs_netns_client_type = { .release = nfs_netns_client_release, .default_groups = nfs_netns_client_groups, .sysfs_ops = &kobj_sysfs_ops, .namespace = nfs_netns_client_namespace, }; static void nfs_netns_object_release(struct kobject *kobj) { struct nfs_netns_client *c = container_of(kobj, struct nfs_netns_client, nfs_net_kobj); kfree(c); } static const void *nfs_netns_namespace(const struct kobject *kobj) { return container_of(kobj, struct nfs_netns_client, nfs_net_kobj)->net; } static struct kobj_type nfs_netns_object_type = { .release = nfs_netns_object_release, .sysfs_ops = &kobj_sysfs_ops, .namespace = nfs_netns_namespace, }; static struct nfs_netns_client *nfs_netns_client_alloc(struct kobject *parent, struct net *net) { struct nfs_netns_client *p; p = kzalloc(sizeof(*p), GFP_KERNEL); if (p) { p->net = net; p->kobject.kset = nfs_kset; p->nfs_net_kobj.kset = nfs_kset; if (kobject_init_and_add(&p->nfs_net_kobj, &nfs_netns_object_type, parent, "net") != 0) { kobject_put(&p->nfs_net_kobj); return NULL; } if (kobject_init_and_add(&p->kobject, &nfs_netns_client_type, &p->nfs_net_kobj, "nfs_client") == 0) return p; kobject_put(&p->kobject); } return NULL; } void nfs_netns_sysfs_setup(struct nfs_net *netns, struct net *net) { struct 
nfs_netns_client *clp; clp = nfs_netns_client_alloc(&nfs_kset->kobj, net); if (clp) { netns->nfs_client = clp; kobject_uevent(&clp->kobject, KOBJ_ADD); } } void nfs_netns_sysfs_destroy(struct nfs_net *netns) { struct nfs_netns_client *clp = netns->nfs_client; if (clp) { kobject_uevent(&clp->kobject, KOBJ_REMOVE); kobject_del(&clp->kobject); kobject_put(&clp->kobject); kobject_del(&clp->nfs_net_kobj); kobject_put(&clp->nfs_net_kobj); netns->nfs_client = NULL; } } static bool shutdown_match_client(const struct rpc_task *task, const void *data) { return true; } static void shutdown_client(struct rpc_clnt *clnt) { clnt->cl_shutdown = 1; rpc_cancel_tasks(clnt, -EIO, shutdown_match_client, NULL); } /* * Shut down the nfs_client only once all the superblocks * have been shut down. */ static void shutdown_nfs_client(struct nfs_client *clp) { struct nfs_server *server; rcu_read_lock(); list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { if (!(server->flags & NFS_MOUNT_SHUTDOWN)) { rcu_read_unlock(); return; } } rcu_read_unlock(); nfs_mark_client_ready(clp, -EIO); shutdown_client(clp->cl_rpcclient); } static ssize_t shutdown_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); bool shutdown = server->flags & NFS_MOUNT_SHUTDOWN; return sysfs_emit(buf, "%d\n", shutdown); } static ssize_t shutdown_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { struct nfs_server *server; int ret, val; server = container_of(kobj, struct nfs_server, kobj); ret = kstrtoint(buf, 0, &val); if (ret) return ret; if (val != 1) return -EINVAL; /* already shut down? 
*/ if (server->flags & NFS_MOUNT_SHUTDOWN) goto out; server->flags |= NFS_MOUNT_SHUTDOWN; shutdown_client(server->client); if (!IS_ERR(server->client_acl)) shutdown_client(server->client_acl); if (server->nlm_host) shutdown_client(server->nlm_host->h_rpcclnt); out: shutdown_nfs_client(server->nfs_client); return count; } static struct kobj_attribute nfs_sysfs_attr_shutdown = __ATTR_RW(shutdown); #if IS_ENABLED(CONFIG_NFS_V4_1) static ssize_t implid_domain_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid; if (!impl_id || strlen(impl_id->domain) == 0) return 0; //sysfs_emit(buf, ""); return sysfs_emit(buf, "%s\n", impl_id->domain); } static struct kobj_attribute nfs_sysfs_attr_implid_domain = __ATTR_RO(implid_domain); static ssize_t implid_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); struct nfs41_impl_id *impl_id = server->nfs_client->cl_implid; if (!impl_id || strlen(impl_id->name) == 0) return 0; //sysfs_emit(buf, ""); return sysfs_emit(buf, "%s\n", impl_id->name); } static struct kobj_attribute nfs_sysfs_attr_implid_name = __ATTR_RO(implid_name); #endif /* IS_ENABLED(CONFIG_NFS_V4_1) */ #define RPC_CLIENT_NAME_SIZE 64 void nfs_sysfs_link_rpc_client(struct nfs_server *server, struct rpc_clnt *clnt, const char *uniq) { char name[RPC_CLIENT_NAME_SIZE]; int ret; strscpy(name, clnt->cl_program->name, sizeof(name)); strncat(name, uniq ? uniq : "", sizeof(name) - strlen(name) - 1); strncat(name, "_client", sizeof(name) - strlen(name) - 1); ret = sysfs_create_link_nowarn(&server->kobj, &clnt->cl_sysfs->kobject, name); if (ret < 0) pr_warn("NFS: can't create link to %s in sysfs (%d)\n", name, ret); } EXPORT_SYMBOL_GPL(nfs_sysfs_link_rpc_client); static void nfs_sysfs_sb_release(struct kobject *kobj) { /* no-op: why? 
see lib/kobject.c kobject_cleanup() */ } static const void *nfs_netns_server_namespace(const struct kobject *kobj) { return container_of(kobj, struct nfs_server, kobj)->nfs_client->cl_net; } static struct kobj_type nfs_sb_ktype = { .release = nfs_sysfs_sb_release, .sysfs_ops = &kobj_sysfs_ops, .namespace = nfs_netns_server_namespace, .child_ns_type = nfs_netns_object_child_ns_type, }; #if IS_ENABLED(CONFIG_NFS_V4_1) static void nfs_sysfs_add_nfsv41_server(struct nfs_server *server) { int ret; if (!server->nfs_client->cl_implid) return; ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_domain.attr, nfs_netns_server_namespace(&server->kobj)); if (ret < 0) pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", server->s_sysfs_id, ret); ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_implid_name.attr, nfs_netns_server_namespace(&server->kobj)); if (ret < 0) pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", server->s_sysfs_id, ret); } #else /* CONFIG_NFS_V4_1 */ static inline void nfs_sysfs_add_nfsv41_server(struct nfs_server *server) { } #endif /* CONFIG_NFS_V4_1 */ #if IS_ENABLED(CONFIG_NFS_LOCALIO) static ssize_t localio_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct nfs_server *server = container_of(kobj, struct nfs_server, kobj); bool localio = nfs_server_is_local(server->nfs_client); return sysfs_emit(buf, "%d\n", localio); } static struct kobj_attribute nfs_sysfs_attr_localio = __ATTR_RO(localio); static void nfs_sysfs_add_nfs_localio_server(struct nfs_server *server) { int ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_localio.attr, nfs_netns_server_namespace(&server->kobj)); if (ret < 0) pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", server->s_sysfs_id, ret); } #else static inline void nfs_sysfs_add_nfs_localio_server(struct nfs_server *server) { } #endif /* IS_ENABLED(CONFIG_NFS_LOCALIO) */ void nfs_sysfs_add_server(struct nfs_server *server) { int ret; 
ret = kobject_init_and_add(&server->kobj, &nfs_sb_ktype, &nfs_kset->kobj, "server-%d", server->s_sysfs_id); if (ret < 0) { pr_warn("NFS: nfs sysfs add server-%d failed (%d)\n", server->s_sysfs_id, ret); return; } ret = sysfs_create_file_ns(&server->kobj, &nfs_sysfs_attr_shutdown.attr, nfs_netns_server_namespace(&server->kobj)); if (ret < 0) pr_warn("NFS: sysfs_create_file_ns for server-%d failed (%d)\n", server->s_sysfs_id, ret); nfs_sysfs_add_nfsv41_server(server); nfs_sysfs_add_nfs_localio_server(server); } EXPORT_SYMBOL_GPL(nfs_sysfs_add_server); void nfs_sysfs_move_server_to_sb(struct super_block *s) { struct nfs_server *server = s->s_fs_info; int ret; ret = kobject_rename(&server->kobj, s->s_id); if (ret < 0) pr_warn("NFS: rename sysfs %s failed (%d)\n", server->kobj.name, ret); } void nfs_sysfs_move_sb_to_server(struct nfs_server *server) { const char *s; int ret = -ENOMEM; s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id); if (s) { ret = kobject_rename(&server->kobj, s); kfree(s); } if (ret < 0) pr_warn("NFS: rename sysfs %s failed (%d)\n", server->kobj.name, ret); } /* unlink, not dec-ref */ void nfs_sysfs_remove_server(struct nfs_server *server) { kobject_del(&server->kobj); }
36 30 2 2 3 65 64 65 65 111 1 2 2 108 5 2 1 1 64 1 1 1 1 120 114 5 5 125 110 1 106 2 1 3 13 120 2 4 6 6 2 117 1 1 116 6 6 6 1 1 35 36 1 38 37 37 1 22 21 12 4 4 18 1 68 68 68 69 69 55 2 2 1 4 3 1 1 1 1 2 58 1 62 35 33 1 35 34 35 21 15 35 35 5 5 5 5 5 5 33 34 34 33 17 18 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 
959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 
1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 // SPDX-License-Identifier: GPL-2.0 /* * This file contains helper code to handle channel * settings and keeping track of what is possible at * any point in time. 
* * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2018-2025 Intel Corporation */ #include <linux/export.h> #include <linux/bitfield.h> #include <net/cfg80211.h> #include "core.h" #include "rdev-ops.h" static bool cfg80211_valid_60g_freq(u32 freq) { return freq >= 58320 && freq <= 70200; } void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, enum nl80211_channel_type chan_type) { if (WARN_ON(!chan)) return; *chandef = (struct cfg80211_chan_def) { .chan = chan, .freq1_offset = chan->freq_offset, }; switch (chan_type) { case NL80211_CHAN_NO_HT: chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = chan->center_freq; break; case NL80211_CHAN_HT20: chandef->width = NL80211_CHAN_WIDTH_20; chandef->center_freq1 = chan->center_freq; break; case NL80211_CHAN_HT40PLUS: chandef->width = NL80211_CHAN_WIDTH_40; chandef->center_freq1 = chan->center_freq + 10; break; case NL80211_CHAN_HT40MINUS: chandef->width = NL80211_CHAN_WIDTH_40; chandef->center_freq1 = chan->center_freq - 10; break; default: WARN_ON(1); } } EXPORT_SYMBOL(cfg80211_chandef_create); static u32 cfg80211_get_start_freq(const struct cfg80211_chan_def *chandef, u32 cf) { u32 start_freq, center_freq, bandwidth; center_freq = MHZ_TO_KHZ((cf == 1) ? chandef->center_freq1 : chandef->center_freq2); bandwidth = MHZ_TO_KHZ(cfg80211_chandef_get_width(chandef)); if (bandwidth <= MHZ_TO_KHZ(20)) start_freq = center_freq; else start_freq = center_freq - bandwidth / 2 + MHZ_TO_KHZ(10); return start_freq; } static u32 cfg80211_get_end_freq(const struct cfg80211_chan_def *chandef, u32 cf) { u32 end_freq, center_freq, bandwidth; center_freq = MHZ_TO_KHZ((cf == 1) ? 
chandef->center_freq1 : chandef->center_freq2); bandwidth = MHZ_TO_KHZ(cfg80211_chandef_get_width(chandef)); if (bandwidth <= MHZ_TO_KHZ(20)) end_freq = center_freq; else end_freq = center_freq + bandwidth / 2 - MHZ_TO_KHZ(10); return end_freq; } #define for_each_subchan(chandef, freq, cf) \ for (u32 punctured = chandef->punctured, \ cf = 1, freq = cfg80211_get_start_freq(chandef, cf); \ freq <= cfg80211_get_end_freq(chandef, cf); \ freq += MHZ_TO_KHZ(20), \ ((cf == 1 && chandef->center_freq2 != 0 && \ freq > cfg80211_get_end_freq(chandef, cf)) ? \ (cf++, freq = cfg80211_get_start_freq(chandef, cf), \ punctured = 0) : (punctured >>= 1))) \ if (!(punctured & 1)) struct cfg80211_per_bw_puncturing_values { u8 len; const u16 *valid_values; }; static const u16 puncturing_values_80mhz[] = { 0x8, 0x4, 0x2, 0x1 }; static const u16 puncturing_values_160mhz[] = { 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3 }; static const u16 puncturing_values_320mhz[] = { 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00, 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f, 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f }; #define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \ { \ .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \ .valid_values = puncturing_values_ ## _bw ## mhz \ } static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = { CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80), CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160), CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320) }; static bool valid_puncturing_bitmap(const struct cfg80211_chan_def *chandef) { u32 idx, i, start_freq, primary_center = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_80: idx = 0; start_freq = chandef->center_freq1 - 40; break; case NL80211_CHAN_WIDTH_160: idx = 1; start_freq = chandef->center_freq1 - 80; break; case NL80211_CHAN_WIDTH_320: idx = 2; start_freq = chandef->center_freq1 - 160; break; default: return 
chandef->punctured == 0; } if (!chandef->punctured) return true; /* check if primary channel is punctured */ if (chandef->punctured & (u16)BIT((primary_center - start_freq) / 20)) return false; for (i = 0; i < per_bw_puncturing[idx].len; i++) { if (per_bw_puncturing[idx].valid_values[i] == chandef->punctured) return true; } return false; } static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) { int max_contiguous = 0; int num_of_enabled = 0; int contiguous = 0; int i; if (!chandef->edmg.channels || !chandef->edmg.bw_config) return false; if (!cfg80211_valid_60g_freq(chandef->chan->center_freq)) return false; for (i = 0; i < 6; i++) { if (chandef->edmg.channels & BIT(i)) { contiguous++; num_of_enabled++; } else { contiguous = 0; } max_contiguous = max(contiguous, max_contiguous); } /* basic verification of edmg configuration according to * IEEE P802.11ay/D4.0 section 9.4.2.251 */ /* check bw_config against contiguous edmg channels */ switch (chandef->edmg.bw_config) { case IEEE80211_EDMG_BW_CONFIG_4: case IEEE80211_EDMG_BW_CONFIG_8: case IEEE80211_EDMG_BW_CONFIG_12: if (max_contiguous < 1) return false; break; case IEEE80211_EDMG_BW_CONFIG_5: case IEEE80211_EDMG_BW_CONFIG_9: case IEEE80211_EDMG_BW_CONFIG_13: if (max_contiguous < 2) return false; break; case IEEE80211_EDMG_BW_CONFIG_6: case IEEE80211_EDMG_BW_CONFIG_10: case IEEE80211_EDMG_BW_CONFIG_14: if (max_contiguous < 3) return false; break; case IEEE80211_EDMG_BW_CONFIG_7: case IEEE80211_EDMG_BW_CONFIG_11: case IEEE80211_EDMG_BW_CONFIG_15: if (max_contiguous < 4) return false; break; default: return false; } /* check bw_config against aggregated (non contiguous) edmg channels */ switch (chandef->edmg.bw_config) { case IEEE80211_EDMG_BW_CONFIG_4: case IEEE80211_EDMG_BW_CONFIG_5: case IEEE80211_EDMG_BW_CONFIG_6: case IEEE80211_EDMG_BW_CONFIG_7: break; case IEEE80211_EDMG_BW_CONFIG_8: case IEEE80211_EDMG_BW_CONFIG_9: case IEEE80211_EDMG_BW_CONFIG_10: case IEEE80211_EDMG_BW_CONFIG_11: if 
(num_of_enabled < 2) return false; break; case IEEE80211_EDMG_BW_CONFIG_12: case IEEE80211_EDMG_BW_CONFIG_13: case IEEE80211_EDMG_BW_CONFIG_14: case IEEE80211_EDMG_BW_CONFIG_15: if (num_of_enabled < 4 || max_contiguous < 2) return false; break; default: return false; } return true; } int nl80211_chan_width_to_mhz(enum nl80211_chan_width chan_width) { int mhz; switch (chan_width) { case NL80211_CHAN_WIDTH_1: mhz = 1; break; case NL80211_CHAN_WIDTH_2: mhz = 2; break; case NL80211_CHAN_WIDTH_4: mhz = 4; break; case NL80211_CHAN_WIDTH_8: mhz = 8; break; case NL80211_CHAN_WIDTH_16: mhz = 16; break; case NL80211_CHAN_WIDTH_5: mhz = 5; break; case NL80211_CHAN_WIDTH_10: mhz = 10; break; case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: mhz = 20; break; case NL80211_CHAN_WIDTH_40: mhz = 40; break; case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_80: mhz = 80; break; case NL80211_CHAN_WIDTH_160: mhz = 160; break; case NL80211_CHAN_WIDTH_320: mhz = 320; break; default: WARN_ON_ONCE(1); return -1; } return mhz; } EXPORT_SYMBOL(nl80211_chan_width_to_mhz); static bool cfg80211_valid_center_freq(u32 center, enum nl80211_chan_width width) { int bw; int step; /* We only do strict verification on 6 GHz */ if (center < 5955 || center > 7115) return true; bw = nl80211_chan_width_to_mhz(width); if (bw < 0) return false; /* Validate that the channels bw is entirely within the 6 GHz band */ if (center - bw / 2 < 5945 || center + bw / 2 > 7125) return false; /* With 320 MHz the permitted channels overlap */ if (bw == 320) step = 160; else step = bw; /* * Valid channels are packed from lowest frequency towards higher ones. * So test that the lower frequency aligns with one of these steps. 
*/ return (center - bw / 2 - 5945) % step == 0; } bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq, oper_freq; int oper_width, control_width; if (!chandef->chan) return false; if (chandef->freq1_offset >= 1000) return false; control_freq = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: if (ieee80211_chandef_to_khz(chandef) != ieee80211_channel_to_khz(chandef->chan)) return false; if (chandef->center_freq2) return false; break; case NL80211_CHAN_WIDTH_1: case NL80211_CHAN_WIDTH_2: case NL80211_CHAN_WIDTH_4: case NL80211_CHAN_WIDTH_8: case NL80211_CHAN_WIDTH_16: if (chandef->chan->band != NL80211_BAND_S1GHZ) return false; control_freq = ieee80211_channel_to_khz(chandef->chan); oper_freq = ieee80211_chandef_to_khz(chandef); control_width = nl80211_chan_width_to_mhz( ieee80211_s1g_channel_width( chandef->chan)); oper_width = cfg80211_chandef_get_width(chandef); if (oper_width < 0 || control_width < 0) return false; if (chandef->center_freq2) return false; if (control_freq + MHZ_TO_KHZ(control_width) / 2 > oper_freq + MHZ_TO_KHZ(oper_width) / 2) return false; if (control_freq - MHZ_TO_KHZ(control_width) / 2 < oper_freq - MHZ_TO_KHZ(oper_width) / 2) return false; break; case NL80211_CHAN_WIDTH_80P80: if (!chandef->center_freq2) return false; /* adjacent is not allowed -- that's a 160 MHz channel */ if (chandef->center_freq1 - chandef->center_freq2 == 80 || chandef->center_freq2 - chandef->center_freq1 == 80) return false; break; default: if (chandef->center_freq2) return false; break; } switch (chandef->width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: case NL80211_CHAN_WIDTH_20: case NL80211_CHAN_WIDTH_20_NOHT: case NL80211_CHAN_WIDTH_1: case NL80211_CHAN_WIDTH_2: case NL80211_CHAN_WIDTH_4: case NL80211_CHAN_WIDTH_8: case NL80211_CHAN_WIDTH_16: /* all checked above */ break; case 
NL80211_CHAN_WIDTH_320: if (chandef->center_freq1 == control_freq + 150 || chandef->center_freq1 == control_freq + 130 || chandef->center_freq1 == control_freq + 110 || chandef->center_freq1 == control_freq + 90 || chandef->center_freq1 == control_freq - 90 || chandef->center_freq1 == control_freq - 110 || chandef->center_freq1 == control_freq - 130 || chandef->center_freq1 == control_freq - 150) break; fallthrough; case NL80211_CHAN_WIDTH_160: if (chandef->center_freq1 == control_freq + 70 || chandef->center_freq1 == control_freq + 50 || chandef->center_freq1 == control_freq - 50 || chandef->center_freq1 == control_freq - 70) break; fallthrough; case NL80211_CHAN_WIDTH_80P80: case NL80211_CHAN_WIDTH_80: if (chandef->center_freq1 == control_freq + 30 || chandef->center_freq1 == control_freq - 30) break; fallthrough; case NL80211_CHAN_WIDTH_40: if (chandef->center_freq1 == control_freq + 10 || chandef->center_freq1 == control_freq - 10) break; fallthrough; default: return false; } if (!cfg80211_valid_center_freq(chandef->center_freq1, chandef->width)) return false; if (chandef->width == NL80211_CHAN_WIDTH_80P80 && !cfg80211_valid_center_freq(chandef->center_freq2, chandef->width)) return false; /* channel 14 is only for IEEE 802.11b */ if (chandef->center_freq1 == 2484 && chandef->width != NL80211_CHAN_WIDTH_20_NOHT) return false; if (cfg80211_chandef_is_edmg(chandef) && !cfg80211_edmg_chandef_valid(chandef)) return false; return valid_puncturing_bitmap(chandef); } EXPORT_SYMBOL(cfg80211_chandef_valid); int cfg80211_chandef_primary(const struct cfg80211_chan_def *c, enum nl80211_chan_width primary_chan_width, u16 *punctured) { int pri_width = nl80211_chan_width_to_mhz(primary_chan_width); int width = cfg80211_chandef_get_width(c); u32 control = c->chan->center_freq; u32 center = c->center_freq1; u16 _punct = 0; if (WARN_ON_ONCE(pri_width < 0 || width < 0)) return -1; /* not intended to be called this way, can't determine */ if (WARN_ON_ONCE(pri_width > width)) 
return -1; if (!punctured) punctured = &_punct; *punctured = c->punctured; while (width > pri_width) { unsigned int bits_to_drop = width / 20 / 2; if (control > center) { center += width / 4; *punctured >>= bits_to_drop; } else { center -= width / 4; *punctured &= (1 << bits_to_drop) - 1; } width /= 2; } return center; } EXPORT_SYMBOL(cfg80211_chandef_primary); static const struct cfg80211_chan_def * check_chandef_primary_compat(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2, enum nl80211_chan_width primary_chan_width) { u16 punct_c1 = 0, punct_c2 = 0; /* check primary is compatible -> error if not */ if (cfg80211_chandef_primary(c1, primary_chan_width, &punct_c1) != cfg80211_chandef_primary(c2, primary_chan_width, &punct_c2)) return ERR_PTR(-EINVAL); if (punct_c1 != punct_c2) return ERR_PTR(-EINVAL); /* assumes c1 is smaller width, if that was just checked -> done */ if (c1->width == primary_chan_width) return c2; /* otherwise continue checking the next width */ return NULL; } static const struct cfg80211_chan_def * _cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) { const struct cfg80211_chan_def *ret; /* If they are identical, return */ if (cfg80211_chandef_identical(c1, c2)) return c2; /* otherwise, must have same control channel */ if (c1->chan != c2->chan) return NULL; /* * If they have the same width, but aren't identical, * then they can't be compatible. */ if (c1->width == c2->width) return NULL; /* * can't be compatible if one of them is 5/10 MHz or S1G * but they don't have the same width. 
*/ #define NARROW_OR_S1G(width) ((width) == NL80211_CHAN_WIDTH_5 || \ (width) == NL80211_CHAN_WIDTH_10 || \ (width) == NL80211_CHAN_WIDTH_1 || \ (width) == NL80211_CHAN_WIDTH_2 || \ (width) == NL80211_CHAN_WIDTH_4 || \ (width) == NL80211_CHAN_WIDTH_8 || \ (width) == NL80211_CHAN_WIDTH_16) if (NARROW_OR_S1G(c1->width) || NARROW_OR_S1G(c2->width)) return NULL; /* * Make sure that c1 is always the narrower one, so that later * we either return NULL or c2 and don't have to check both * directions. */ if (c1->width > c2->width) swap(c1, c2); /* * No further checks needed if the "narrower" one is only 20 MHz. * Here "narrower" includes being a 20 MHz non-HT channel vs. a * 20 MHz HT (or later) one. */ if (c1->width <= NL80211_CHAN_WIDTH_20) return c2; ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_40); if (ret) return ret; ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_80); if (ret) return ret; /* * If c1 is 80+80, then c2 is 160 or higher, but that cannot * match. If c2 was also 80+80 it was already either accepted * or rejected above (identical or not, respectively.) */ if (c1->width == NL80211_CHAN_WIDTH_80P80) return NULL; ret = check_chandef_primary_compat(c1, c2, NL80211_CHAN_WIDTH_160); if (ret) return ret; /* * Getting here would mean they're both wider than 160, have the * same primary 160, but are not identical - this cannot happen * since they must be 320 (no wider chandefs exist, at least yet.) 
*/ WARN_ON_ONCE(1); return NULL; } const struct cfg80211_chan_def * cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, const struct cfg80211_chan_def *c2) { const struct cfg80211_chan_def *ret; ret = _cfg80211_chandef_compatible(c1, c2); if (IS_ERR(ret)) return NULL; return ret; } EXPORT_SYMBOL(cfg80211_chandef_compatible); void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state) { struct ieee80211_channel *c; int width; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return; width = cfg80211_chandef_get_width(chandef); if (width < 0) return; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c || !(c->flags & IEEE80211_CHAN_RADAR)) continue; c->dfs_state = dfs_state; c->dfs_state_entered = jiffies; } } static bool cfg80211_dfs_permissive_check_wdev(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, struct wireless_dev *wdev, struct ieee80211_channel *chan) { unsigned int link_id; for_each_valid_link(wdev, link_id) { struct ieee80211_channel *other_chan = NULL; struct cfg80211_chan_def chandef = {}; int ret; /* In order to avoid daisy chaining only allow BSS STA */ if (wdev->iftype != NL80211_IFTYPE_STATION || !wdev->links[link_id].client.current_bss) continue; other_chan = wdev->links[link_id].client.current_bss->pub.channel; if (!other_chan) continue; if (chan == other_chan) return true; /* continue if we can't get the channel */ ret = rdev_get_channel(rdev, wdev, link_id, &chandef); if (ret) continue; if (cfg80211_is_sub_chan(&chandef, chan, false)) return true; } return false; } /* * Check if P2P GO is allowed to operate on a DFS channel */ static bool cfg80211_dfs_permissive_chan(struct wiphy *wiphy, enum nl80211_iftype iftype, struct ieee80211_channel *chan) { struct wireless_dev *wdev; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); lockdep_assert_held(&rdev->wiphy.mtx); if 
(!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_DFS_CONCURRENT) || !(chan->flags & IEEE80211_CHAN_DFS_CONCURRENT)) return false; /* only valid for P2P GO */ if (iftype != NL80211_IFTYPE_P2P_GO) return false; /* * Allow only if there's a concurrent BSS */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { bool ret = cfg80211_dfs_permissive_check_wdev(rdev, iftype, wdev, chan); if (ret) return ret; } return false; } static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { struct ieee80211_channel *c; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return -EINVAL; if (c->flags & IEEE80211_CHAN_RADAR && !cfg80211_dfs_permissive_chan(wiphy, iftype, c)) return 1; } return 0; } int cfg80211_chandef_dfs_required(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_iftype iftype) { int width; int ret; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return -EINVAL; switch (iftype) { case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: width = cfg80211_chandef_get_width(chandef); if (width < 0) return -EINVAL; ret = cfg80211_get_chans_dfs_required(wiphy, chandef, iftype); return (ret > 0) ? 
BIT(chandef->width) : ret; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: WARN_ON(1); } return 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_required); bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { struct ieee80211_channel *c; int width, count = 0; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; width = cfg80211_chandef_get_width(chandef); if (width < 0) return false; /* * Check entire range of channels for the bandwidth. * Check all channels are DFS channels (DFS_USABLE or * DFS_AVAILABLE). Return number of usable channels * (require CAC). Allow DFS and non-DFS channel mix. */ for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; if (c->flags & IEEE80211_CHAN_DISABLED) return false; if (c->flags & IEEE80211_CHAN_RADAR) { if (c->dfs_state == NL80211_DFS_UNAVAILABLE) return false; if (c->dfs_state == NL80211_DFS_USABLE) count++; } } return count > 0; } EXPORT_SYMBOL(cfg80211_chandef_dfs_usable); /* * Checks if center frequency of chan falls with in the bandwidth * range of chandef. 
*/ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, bool primary_only) { int width; u32 freq; if (!chandef->chan) return false; if (chandef->chan->center_freq == chan->center_freq) return true; if (primary_only) return false; width = cfg80211_chandef_get_width(chandef); if (width <= 20) return false; for (freq = chandef->center_freq1 - width / 2 + 10; freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { if (chan->center_freq == freq) return true; } if (!chandef->center_freq2) return false; for (freq = chandef->center_freq2 - width / 2 + 10; freq <= chandef->center_freq2 + width / 2 - 10; freq += 20) { if (chan->center_freq == freq) return true; } return false; } bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) { unsigned int link; lockdep_assert_wiphy(wdev->wiphy); switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: for_each_valid_link(wdev, link) { if (wdev->links[link].ap.beacon_interval) return true; } break; case NL80211_IFTYPE_ADHOC: if (wdev->u.ibss.ssid_len) return true; break; case NL80211_IFTYPE_MESH_POINT: if (wdev->u.mesh.id_len) return true; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: /* Can NAN type be considered as beaconing interface? 
*/ case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: WARN_ON(1); } return false; } bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, struct ieee80211_channel *chan, bool primary_only) { unsigned int link; switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: for_each_valid_link(wdev, link) { if (cfg80211_is_sub_chan(&wdev->links[link].ap.chandef, chan, primary_only)) return true; } break; case NL80211_IFTYPE_ADHOC: return cfg80211_is_sub_chan(&wdev->u.ibss.chandef, chan, primary_only); case NL80211_IFTYPE_MESH_POINT: return cfg80211_is_sub_chan(&wdev->u.mesh.chandef, chan, primary_only); default: break; } return false; } static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan) { struct wireless_dev *wdev; lockdep_assert_wiphy(wiphy); list_for_each_entry(wdev, &wiphy->wdev_list, list) { if (!cfg80211_beaconing_iface_active(wdev)) continue; if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) return true; } return false; } static bool cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, struct ieee80211_channel *channel) { if (!rdev->background_radar_wdev) return false; if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) return false; return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel, false); } bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, struct ieee80211_channel *chan) { struct cfg80211_registered_device *rdev; ASSERT_RTNL(); if (!(chan->flags & IEEE80211_CHAN_RADAR)) return false; for_each_rdev(rdev) { bool found; if (!reg_dfs_domain_same(wiphy, &rdev->wiphy)) continue; guard(wiphy)(&rdev->wiphy); found = cfg80211_is_wiphy_oper_chan(&rdev->wiphy, chan) || cfg80211_offchan_chain_is_active(rdev, chan); if (found) return true; } return false; } static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { struct ieee80211_channel *c; int 
width; bool dfs_offload; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; width = cfg80211_chandef_get_width(chandef); if (width < 0) return false; dfs_offload = wiphy_ext_feature_isset(wiphy, NL80211_EXT_FEATURE_DFS_OFFLOAD); /* * Check entire range of channels for the bandwidth. * If any channel in between is disabled or has not * had gone through CAC return false */ for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c) return false; if (c->flags & IEEE80211_CHAN_DISABLED) return false; if ((c->flags & IEEE80211_CHAN_RADAR) && (c->dfs_state != NL80211_DFS_AVAILABLE) && !(c->dfs_state == NL80211_DFS_USABLE && dfs_offload)) return false; } return true; } unsigned int cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef) { struct ieee80211_channel *c; int width; unsigned int t1 = 0, t2 = 0; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return 0; width = cfg80211_chandef_get_width(chandef); if (width < 0) return 0; for_each_subchan(chandef, freq, cf) { c = ieee80211_get_channel_khz(wiphy, freq); if (!c || (c->flags & IEEE80211_CHAN_DISABLED)) { if (cf == 1) t1 = INT_MAX; else t2 = INT_MAX; continue; } if (!(c->flags & IEEE80211_CHAN_RADAR)) continue; if (cf == 1 && c->dfs_cac_ms > t1) t1 = c->dfs_cac_ms; if (cf == 2 && c->dfs_cac_ms > t2) t2 = c->dfs_cac_ms; } if (t1 == INT_MAX && t2 == INT_MAX) return 0; if (t1 == INT_MAX) return t2; if (t2 == INT_MAX) return t1; return max(t1, t2); } EXPORT_SYMBOL(cfg80211_chandef_dfs_cac_time); /* check if the operating channels are valid and supported */ static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels, enum ieee80211_edmg_bw_config edmg_bw_config, int primary_channel, struct ieee80211_edmg *edmg_cap) { struct ieee80211_channel *chan; int i, freq; int channels_counter = 0; if (!edmg_channels && !edmg_bw_config) return true; if ((!edmg_channels && edmg_bw_config) || (edmg_channels && !edmg_bw_config)) return false; if 
(!(edmg_channels & BIT(primary_channel - 1))) return false; /* 60GHz channels 1..6 */ for (i = 0; i < 6; i++) { if (!(edmg_channels & BIT(i))) continue; if (!(edmg_cap->channels & BIT(i))) return false; channels_counter++; freq = ieee80211_channel_to_frequency(i + 1, NL80211_BAND_60GHZ); chan = ieee80211_get_channel(wiphy, freq); if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) return false; } /* IEEE802.11 allows max 4 channels */ if (channels_counter > 4) return false; /* check bw_config is a subset of what driver supports * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13) */ if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4)) return false; if (edmg_bw_config > edmg_cap->bw_config) return false; return true; } bool _cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags, u32 permitting_flags) { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; struct ieee80211_edmg *edmg_cap; u32 width, control_freq, cap; bool ext_nss_cap, support_80_80 = false, support_320 = false; const struct ieee80211_sband_iftype_data *iftd; struct ieee80211_supported_band *sband; struct ieee80211_channel *c; int i; if (WARN_ON(!cfg80211_chandef_valid(chandef))) return false; ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap; vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap; edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap; ext_nss_cap = __le16_to_cpu(vht_cap->vht_mcs.tx_highest) & IEEE80211_VHT_EXT_NSS_BW_CAPABLE; if (edmg_cap->channels && !cfg80211_edmg_usable(wiphy, chandef->edmg.channels, chandef->edmg.bw_config, chandef->chan->hw_value, edmg_cap)) return false; control_freq = chandef->chan->center_freq; switch (chandef->width) { case NL80211_CHAN_WIDTH_1: width = 1; break; case NL80211_CHAN_WIDTH_2: width = 2; break; case NL80211_CHAN_WIDTH_4: width = 4; break; case NL80211_CHAN_WIDTH_8: width = 8; break; case NL80211_CHAN_WIDTH_16: width = 16; break; case NL80211_CHAN_WIDTH_5: width = 
5; break; case NL80211_CHAN_WIDTH_10: prohibited_flags |= IEEE80211_CHAN_NO_10MHZ; width = 10; break; case NL80211_CHAN_WIDTH_20: if (!ht_cap->ht_supported && chandef->chan->band != NL80211_BAND_6GHZ) return false; fallthrough; case NL80211_CHAN_WIDTH_20_NOHT: prohibited_flags |= IEEE80211_CHAN_NO_20MHZ; width = 20; break; case NL80211_CHAN_WIDTH_40: width = 40; if (chandef->chan->band == NL80211_BAND_6GHZ) break; if (!ht_cap->ht_supported) return false; if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) return false; if (chandef->center_freq1 < control_freq && chandef->chan->flags & IEEE80211_CHAN_NO_HT40MINUS) return false; if (chandef->center_freq1 > control_freq && chandef->chan->flags & IEEE80211_CHAN_NO_HT40PLUS) return false; break; case NL80211_CHAN_WIDTH_80P80: cap = vht_cap->cap; support_80_80 = (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) || (cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) || (ext_nss_cap && u32_get_bits(cap, IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) > 1); if (chandef->chan->band != NL80211_BAND_6GHZ && !support_80_80) return false; fallthrough; case NL80211_CHAN_WIDTH_80: prohibited_flags |= IEEE80211_CHAN_NO_80MHZ; width = 80; if (chandef->chan->band == NL80211_BAND_6GHZ) break; if (!vht_cap->vht_supported) return false; break; case NL80211_CHAN_WIDTH_160: prohibited_flags |= IEEE80211_CHAN_NO_160MHZ; width = 160; if (chandef->chan->band == NL80211_BAND_6GHZ) break; if (!vht_cap->vht_supported) return false; cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK; if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ && cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ && !(ext_nss_cap && (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))) return false; break; case NL80211_CHAN_WIDTH_320: prohibited_flags |= IEEE80211_CHAN_NO_320MHZ; width = 320; if (chandef->chan->band != NL80211_BAND_6GHZ) return false; sband = 
wiphy->bands[NL80211_BAND_6GHZ];
		if (!sband)
			return false;

		for_each_sband_iftype_data(sband, i, iftd) {
			if (!iftd->eht_cap.has_eht)
				continue;

			if (iftd->eht_cap.eht_cap_elem.phy_cap_info[0] &
			    IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) {
				support_320 = true;
				break;
			}
		}

		if (!support_320)
			return false;
		break;
	default:
		WARN_ON_ONCE(1);
		return false;
	}

	/*
	 * TODO: What if there are only certain 80/160/80+80 MHz channels
	 *	 allowed by the driver, or only certain combinations?
	 *	 For 40 MHz the driver can set the NO_HT40 flags, but for
	 *	 80/160 MHz and in particular 80+80 MHz this isn't really
	 *	 feasible and we only have NO_80MHZ/NO_160MHZ so far but
	 *	 no way to cover 80+80 MHz or more complex restrictions.
	 *	 Note that such restrictions also need to be advertised to
	 *	 userspace, for example for P2P channel selection.
	 */

	if (width > 20)
		prohibited_flags |= IEEE80211_CHAN_NO_OFDM;

	/* 5 and 10 MHz are only defined for the OFDM PHY */
	if (width < 20)
		prohibited_flags |= IEEE80211_CHAN_NO_OFDM;

	/*
	 * Every sub-channel must be known to the wiphy.  A sub-channel that
	 * carries any of the permitting flags is accepted without looking at
	 * the prohibited flags; otherwise a single prohibited flag rejects
	 * the whole chandef.
	 */
	for_each_subchan(chandef, freq, cf) {
		c = ieee80211_get_channel_khz(wiphy, freq);
		if (!c)
			return false;
		if (c->flags & permitting_flags)
			continue;
		if (c->flags & prohibited_flags)
			return false;
	}

	return true;
}

/*
 * Exported wrapper around _cfg80211_chandef_usable() that passes 0 for the
 * permitting flags, so only @prohibited_flags influence the result.
 */
bool cfg80211_chandef_usable(struct wiphy *wiphy,
			     const struct cfg80211_chan_def *chandef,
			     u32 prohibited_flags)
{
	return _cfg80211_chandef_usable(wiphy, chandef, prohibited_flags, 0);
}
EXPORT_SYMBOL(cfg80211_chandef_usable);

/*
 * Check whether @wdev has a link that justifies relaxed operation on @chan
 * for an interface of type @iftype.
 *
 * Returns true when one of the valid links of @wdev uses @chan itself, or
 * uses a 5/6 GHz channel for which cfg80211_get_unii() reports the same
 * value as for @chan (subject to the channel 165 exception below).
 * Returns false when no link qualifies.
 *
 * Note that other_chan is not reset between loop iterations, so a channel
 * picked up on an earlier link is evaluated again for later links.
 */
static bool cfg80211_ir_permissive_check_wdev(enum nl80211_iftype iftype,
					      struct wireless_dev *wdev,
					      struct ieee80211_channel *chan)
{
	struct ieee80211_channel *other_chan = NULL;
	unsigned int link_id;
	int r1, r2;

	for_each_valid_link(wdev, link_id) {
		/* A station link with a current BSS: use the BSS channel. */
		if (wdev->iftype == NL80211_IFTYPE_STATION &&
		    wdev->links[link_id].client.current_bss)
			other_chan = wdev->links[link_id].client.current_bss->pub.channel;

		/*
		 * If a GO already operates on the same GO_CONCURRENT channel,
		 * this one (maybe the same one) can beacon as well. We allow
		 * the operation even if the station we relied on with
		 * GO_CONCURRENT is disconnected now. But then we must make sure
		 * we're not outdoor on an indoor-only channel.
		 */
		if (iftype == NL80211_IFTYPE_P2P_GO &&
		    wdev->iftype == NL80211_IFTYPE_P2P_GO &&
		    wdev->links[link_id].ap.beacon_interval &&
		    !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
			other_chan = wdev->links[link_id].ap.chandef.chan;

		if (!other_chan)
			continue;

		if (chan == other_chan)
			return true;

		/* The same-band relaxation is only applied on 5 and 6 GHz. */
		if (chan->band != NL80211_BAND_5GHZ &&
		    chan->band != NL80211_BAND_6GHZ)
			continue;

		r1 = cfg80211_get_unii(chan->center_freq);
		r2 = cfg80211_get_unii(other_chan->center_freq);

		/* -EINVAL from both lookups must not count as a match. */
		if (r1 != -EINVAL && r1 == r2) {
			/*
			 * At some locations channels 149-165 are considered a
			 * bundle, but at other locations, e.g., Indonesia,
			 * channels 149-161 are considered a bundle while
			 * channel 165 is left out and considered to be in a
			 * different bundle. Thus, in case that there is a
			 * station interface connected to an AP on channel 165,
			 * it is assumed that channels 149-161 are allowed for
			 * GO operations. However, having a station interface
			 * connected to an AP on channels 149-161, does not
			 * allow GO operation on channel 165.
			 */
			if (chan->center_freq == 5825 &&
			    other_chan->center_freq != 5825)
				continue;
			return true;
		}
	}

	return false;
}

/*
 * Check if the channel can be used under permissive conditions mandated by
 * some regulatory bodies, i.e., the channel is marked with
 * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface
 * associated to an AP on the same channel or on the same UNII band
 * (assuming that the AP is an authorized master).
 * In addition allow operation on a channel on which indoor operation is
 * allowed, iff we are currently operating in an indoor environment.
 *
 * The caller must hold the wiphy mutex (asserted below).
 */
static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
					enum nl80211_iftype iftype,
					struct ieee80211_channel *chan)
{
	struct wireless_dev *wdev;
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);

	lockdep_assert_held(&rdev->wiphy.mtx);

	/* The relaxation needs both the Kconfig option and the wiphy flag. */
	if (!IS_ENABLED(CONFIG_CFG80211_REG_RELAX_NO_IR) ||
	    !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
		return false;

	/* only valid for GO and TDLS off-channel (station/p2p-CL) */
	if (iftype != NL80211_IFTYPE_P2P_GO &&
	    iftype != NL80211_IFTYPE_STATION &&
	    iftype != NL80211_IFTYPE_P2P_CLIENT)
		return false;

	if (regulatory_indoor_allowed() &&
	    (chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
		return true;

	if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT))
		return false;

	/*
	 * Generally, it is possible to rely on another device/driver to allow
	 * the IR concurrent relaxation, however, since the device can further
	 * enforce the relaxation (by doing a similar verifications as this),
	 * and thus fail the GO instantiation, consider only the interfaces of
	 * the current registered device.
	 */
	list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
		bool ret;

		ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan);
		if (ret)
			return ret;
	}

	return false;
}

/*
 * Core beaconing check shared by the exported helpers.
 *
 * The chandef must first be usable with respect to IEEE80211_CHAN_DISABLED.
 * Then the DFS state is consulted: when DFS is required and the chandef is
 * reported as DFS-available, IEEE80211_CHAN_NO_IR is dropped from
 * @prohibited_flags and the radar check is skipped.  Otherwise, whenever
 * cfg80211_chandef_dfs_required() returned non-zero (including a negative
 * value), the chandef must also be usable with IEEE80211_CHAN_RADAR
 * prohibited.  The final answer comes from checking the remaining
 * @prohibited_flags against @permitting_flags; it is also emitted through
 * the return tracepoint.
 */
static bool _cfg80211_reg_can_beacon(struct wiphy *wiphy,
				     struct cfg80211_chan_def *chandef,
				     enum nl80211_iftype iftype,
				     u32 prohibited_flags,
				     u32 permitting_flags)
{
	bool res, check_radar;
	int dfs_required;

	trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype,
				      prohibited_flags,
				      permitting_flags);

	if (!_cfg80211_chandef_usable(wiphy, chandef,
				      IEEE80211_CHAN_DISABLED, 0))
		return false;

	dfs_required = cfg80211_chandef_dfs_required(wiphy, chandef, iftype);
	check_radar = dfs_required != 0;

	if (dfs_required > 0 &&
	    cfg80211_chandef_dfs_available(wiphy, chandef)) {
		/* We can skip IEEE80211_CHAN_NO_IR if chandef dfs available */
		prohibited_flags &= ~IEEE80211_CHAN_NO_IR;
		check_radar = false;
	}

	if (check_radar &&
	    !_cfg80211_chandef_usable(wiphy, chandef,
				      IEEE80211_CHAN_RADAR, 0))
		return false;

	res = _cfg80211_chandef_usable(wiphy, chandef,
				       prohibited_flags,
				       permitting_flags);

	trace_cfg80211_return_bool(res);
	return res;
}

/*
 * Decide whether beaconing is allowed on @chandef for the interface type and
 * regulatory parameters described by @cfg.
 *
 * IEEE80211_CHAN_NO_IR is prohibited unless @cfg->relax is set and
 * cfg80211_ir_permissive_chan() accepts the primary channel.  Permitting
 * flags are added for VLP AP power mode and for 20 MHz (HT or non-HT)
 * operation of an AP or P2P GO.
 */
bool cfg80211_reg_check_beaconing(struct wiphy *wiphy,
				  struct cfg80211_chan_def *chandef,
				  struct cfg80211_beaconing_check_config *cfg)
{
	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
	u32 permitting_flags = 0;
	bool check_no_ir = true;

	/*
	 * Under certain conditions suggested by some regulatory bodies a
	 * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag
	 * only if such relaxations are not enabled and the conditions are not
	 * met.
	 */
	if (cfg->relax) {
		lockdep_assert_held(&rdev->wiphy.mtx);
		check_no_ir = !cfg80211_ir_permissive_chan(wiphy, cfg->iftype,
							   chandef->chan);
	}

	if (cfg->reg_power == IEEE80211_REG_VLP_AP)
		permitting_flags |= IEEE80211_CHAN_ALLOW_6GHZ_VLP_AP;

	if ((cfg->iftype == NL80211_IFTYPE_P2P_GO ||
	     cfg->iftype == NL80211_IFTYPE_AP) &&
	    (chandef->width == NL80211_CHAN_WIDTH_20_NOHT ||
	     chandef->width == NL80211_CHAN_WIDTH_20))
		permitting_flags |= IEEE80211_CHAN_ALLOW_20MHZ_ACTIVITY;

	return _cfg80211_reg_can_beacon(wiphy, chandef, cfg->iftype,
					check_no_ir ? IEEE80211_CHAN_NO_IR : 0,
					permitting_flags);
}
EXPORT_SYMBOL(cfg80211_reg_check_beaconing);

/*
 * Forward a monitor channel change to the driver.
 *
 * Returns -EOPNOTSUPP when the driver has no set_monitor_channel op,
 * -EBUSY when cfg80211_has_monitors_only() is false for @rdev, and
 * otherwise the result of rdev_set_monitor_channel().
 */
int cfg80211_set_monitor_channel(struct cfg80211_registered_device *rdev,
				 struct net_device *dev,
				 struct cfg80211_chan_def *chandef)
{
	if (!rdev->ops->set_monitor_channel)
		return -EOPNOTSUPP;
	if (!cfg80211_has_monitors_only(rdev))
		return -EBUSY;

	return rdev_set_monitor_channel(rdev, dev, chandef);
}

/*
 * Return true if any band selected by @sband_mask has at least one channel
 * with none of @prohibited_flags set.  IEEE80211_CHAN_DISABLED is always
 * added to the prohibited set; bands the wiphy does not provide are skipped.
 */
bool cfg80211_any_usable_channels(struct wiphy *wiphy,
				  unsigned long sband_mask,
				  u32 prohibited_flags)
{
	int idx;

	prohibited_flags |= IEEE80211_CHAN_DISABLED;

	for_each_set_bit(idx, &sband_mask, NUM_NL80211_BANDS) {
		struct ieee80211_supported_band *sband = wiphy->bands[idx];
		int chanidx;

		if (!sband)
			continue;

		for (chanidx = 0; chanidx < sband->n_channels; chanidx++) {
			struct ieee80211_channel *chan;

			chan = &sband->channels[chanidx];

			if (chan->flags & prohibited_flags)
				continue;

			return true;
		}
	}

	return false;
}
EXPORT_SYMBOL(cfg80211_any_usable_channels);

/*
 * Return a pointer to the chandef stored for @wdev (and, for AP/P2P-GO,
 * for link @link_id), or NULL for interface types not handled here.
 *
 * Warns when @link_id is not in wdev->valid_links, or when the wdev has no
 * valid links and @link_id is non-zero.
 */
struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
				       unsigned int link_id)
{
	lockdep_assert_wiphy(wdev->wiphy);

	WARN_ON(wdev->valid_links && !(wdev->valid_links & BIT(link_id)));
	WARN_ON(!wdev->valid_links && link_id > 0);

	switch (wdev->iftype) {
	case NL80211_IFTYPE_MESH_POINT:
		return &wdev->u.mesh.chandef;
	case NL80211_IFTYPE_ADHOC:
		return &wdev->u.ibss.chandef;
	case NL80211_IFTYPE_OCB:
		return &wdev->u.ocb.chandef;
	case NL80211_IFTYPE_AP:
	case NL80211_IFTYPE_P2P_GO:
		return &wdev->links[link_id].ap.chandef;
	default:
		return NULL;
	}
}
EXPORT_SYMBOL(wdev_chandef);
3 3 3 169 118 116 102 22 10 2 26 1 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H

#include <linux/mm_types.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>

#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
#include <asm/smp.h>
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>

DECLARE_PER_CPU(u64, tlbstate_untag_mask);

void __flush_tlb_all(void);

#define TLB_FLUSH_ALL	-1UL
#define TLB_GENERATION_INVALID	0

void cr4_update_irqsoff(unsigned long set, unsigned long clear);
unsigned long cr4_read_shadow(void);

/* Set bits in this cpu's CR4; the name indicates IRQs must already be off. */
static inline void cr4_set_bits_irqsoff(unsigned long mask)
{
	cr4_update_irqsoff(mask, 0);
}

/* Clear bits in this cpu's CR4; the name indicates IRQs must already be off. */
static inline void cr4_clear_bits_irqsoff(unsigned long mask)
{
	cr4_update_irqsoff(0, mask);
}

/* Set bits in this cpu's CR4, disabling local IRQs around the update. */
static inline void cr4_set_bits(unsigned long mask)
{
	unsigned long flags;

	local_irq_save(flags);
	cr4_set_bits_irqsoff(mask);
	local_irq_restore(flags);
}

/* Clear bits in this cpu's CR4, disabling local IRQs around the update. */
static inline void cr4_clear_bits(unsigned long mask)
{
	unsigned long flags;

	local_irq_save(flags);
	cr4_clear_bits_irqsoff(mask);
	local_irq_restore(flags);
}

#ifndef MODULE
/*
 * 6 because 6 should be plenty and struct tlb_state will fit in two cache
 * lines.
 */
#define TLB_NR_DYN_ASIDS	6

/* One tracked TLB context: which mm it belongs to and how current it is. */
struct tlb_context {
	u64 ctx_id;
	u64 tlb_gen;
};

struct tlb_state {
	/*
	 * cpu_tlbstate.loaded_mm should match CR3 whenever interrupts
	 * are on.  This means that it may not match current->active_mm,
	 * which will contain the previous user mm when we're in lazy TLB
	 * mode even if we've already switched back to swapper_pg_dir.
	 *
	 * During switch_mm_irqs_off(), loaded_mm will be set to
	 * LOADED_MM_SWITCHING during the brief interrupts-off window
	 * when CR3 and loaded_mm would otherwise be inconsistent.  This
	 * is for nmi_uaccess_okay()'s benefit.
	 */
	struct mm_struct *loaded_mm;

#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)

	/* Last user mm for optimizing IBPB */
	union {
		struct mm_struct	*last_user_mm;
		unsigned long		last_user_mm_spec;
	};

	u16 loaded_mm_asid;
	u16 next_asid;

	/*
	 * If set we changed the page tables in such a way that we
	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
	 * This tells us to go invalidate all the non-loaded ctxs[]
	 * on the next context switch.
	 *
	 * The current ctx was kept up-to-date as it ran and does not
	 * need to be invalidated.
	 */
	bool invalidate_other;

#ifdef CONFIG_ADDRESS_MASKING
	/*
	 * Active LAM mode.
	 *
	 * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
	 * disabled.
	 */
	u8 lam;
#endif

	/*
	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
	 * the corresponding user PCID needs a flush next time we
	 * switch to it; see SWITCH_TO_USER_CR3.
	 */
	unsigned short user_pcid_flush_mask;

	/*
	 * Access to this CR4 shadow and to H/W CR4 is protected by
	 * disabling interrupts when modifying either one.
	 */
	unsigned long cr4;

	/*
	 * This is a list of all contexts that might exist in the TLB.
	 * There is one per ASID that we use, and the ASID (what the
	 * CPU calls PCID) is the index into ctxs.
	 *
	 * For each context, ctx_id indicates which mm the TLB's user
	 * entries came from.  As an invariant, the TLB will never
	 * contain entries that are out-of-date as when that mm reached
	 * the tlb_gen in the list.
	 *
	 * To be clear, this means that it's legal for the TLB code to
	 * flush the TLB without updating tlb_gen.  This can happen
	 * (for now, at least) due to paravirt remote flushes.
	 *
	 * NB: context 0 is a bit special, since it's also used by
	 * various bits of init code.  This is fine -- code that
	 * isn't aware of PCID will end up harmlessly flushing
	 * context 0.
	 */
	struct tlb_context ctxs[TLB_NR_DYN_ASIDS];
};
DECLARE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate);

struct tlb_state_shared {
	/*
	 * We can be in one of several states:
	 *
	 *  - Actively using an mm.  Our CPU's bit will be set in
	 *    mm_cpumask(loaded_mm) and is_lazy == false;
	 *
	 *  - Not using a real mm.  loaded_mm == &init_mm.  Our CPU's bit
	 *    will not be set in mm_cpumask(&init_mm) and is_lazy == false.
	 *
	 *  - Lazily using a real mm.  loaded_mm != &init_mm, our bit
	 *    is set in mm_cpumask(loaded_mm), but is_lazy == true.
	 *    We're heuristically guessing that the CR3 load we
	 *    skipped more than makes up for the overhead added by
	 *    lazy mode.
	 */
	bool is_lazy;
};
DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);

bool nmi_uaccess_okay(void);
#define nmi_uaccess_okay nmi_uaccess_okay

/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
}

extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;

/* How many pages can be invalidated with one INVLPGB. */
extern u16 invlpgb_count_max;

extern void initialize_tlbstate_and_flush(void);

/*
 * TLB flushing:
 *
 *  - flush_tlb_all() flushes all processes TLBs
 *  - flush_tlb_mm(mm) flushes the specified mm context TLB's
 *  - flush_tlb_page(vma, vmaddr) flushes one page
 *  - flush_tlb_range(vma, start, end) flushes a range of pages
 *  - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
 *  - flush_tlb_multi(cpumask, info) flushes TLBs on multiple cpus
 *
 * ..but the i386 has somewhat limited tlb flushing capabilities,
 * and page-granular flushes are available only on i486 and up.
 */
struct flush_tlb_info {
	/*
	 * We support several kinds of flushes.
	 *
	 * - Fully flush a single mm.  .mm will be set, .end will be
	 *   TLB_FLUSH_ALL, and .new_tlb_gen will be the tlb_gen to
	 *   which the IPI sender is trying to catch us up.
	 *
	 * - Partially flush a single mm.  .mm will be set, .start and
	 *   .end will indicate the range, and .new_tlb_gen will be set
	 *   such that the changes between generation .new_tlb_gen-1 and
	 *   .new_tlb_gen are entirely contained in the indicated range.
	 *
	 * - Fully flush all mms whose tlb_gens have been updated.  .mm
	 *   will be NULL, .end will be TLB_FLUSH_ALL, and .new_tlb_gen
	 *   will be zero.
	 */
	struct mm_struct	*mm;
	unsigned long		start;
	unsigned long		end;
	u64			new_tlb_gen;
	unsigned int		initiating_cpu;
	u8			stride_shift;
	u8			freed_tables;
	u8			trim_cpumask;
};

void flush_tlb_local(void);
void flush_tlb_one_user(unsigned long addr);
void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_multi(const struct cpumask *cpumask,
		      const struct flush_tlb_info *info);

/* An ASID below TLB_NR_DYN_ASIDS is a dynamic (per-CPU ctxs[] slot) ASID. */
static inline bool is_dyn_asid(u16 asid)
{
	return asid < TLB_NR_DYN_ASIDS;
}

/* Every ASID that is not dynamic is treated as a global ASID. */
static inline bool is_global_asid(u16 asid)
{
	return !is_dyn_asid(asid);
}

#ifdef CONFIG_BROADCAST_TLB_FLUSH
/*
 * Return the global ASID recorded for @mm, or 0 when X86_FEATURE_INVLPGB is
 * not enabled.  The load is an acquire that pairs with the release store in
 * mm_assign_global_asid().
 */
static inline u16 mm_global_asid(struct mm_struct *mm)
{
	u16 asid;

	if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
		return 0;

	asid = smp_load_acquire(&mm->context.global_asid);

	/* mm->context.global_asid is either 0, or a global ASID */
	VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));

	return asid;
}

/* Reset the global ASID state of @mm; a no-op without X86_FEATURE_INVLPGB. */
static inline void mm_init_global_asid(struct mm_struct *mm)
{
	if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
		mm->context.global_asid = 0;
		mm->context.asid_transition = false;
	}
}

/* Publish @asid as the global ASID of @mm and mark the mm as in transition. */
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
{
	/*
	 * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
	 * finish_asid_transition() needs to observe asid_transition = true
	 * once it observes global_asid.
	 */
	mm->context.asid_transition = true;
	smp_store_release(&mm->context.global_asid, asid);
}

/* Mark the ASID transition of @mm as finished. */
static inline void mm_clear_asid_transition(struct mm_struct *mm)
{
	WRITE_ONCE(mm->context.asid_transition, false);
}

/*
 * Return true if @mm is non-NULL and still flagged as being in an ASID
 * transition; always false without X86_FEATURE_INVLPGB.
 */
static inline bool mm_in_asid_transition(struct mm_struct *mm)
{
	if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
		return false;

	return mm && READ_ONCE(mm->context.asid_transition);
}
#else
/* Stubs for !CONFIG_BROADCAST_TLB_FLUSH: no global ASIDs, no transitions. */
static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
static inline void mm_init_global_asid(struct mm_struct *mm) { }
static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
#endif /* CONFIG_BROADCAST_TLB_FLUSH */

#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif

#define flush_tlb_mm(mm)						\
		flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)

#define flush_tlb_range(vma, start, end)				\
	flush_tlb_mm_range((vma)->vm_mm, start, end,			\
			   ((vma)->vm_flags & VM_HUGETLB)		\
				? huge_page_shift(hstate_vma(vma))	\
				: PAGE_SHIFT, true)

extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
				unsigned long end, unsigned int stride_shift,
				bool freed_tables);
extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);

/* Flush the single PAGE_SIZE page at address @a in @vma's mm. */
static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
{
	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
}

/*
 * Return true when mm_cpumask(@mm) contains any CPU other than the current
 * one.  get_cpu()/put_cpu() bracket the check.
 */
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
	bool should_defer = false;

	/* If remote CPUs need to be flushed then defer and batch the flush */
	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
		should_defer = true;
	put_cpu();

	return should_defer;
}

/* Increment @mm's TLB generation and return the new value. */
static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
{
	/*
	 * Bump the generation count.  This also serves as a full barrier
	 * that synchronizes with switch_mm(): callers are required to order
	 * their read of mm_cpumask after their writes to the paging
	 * structures.
	 */
	return atomic64_inc_return(&mm->context.tlb_gen);
}

/*
 * Record a pending flush for @mm in @batch: bump the mm's TLB generation,
 * merge mm_cpumask(@mm) into the batch cpumask and mark the batch as having
 * unmapped pages.  @start and @end are not used here; the secondary-TLB
 * notifier is invoked for the full range (0 to -1UL).
 */
static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
					     struct mm_struct *mm, unsigned long start, unsigned long end)
{
	inc_mm_tlb_gen(mm);
	cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
	batch->unmapped_pages = true;
	mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}

extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);

/*
 * Decide from the old and new page-table flag words whether a TLB flush is
 * needed.  @ignore_access makes a change of _PAGE_ACCESSED alone not count.
 */
static inline bool pte_flags_need_flush(unsigned long oldflags,
					unsigned long newflags,
					bool ignore_access)
{
	/*
	 * Flags that require a flush when cleared but not when they are set.
	 * Only include flags that would not trigger spurious page-faults.
	 * Non-present entries are not cached. Hardware would set the
	 * dirty/access bit if needed without a fault.
	 */
	const pteval_t flush_on_clear = _PAGE_DIRTY | _PAGE_PRESENT |
					_PAGE_ACCESSED;
	const pteval_t software_flags = _PAGE_SOFTW1 | _PAGE_SOFTW2 |
					_PAGE_SOFTW3 | _PAGE_SOFTW4 |
					_PAGE_SAVED_DIRTY;
	const pteval_t flush_on_change = _PAGE_RW | _PAGE_USER | _PAGE_PWT |
			  _PAGE_PCD | _PAGE_PSE | _PAGE_GLOBAL | _PAGE_PAT |
			  _PAGE_PAT_LARGE | _PAGE_PKEY_BIT0 | _PAGE_PKEY_BIT1 |
			  _PAGE_PKEY_BIT2 | _PAGE_PKEY_BIT3 | _PAGE_NX;
	unsigned long diff = oldflags ^ newflags;

	/* The three flag classes must not overlap. */
	BUILD_BUG_ON(flush_on_clear & software_flags);
	BUILD_BUG_ON(flush_on_clear & flush_on_change);
	BUILD_BUG_ON(flush_on_change & software_flags);

	/* Ignore software flags */
	diff &= ~software_flags;

	if (ignore_access)
		diff &= ~_PAGE_ACCESSED;

	/*
	 * Was any of the 'flush_on_clear' flags cleared between
	 * 'oldflags' and 'newflags'?
	 */
	if (diff & oldflags & flush_on_clear)
		return true;

	/* Flush on modified flags. */
	if (diff & flush_on_change)
		return true;

	/* Ensure there are no flags that were left behind */
	if (IS_ENABLED(CONFIG_DEBUG_VM) &&
	    (diff & ~(flush_on_clear | software_flags | flush_on_change))) {
		VM_WARN_ON_ONCE(1);
		return true;
	}

	return false;
}

/*
 * pte_needs_flush() checks whether permissions were demoted and require a
 * flush. It should only be used for userspace PTEs.
 */
static inline bool pte_needs_flush(pte_t oldpte, pte_t newpte)
{
	/* !PRESENT -> * ; no need for flush */
	if (!(pte_flags(oldpte) & _PAGE_PRESENT))
		return false;

	/* PFN changed ; needs flush */
	if (pte_pfn(oldpte) != pte_pfn(newpte))
		return true;

	/*
	 * check PTE flags; ignore access-bit; see comment in
	 * ptep_clear_flush_young().
	 */
	return pte_flags_need_flush(pte_flags(oldpte), pte_flags(newpte),
				    true);
}
#define pte_needs_flush pte_needs_flush

/*
 * huge_pmd_needs_flush() checks whether permissions were demoted and require a
 * flush. It should only be used for userspace huge PMDs.
 */
static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
{
	/* !PRESENT -> * ; no need for flush */
	if (!(pmd_flags(oldpmd) & _PAGE_PRESENT))
		return false;

	/* PFN changed ; needs flush */
	if (pmd_pfn(oldpmd) != pmd_pfn(newpmd))
		return true;

	/*
	 * check PMD flags; do not ignore access-bit; see
	 * pmdp_clear_flush_young().
	 */
	return pte_flags_need_flush(pmd_flags(oldpmd), pmd_flags(newpmd),
				    false);
}
#define huge_pmd_needs_flush huge_pmd_needs_flush

#ifdef CONFIG_ADDRESS_MASKING
/* Rebuild the CR3 LAM bits from the compact per-CPU cpu_tlbstate.lam value. */
static inline u64 tlbstate_lam_cr3_mask(void)
{
	u64 lam = this_cpu_read(cpu_tlbstate.lam);

	return lam << X86_CR3_LAM_U57_BIT;
}

/*
 * Store the LAM mode (shifted down by X86_CR3_LAM_U57_BIT) and the untag
 * mask in this CPU's per-CPU state.
 */
static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
	this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
	this_cpu_write(tlbstate_untag_mask, untag_mask);
}

#else

/* Without CONFIG_ADDRESS_MASKING there are no LAM bits to track. */
static inline u64 tlbstate_lam_cr3_mask(void)
{
	return 0;
}

static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
}
#endif
#endif /* !MODULE */

/*
 * Write CR4 with X86_CR4_PGE toggled, then write the caller-supplied value
 * back.  The name indicates this is how global TLB entries are flushed.
 */
static inline void __native_tlb_flush_global(unsigned long cr4)
{
	native_write_cr4(cr4 ^ X86_CR4_PGE);
	native_write_cr4(cr4);
}
#endif /* _ASM_X86_TLBFLUSH_H */
1 4 4 3 3 3 3 2 2 2 2 1 3 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 // SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for assembler optimized version of Blowfish * * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/algapi.h> #include <crypto/blowfish.h> #include <crypto/internal/skcipher.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); /* 4-way parallel cipher functions */ asmlinkage void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void __blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src, bool cbc); static inline void blowfish_dec_ecb_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src) { return __blowfish_dec_blk_4way(ctx, dst, src, false); } static inline void blowfish_dec_cbc_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src) { return __blowfish_dec_blk_4way(ctx, dst, src, true); } static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { 
blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return blowfish_setkey(&tfm->base, key, keylen); } static int ecb_encrypt(struct skcipher_request *req) { ECB_WALK_START(req, BF_BLOCK_SIZE, -1); ECB_BLOCK(4, blowfish_enc_blk_4way); ECB_BLOCK(1, blowfish_enc_blk); ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { ECB_WALK_START(req, BF_BLOCK_SIZE, -1); ECB_BLOCK(4, blowfish_dec_ecb_4way); ECB_BLOCK(1, blowfish_dec_blk); ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { CBC_WALK_START(req, BF_BLOCK_SIZE, -1); CBC_ENC_BLOCK(blowfish_enc_blk); CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { CBC_WALK_START(req, BF_BLOCK_SIZE, -1); CBC_DEC_BLOCK(4, blowfish_dec_cbc_4way); CBC_DEC_BLOCK(1, blowfish_dec_blk); CBC_WALK_END(); } static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = BF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct bf_ctx), .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, .cia_max_keysize = BF_MAX_KEY_SIZE, .cia_setkey = blowfish_setkey, .cia_encrypt = blowfish_encrypt, .cia_decrypt = blowfish_decrypt, } } }; static struct skcipher_alg bf_skcipher_algs[] = { { .base.cra_name = "ecb(blowfish)", .base.cra_driver_name = "ecb-blowfish-asm", .base.cra_priority = 300, .base.cra_blocksize = BF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct bf_ctx), .base.cra_module = THIS_MODULE, .min_keysize = BF_MIN_KEY_SIZE, .max_keysize = BF_MAX_KEY_SIZE, .setkey = blowfish_setkey_skcipher, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { .base.cra_name = "cbc(blowfish)", .base.cra_driver_name = "cbc-blowfish-asm", 
.base.cra_priority = 300, .base.cra_blocksize = BF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct bf_ctx), .base.cra_module = THIS_MODULE, .min_keysize = BF_MIN_KEY_SIZE, .max_keysize = BF_MAX_KEY_SIZE, .ivsize = BF_BLOCK_SIZE, .setkey = blowfish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, }; static bool is_blacklisted_cpu(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; if (boot_cpu_data.x86 == 0x0f) { /* * On Pentium 4, blowfish-x86_64 is slower than generic C * implementation because use of 64bit rotates (which are really * slow on P4). Therefore blacklist P4s. */ return true; } return false; } static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init blowfish_init(void) { int err; if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "blowfish-x86_64: performance on this CPU " "would be suboptimal: disabling " "blowfish-x86_64.\n"); return -ENODEV; } err = crypto_register_alg(&bf_cipher_alg); if (err) return err; err = crypto_register_skciphers(bf_skcipher_algs, ARRAY_SIZE(bf_skcipher_algs)); if (err) crypto_unregister_alg(&bf_cipher_alg); return err; } static void __exit blowfish_fini(void) { crypto_unregister_alg(&bf_cipher_alg); crypto_unregister_skciphers(bf_skcipher_algs, ARRAY_SIZE(bf_skcipher_algs)); } module_init(blowfish_init); module_exit(blowfish_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); MODULE_ALIAS_CRYPTO("blowfish"); MODULE_ALIAS_CRYPTO("blowfish-asm");
12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 // SPDX-License-Identifier: GPL-2.0-only #include <linux/crc-ccitt.h> #include <linux/export.h> #include <linux/module.h> #include <linux/types.h> /* * This mysterious table is just the CRC of each possible byte. It can be * computed using the standard bit-at-a-time methods. The polynomial can * be seen in entry 128, 0x8408. This corresponds to x^0 + x^5 + x^12. * Add the implicit x^16, and you have the standard CRC-CCITT. */ u16 const crc_ccitt_table[256] = { 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 
0xd1b5, 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330, 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 }; EXPORT_SYMBOL(crc_ccitt_table); /** * crc_ccitt - recompute the CRC (CRC-CCITT variant) for the data * buffer * @crc: previous CRC value * @buffer: data pointer * @len: number of bytes in the buffer */ u16 crc_ccitt(u16 crc, u8 const *buffer, size_t len) { while (len--) crc = crc_ccitt_byte(crc, *buffer++); return crc; } EXPORT_SYMBOL(crc_ccitt); MODULE_DESCRIPTION("CRC-CCITT calculations"); MODULE_LICENSE("GPL");
103 105 105 104 103 104 105 105 86 88 88 85 88 86 86 1 3 1 2 28033 3 3 3 3 3 28050 28020 28033 28038 87 88 88 88 88 88 87 84 87 88 88 6 88 87 86 88 87 88 27 27 1 1 1 1 1 1 87 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 
984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 
1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 
1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 
2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 
2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 
2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 // SPDX-License-Identifier: GPL-2.0-or-later /* * Linux Socket Filter - Kernel level socket filtering * * Based on the design of the Berkeley Packet Filter. 
The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <uapi/linux/btf.h>
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <linux/prandom.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/objtool.h>
#include <linux/overflow.h>
#include <linux/rbtree_latch.h>
#include <linux/kallsyms.h>
#include <linux/rcupdate.h>
#include <linux/perf_event.h>
#include <linux/extable.h>
#include <linux/log2.h>
#include <linux/bpf_verifier.h>
#include <linux/nodemask.h>
#include <linux/nospec.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/memcontrol.h>
#include <linux/execmem.h>

#include <asm/barrier.h>
#include <linux/unaligned.h>

/* Registers
 *
 * Shorthands for the eleven numbered BPF registers.  Every macro expands to
 * an element of an array called "regs", so they can only be used in a scope
 * where such an array is visible.
 */
#define BPF_R0	regs[BPF_REG_0]
#define BPF_R1	regs[BPF_REG_1]
#define BPF_R2	regs[BPF_REG_2]
#define BPF_R3	regs[BPF_REG_3]
#define BPF_R4	regs[BPF_REG_4]
#define BPF_R5	regs[BPF_REG_5]
#define BPF_R6	regs[BPF_REG_6]
#define BPF_R7	regs[BPF_REG_7]
#define BPF_R8	regs[BPF_REG_8]
#define BPF_R9	regs[BPF_REG_9]
#define BPF_R10	regs[BPF_REG_10]

/* Named registers
 *
 * DST, SRC, OFF and IMM additionally dereference a pointer called "insn",
 * i.e. they refer to the operands of whatever instruction "insn" points at.
 */
#define DST	regs[insn->dst_reg]
#define SRC	regs[insn->src_reg]
#define FP	regs[BPF_REG_FP]
#define AX	regs[BPF_REG_AX]
#define ARG1	regs[BPF_REG_ARG1]
#define CTX	regs[BPF_REG_CTX]
#define OFF	insn->off
#define IMM	insn->imm

/* Global BPF memory allocator object and a flag associated with it.
 * NOTE(review): presumably the flag records that bpf_global_ma has been
 * initialised; the initialisation is not in this part of the file - confirm.
 */
struct bpf_mem_alloc bpf_global_ma;
bool bpf_global_ma_set;

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
*/ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size) { u8 *ptr = NULL; if (k >= SKF_NET_OFF) { ptr = skb_network_header(skb) + k - SKF_NET_OFF; } else if (k >= SKF_LL_OFF) { if (unlikely(!skb_mac_header_was_set(skb))) return NULL; ptr = skb_mac_header(skb) + k - SKF_LL_OFF; } if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb)) return ptr; return NULL; } /* tell bpf programs that include vmlinux.h kernel's PAGE_SIZE */ enum page_size_enum { __PAGE_SIZE = PAGE_SIZE }; struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flags) { gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags); struct bpf_prog_aux *aux; struct bpf_prog *fp; size = round_up(size, __PAGE_SIZE); fp = __vmalloc(size, gfp_flags); if (fp == NULL) return NULL; aux = kzalloc(sizeof(*aux), bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags)); if (aux == NULL) { vfree(fp); return NULL; } fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags)); if (!fp->active) { vfree(fp); kfree(aux); return NULL; } fp->pages = size / PAGE_SIZE; fp->aux = aux; fp->aux->prog = fp; fp->jit_requested = ebpf_jit_enabled(); fp->blinding_requested = bpf_jit_blinding_enabled(fp); #ifdef CONFIG_CGROUP_BPF aux->cgroup_atype = CGROUP_BPF_ATTACH_TYPE_INVALID; #endif INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode); #ifdef CONFIG_FINEIBT INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode); #endif mutex_init(&fp->aux->used_maps_mutex); mutex_init(&fp->aux->ext_mutex); mutex_init(&fp->aux->dst_mutex); #ifdef CONFIG_BPF_SYSCALL bpf_prog_stream_init(fp); #endif return fp; } struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) { gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags); struct bpf_prog *prog; int cpu; prog = bpf_prog_alloc_no_stats(size, gfp_extra_flags); if (!prog) return NULL; prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags); if (!prog->stats) { 
free_percpu(prog->active); kfree(prog->aux); vfree(prog); return NULL; } for_each_possible_cpu(cpu) { struct bpf_prog_stats *pstats; pstats = per_cpu_ptr(prog->stats, cpu); u64_stats_init(&pstats->syncp); } return prog; } EXPORT_SYMBOL_GPL(bpf_prog_alloc); int bpf_prog_alloc_jited_linfo(struct bpf_prog *prog) { if (!prog->aux->nr_linfo || !prog->jit_requested) return 0; prog->aux->jited_linfo = kvcalloc(prog->aux->nr_linfo, sizeof(*prog->aux->jited_linfo), bpf_memcg_flags(GFP_KERNEL | __GFP_NOWARN)); if (!prog->aux->jited_linfo) return -ENOMEM; return 0; } void bpf_prog_jit_attempt_done(struct bpf_prog *prog) { if (prog->aux->jited_linfo && (!prog->jited || !prog->aux->jited_linfo[0])) { kvfree(prog->aux->jited_linfo); prog->aux->jited_linfo = NULL; } kfree(prog->aux->kfunc_tab); prog->aux->kfunc_tab = NULL; } /* The jit engine is responsible to provide an array * for insn_off to the jited_off mapping (insn_to_jit_off). * * The idx to this array is the insn_off. Hence, the insn_off * here is relative to the prog itself instead of the main prog. * This array has one entry for each xlated bpf insn. * * jited_off is the byte off to the end of the jited insn. * * Hence, with * insn_start: * The first bpf insn off of the prog. The insn off * here is relative to the main prog. * e.g. 
if prog is a subprog, insn_start > 0
 * linfo_idx:
 *	The prog's idx to prog->aux->linfo and jited_linfo
 *
 * jited_linfo[linfo_idx] = prog->bpf_func
 *
 * For i > linfo_idx,
 *
 * jited_linfo[i] = prog->bpf_func +
 *	insn_to_jit_off[linfo[i].insn_off - insn_start - 1]
 *
 * The function returns without writing anything when no jited_linfo array
 * has been allocated or when func_idx is larger than func_cnt.
 */
void bpf_prog_fill_jited_linfo(struct bpf_prog *prog,
			       const u32 *insn_to_jit_off)
{
	u32 linfo_idx, insn_start, insn_end, nr_linfo, i;
	const struct bpf_line_info *linfo;
	void **jited_linfo;

	if (!prog->aux->jited_linfo || prog->aux->func_idx > prog->aux->func_cnt)
		/* Userspace did not provide linfo */
		return;

	/* From here on linfo[] and jited_linfo[] are indexed relative to
	 * this prog's first line-info record.
	 */
	linfo_idx = prog->aux->linfo_idx;
	linfo = &prog->aux->linfo[linfo_idx];
	insn_start = linfo[0].insn_off;
	insn_end = insn_start + prog->len;

	jited_linfo = &prog->aux->jited_linfo[linfo_idx];
	jited_linfo[0] = prog->bpf_func;

	nr_linfo = prog->aux->nr_linfo - linfo_idx;

	/* Stop at the first record that belongs to a later prog. */
	for (i = 1; i < nr_linfo && linfo[i].insn_off < insn_end; i++)
		/* The verifier ensures that linfo[i].insn_off is
		 * strictly increasing
		 */
		jited_linfo[i] = prog->bpf_func +
			insn_to_jit_off[linfo[i].insn_off - insn_start - 1];
}

/* Make sure the program image spans at least @size bytes (rounded up to
 * whole pages).
 *
 * If @fp_old already has enough pages it is returned unchanged.  Otherwise a
 * new zeroed image is allocated, the old pages are copied over and the old
 * image is freed.  Returns NULL if the new allocation fails; @fp_old is left
 * untouched in that case.
 */
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
				  gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = bpf_memcg_flags(GFP_KERNEL | __GFP_ZERO | gfp_extra_flags);
	struct bpf_prog *fp;
	u32 pages;

	size = round_up(size, PAGE_SIZE);
	pages = size / PAGE_SIZE;
	if (pages <= fp_old->pages)
		return fp_old;

	fp = __vmalloc(size, gfp_flags);
	if (fp) {
		memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
		fp->pages = pages;
		/* aux was copied by pointer; make it point back at the copy. */
		fp->aux->prog = fp;

		/* We keep fp->aux from fp_old around in the new
		 * reallocated structure.
*/ fp_old->aux = NULL; fp_old->stats = NULL; fp_old->active = NULL; __bpf_prog_free(fp_old); } return fp; } void __bpf_prog_free(struct bpf_prog *fp) { if (fp->aux) { mutex_destroy(&fp->aux->used_maps_mutex); mutex_destroy(&fp->aux->dst_mutex); kfree(fp->aux->poke_tab); kfree(fp->aux); } free_percpu(fp->stats); free_percpu(fp->active); vfree(fp); } int bpf_prog_calc_tag(struct bpf_prog *fp) { const u32 bits_offset = SHA1_BLOCK_SIZE - sizeof(__be64); u32 raw_size = bpf_prog_tag_scratch_size(fp); u32 digest[SHA1_DIGEST_WORDS]; u32 ws[SHA1_WORKSPACE_WORDS]; u32 i, bsize, psize, blocks; struct bpf_insn *dst; bool was_ld_map; u8 *raw, *todo; __be32 *result; __be64 *bits; raw = vmalloc(raw_size); if (!raw) return -ENOMEM; sha1_init_raw(digest); memset(ws, 0, sizeof(ws)); /* We need to take out the map fd for the digest calculation * since they are unstable from user space side. */ dst = (void *)raw; for (i = 0, was_ld_map = false; i < fp->len; i++) { dst[i] = fp->insnsi[i]; if (!was_ld_map && dst[i].code == (BPF_LD | BPF_IMM | BPF_DW) && (dst[i].src_reg == BPF_PSEUDO_MAP_FD || dst[i].src_reg == BPF_PSEUDO_MAP_VALUE)) { was_ld_map = true; dst[i].imm = 0; } else if (was_ld_map && dst[i].code == 0 && dst[i].dst_reg == 0 && dst[i].src_reg == 0 && dst[i].off == 0) { was_ld_map = false; dst[i].imm = 0; } else { was_ld_map = false; } } psize = bpf_prog_insn_size(fp); memset(&raw[psize], 0, raw_size - psize); raw[psize++] = 0x80; bsize = round_up(psize, SHA1_BLOCK_SIZE); blocks = bsize / SHA1_BLOCK_SIZE; todo = raw; if (bsize - psize >= sizeof(__be64)) { bits = (__be64 *)(todo + bsize - sizeof(__be64)); } else { bits = (__be64 *)(todo + bsize + bits_offset); blocks++; } *bits = cpu_to_be64((psize - 1) << 3); while (blocks--) { sha1_transform(digest, todo, ws); todo += SHA1_BLOCK_SIZE; } result = (__force __be32 *)digest; for (i = 0; i < SHA1_DIGEST_WORDS; i++) result[i] = cpu_to_be32(digest[i]); memcpy(fp->tag, result, sizeof(fp->tag)); vfree(raw); return 0; } static int 
bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, s32 end_old, s32 end_new, s32 curr, const bool probe_pass) { const s64 imm_min = S32_MIN, imm_max = S32_MAX; s32 delta = end_new - end_old; s64 imm = insn->imm; if (curr < pos && curr + imm + 1 >= end_old) imm += delta; else if (curr >= end_new && curr + imm + 1 < end_new) imm -= delta; if (imm < imm_min || imm > imm_max) return -ERANGE; if (!probe_pass) insn->imm = imm; return 0; } static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old, s32 end_new, s32 curr, const bool probe_pass) { s64 off_min, off_max, off; s32 delta = end_new - end_old; if (insn->code == (BPF_JMP32 | BPF_JA)) { off = insn->imm; off_min = S32_MIN; off_max = S32_MAX; } else { off = insn->off; off_min = S16_MIN; off_max = S16_MAX; } if (curr < pos && curr + off + 1 >= end_old) off += delta; else if (curr >= end_new && curr + off + 1 < end_new) off -= delta; if (off < off_min || off > off_max) return -ERANGE; if (!probe_pass) { if (insn->code == (BPF_JMP32 | BPF_JA)) insn->imm = off; else insn->off = off; } return 0; } static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, s32 end_old, s32 end_new, const bool probe_pass) { u32 i, insn_cnt = prog->len + (probe_pass ? end_new - end_old : 0); struct bpf_insn *insn = prog->insnsi; int ret = 0; for (i = 0; i < insn_cnt; i++, insn++) { u8 code; /* In the probing pass we still operate on the original, * unpatched image in order to check overflows before we * do any other adjustments. Therefore skip the patchlet. */ if (probe_pass && i == pos) { i = end_new; insn = prog->insnsi + end_old; } if (bpf_pseudo_func(insn)) { ret = bpf_adj_delta_to_imm(insn, pos, end_old, end_new, i, probe_pass); if (ret) return ret; continue; } code = insn->code; if ((BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) || BPF_OP(code) == BPF_EXIT) continue; /* Adjust offset of jmps if we cross patch boundaries. 
*/
		if (BPF_OP(code) == BPF_CALL) {
			/* Only bpf-to-bpf calls carry a relative target. */
			if (insn->src_reg != BPF_PSEUDO_CALL)
				continue;
			ret = bpf_adj_delta_to_imm(insn, pos, end_old,
						   end_new, i, probe_pass);
		} else {
			ret = bpf_adj_delta_to_off(insn, pos, end_old,
						   end_new, i, probe_pass);
		}
		if (ret)
			break;
	}

	return ret;
}

/* Shift line-info records after an insertion of @delta instructions at
 * instruction index @off: every record from the first one whose insn_off is
 * greater than @off onwards is moved by @delta.
 * NOTE(review): this relies on linfo[] being sorted by insn_off - confirm.
 */
static void bpf_adj_linfo(struct bpf_prog *prog, u32 off, u32 delta)
{
	struct bpf_line_info *linfo;
	u32 i, nr_linfo;

	nr_linfo = prog->aux->nr_linfo;
	if (!nr_linfo || !delta)
		return;

	linfo = prog->aux->linfo;

	for (i = 0; i < nr_linfo; i++)
		if (off < linfo[i].insn_off)
			break;

	/* Push all off < linfo[i].insn_off by delta */
	for (; i < nr_linfo; i++)
		linfo[i].insn_off += delta;
}

/* Replace the single instruction at index @off with the @len instructions
 * in @patch.
 *
 * Returns the (possibly reallocated) program on success.  On failure an
 * ERR_PTR() is returned: the error from the branch-range probe, or -ENOMEM
 * if the image could not be grown.  In the error case @prog itself has not
 * been modified.
 */
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
				       const struct bpf_insn *patch, u32 len)
{
	u32 insn_adj_cnt, insn_rest, insn_delta = len - 1;
	const u32 cnt_max = S16_MAX;
	struct bpf_prog *prog_adj;
	int err;

	/* Since our patchlet doesn't expand the image, we're done. */
	if (insn_delta == 0) {
		memcpy(prog->insnsi + off, patch, sizeof(*patch));
		return prog;
	}

	insn_adj_cnt = prog->len + insn_delta;

	/* Reject anything that would potentially let the insn->off
	 * target overflow when we have excessive program expansions.
	 * We need to probe here before we do any reallocation where
	 * we afterwards may not fail anymore.
	 */
	if (insn_adj_cnt > cnt_max &&
	    (err = bpf_adj_branches(prog, off, off + 1, off + len, true)))
		return ERR_PTR(err);

	/* Several new instructions need to be inserted. Make room
	 * for them. Likely, there's no need for a new allocation as
	 * last page could have large enough tailroom.
	 */
	prog_adj = bpf_prog_realloc(prog, bpf_prog_size(insn_adj_cnt),
				    GFP_USER);
	if (!prog_adj)
		return ERR_PTR(-ENOMEM);

	prog_adj->len = insn_adj_cnt;

	/* Patching happens in 3 steps:
	 *
	 * 1) Move over tail of insnsi from next instruction onwards,
	 *    so we can patch the single target insn with one or more
	 *    new ones (patching is always from 1 to n insns, n > 0).
	 * 2) Inject new instructions at the target location.
* 3) Adjust branch offsets if necessary. */ insn_rest = insn_adj_cnt - off - len; memmove(prog_adj->insnsi + off + len, prog_adj->insnsi + off + 1, sizeof(*patch) * insn_rest); memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); /* We are guaranteed to not fail at this point, otherwise * the ship has sailed to reverse to the original state. An * overflow cannot happen at this point. */ BUG_ON(bpf_adj_branches(prog_adj, off, off + 1, off + len, false)); bpf_adj_linfo(prog_adj, off, insn_delta); return prog_adj; } int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt) { int err; /* Branch offsets can't overflow when program is shrinking, no need * to call bpf_adj_branches(..., true) here */ memmove(prog->insnsi + off, prog->insnsi + off + cnt, sizeof(struct bpf_insn) * (prog->len - off - cnt)); prog->len -= cnt; err = bpf_adj_branches(prog, off, off + cnt, off, false); WARN_ON_ONCE(err); return err; } static void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp) { int i; for (i = 0; i < fp->aux->real_func_cnt; i++) bpf_prog_kallsyms_del(fp->aux->func[i]); } void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) { bpf_prog_kallsyms_del_subprogs(fp); bpf_prog_kallsyms_del(fp); } #ifdef CONFIG_BPF_JIT /* All BPF JIT sysctl knobs here. 
*/ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; long bpf_jit_limit __read_mostly; long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) { WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); prog->aux->ksym.start = (unsigned long) prog->bpf_func; prog->aux->ksym.end = prog->aux->ksym.start + prog->jited_len; } static void bpf_prog_ksym_set_name(struct bpf_prog *prog) { char *sym = prog->aux->ksym.name; const char *end = sym + KSYM_NAME_LEN; const struct btf_type *type; const char *func_name; BUILD_BUG_ON(sizeof("bpf_prog_") + sizeof(prog->tag) * 2 + /* name has been null terminated. * We should need +1 for the '_' preceding * the name. However, the null character * is double counted between the name and the * sizeof("bpf_prog_") above, so we omit * the +1 here. */ sizeof(prog->aux->name) > KSYM_NAME_LEN); sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); /* prog->aux->name will be ignored if full btf name is available */ if (prog->aux->func_info_cnt && prog->aux->func_idx < prog->aux->func_info_cnt) { type = btf_type_by_id(prog->aux->btf, prog->aux->func_info[prog->aux->func_idx].type_id); func_name = btf_name_by_offset(prog->aux->btf, type->name_off); snprintf(sym, (size_t)(end - sym), "_%s", func_name); return; } if (prog->aux->name[0]) snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name); else *sym = 0; } static unsigned long bpf_get_ksym_start(struct latch_tree_node *n) { return container_of(n, struct bpf_ksym, tnode)->start; } static __always_inline bool bpf_tree_less(struct latch_tree_node *a, struct latch_tree_node *b) { return bpf_get_ksym_start(a) < bpf_get_ksym_start(b); } static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) { unsigned long val = (unsigned long)key; const struct bpf_ksym *ksym; ksym = 
container_of(n, struct bpf_ksym, tnode); if (val < ksym->start) return -1; /* Ensure that we detect return addresses as part of the program, when * the final instruction is a call for a program part of the stack * trace. Therefore, do val > ksym->end instead of val >= ksym->end. */ if (val > ksym->end) return 1; return 0; } static const struct latch_tree_ops bpf_tree_ops = { .less = bpf_tree_less, .comp = bpf_tree_comp, }; static DEFINE_SPINLOCK(bpf_lock); static LIST_HEAD(bpf_kallsyms); static struct latch_tree_root bpf_tree __cacheline_aligned; void bpf_ksym_add(struct bpf_ksym *ksym) { spin_lock_bh(&bpf_lock); WARN_ON_ONCE(!list_empty(&ksym->lnode)); list_add_tail_rcu(&ksym->lnode, &bpf_kallsyms); latch_tree_insert(&ksym->tnode, &bpf_tree, &bpf_tree_ops); spin_unlock_bh(&bpf_lock); } static void __bpf_ksym_del(struct bpf_ksym *ksym) { if (list_empty(&ksym->lnode)) return; latch_tree_erase(&ksym->tnode, &bpf_tree, &bpf_tree_ops); list_del_rcu(&ksym->lnode); } void bpf_ksym_del(struct bpf_ksym *ksym) { spin_lock_bh(&bpf_lock); __bpf_ksym_del(ksym); spin_unlock_bh(&bpf_lock); } static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) { return fp->jited && !bpf_prog_was_classic(fp); } void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || !bpf_token_capable(fp->aux->token, CAP_BPF)) return; bpf_prog_ksym_set_addr(fp); bpf_prog_ksym_set_name(fp); fp->aux->ksym.prog = true; bpf_ksym_add(&fp->aux->ksym); #ifdef CONFIG_FINEIBT /* * When FineIBT, code in the __cfi_foo() symbols can get executed * and hence unwinder needs help. 
*/ if (cfi_mode != CFI_FINEIBT) return; snprintf(fp->aux->ksym_prefix.name, KSYM_NAME_LEN, "__cfi_%s", fp->aux->ksym.name); fp->aux->ksym_prefix.start = (unsigned long) fp->bpf_func - 16; fp->aux->ksym_prefix.end = (unsigned long) fp->bpf_func; bpf_ksym_add(&fp->aux->ksym_prefix); #endif } void bpf_prog_kallsyms_del(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp)) return; bpf_ksym_del(&fp->aux->ksym); #ifdef CONFIG_FINEIBT if (cfi_mode != CFI_FINEIBT) return; bpf_ksym_del(&fp->aux->ksym_prefix); #endif } static struct bpf_ksym *bpf_ksym_find(unsigned long addr) { struct latch_tree_node *n; n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); return n ? container_of(n, struct bpf_ksym, tnode) : NULL; } int __bpf_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char *sym) { struct bpf_ksym *ksym; int ret = 0; rcu_read_lock(); ksym = bpf_ksym_find(addr); if (ksym) { unsigned long symbol_start = ksym->start; unsigned long symbol_end = ksym->end; ret = strscpy(sym, ksym->name, KSYM_NAME_LEN); if (size) *size = symbol_end - symbol_start; if (off) *off = addr - symbol_start; } rcu_read_unlock(); return ret; } bool is_bpf_text_address(unsigned long addr) { bool ret; rcu_read_lock(); ret = bpf_ksym_find(addr) != NULL; rcu_read_unlock(); return ret; } struct bpf_prog *bpf_prog_ksym_find(unsigned long addr) { struct bpf_ksym *ksym; WARN_ON_ONCE(!rcu_read_lock_held()); ksym = bpf_ksym_find(addr); return ksym && ksym->prog ? 
container_of(ksym, struct bpf_prog_aux, ksym)->prog : NULL; } const struct exception_table_entry *search_bpf_extables(unsigned long addr) { const struct exception_table_entry *e = NULL; struct bpf_prog *prog; rcu_read_lock(); prog = bpf_prog_ksym_find(addr); if (!prog) goto out; if (!prog->aux->num_exentries) goto out; e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr); out: rcu_read_unlock(); return e; } int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { struct bpf_ksym *ksym; unsigned int it = 0; int ret = -ERANGE; if (!bpf_jit_kallsyms_enabled()) return ret; rcu_read_lock(); list_for_each_entry_rcu(ksym, &bpf_kallsyms, lnode) { if (it++ != symnum) continue; strscpy(sym, ksym->name, KSYM_NAME_LEN); *value = ksym->start; *type = BPF_SYM_ELF_TYPE; ret = 0; break; } rcu_read_unlock(); return ret; } int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_jit_poke_descriptor *poke) { struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; static const u32 poke_tab_max = 1024; u32 slot = prog->aux->size_poke_tab; u32 size = slot + 1; if (size > poke_tab_max) return -ENOSPC; if (poke->tailcall_target || poke->tailcall_target_stable || poke->tailcall_bypass || poke->adj_off || poke->bypass_addr) return -EINVAL; switch (poke->reason) { case BPF_POKE_REASON_TAIL_CALL: if (!poke->tail_call.map) return -EINVAL; break; default: return -EINVAL; } tab = krealloc_array(tab, size, sizeof(*poke), GFP_KERNEL); if (!tab) return -ENOMEM; memcpy(&tab[slot], poke, sizeof(*poke)); prog->aux->size_poke_tab = size; prog->aux->poke_tab = tab; return slot; } /* * BPF program pack allocator. * * Most BPF programs are pretty small. Allocating a hole page for each * program is sometime a waste. Many small bpf program also adds pressure * to instruction TLB. To solve this issue, we introduce a BPF program pack * allocator. The prog_pack allocator uses HPAGE_PMD_SIZE page (2MB on x86) * to host BPF programs. 
*/
/* Packs are carved into chunks of BPF_PROG_CHUNK_SIZE (1 << 6 = 64) bytes. */
#define BPF_PROG_CHUNK_SHIFT	6
#define BPF_PROG_CHUNK_SIZE	(1 << BPF_PROG_CHUNK_SHIFT)
#define BPF_PROG_CHUNK_MASK	(~(BPF_PROG_CHUNK_SIZE - 1))

/* One pack: list linkage on pack_list, base address of the executable
 * region, and an allocation bitmap with one bit per chunk.
 */
struct bpf_prog_pack {
	struct list_head list;
	void *ptr;
	unsigned long bitmap[];
};

/* Fill callback that simply zeroes @size bytes at @area. */
void bpf_jit_fill_hole_with_zero(void *area, unsigned int size)
{
	memset(area, 0, size);
}

/* Number of chunks needed to hold @size bytes (rounded up). */
#define BPF_PROG_SIZE_TO_NBITS(size)	(round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE)

/* pack_mutex is taken by bpf_prog_pack_alloc()/bpf_prog_pack_free() around
 * every walk or modification of pack_list and the per-pack bitmaps.
 */
static DEFINE_MUTEX(pack_mutex);
static LIST_HEAD(pack_list);

/* PMD_SIZE is not available in some special config, e.g. ARCH=arm with
 * CONFIG_MMU=n. Use PAGE_SIZE in these cases.
 */
#ifdef PMD_SIZE
/* PMD_SIZE is really big for some archs. It doesn't make sense to
 * reserve too much memory in one allocation. Hardcode BPF_PROG_PACK_SIZE to
 * 2MiB * num_possible_nodes(). On most architectures PMD_SIZE will be
 * greater than or equal to 2MB.
 */
#define BPF_PROG_PACK_SIZE (SZ_2M * num_possible_nodes())
#else
#define BPF_PROG_PACK_SIZE PAGE_SIZE
#endif

/* Number of chunks (and bitmap bits) per pack. */
#define BPF_PROG_CHUNK_COUNT (BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE)

/* Allocate a new, empty pack and append it to pack_list.
 *
 * The executable region is filled through @bpf_fill_ill_insns and then
 * switched with set_memory_rox(). Returns NULL if any step fails; in that
 * case nothing is left allocated. Called from bpf_prog_pack_alloc() with
 * pack_mutex held.
 */
static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_prog_pack *pack;
	int err;

	pack = kzalloc(struct_size(pack, bitmap, BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)),
		       GFP_KERNEL);
	if (!pack)
		return NULL;
	pack->ptr = bpf_jit_alloc_exec(BPF_PROG_PACK_SIZE);
	if (!pack->ptr)
		goto out;
	bpf_fill_ill_insns(pack->ptr, BPF_PROG_PACK_SIZE);
	bitmap_zero(pack->bitmap, BPF_PROG_PACK_SIZE / BPF_PROG_CHUNK_SIZE);

	set_vm_flush_reset_perms(pack->ptr);
	err = set_memory_rox((unsigned long)pack->ptr,
			     BPF_PROG_PACK_SIZE / PAGE_SIZE);
	if (err)
		goto out;
	list_add_tail(&pack->list, &pack_list);
	return pack;

out:
	/* Also reached with pack->ptr == NULL when the exec allocation failed. */
	bpf_jit_free_exec(pack->ptr);
	kfree(pack);
	return NULL;
}

/* Allocate @size bytes of executable memory.
 *
 * Requests larger than BPF_PROG_PACK_SIZE bypass the packs: they get their
 * own allocation, rounded up to PAGE_SIZE, filled via @bpf_fill_ill_insns
 * and made ROX. Everything else is served from the first pack that has
 * enough contiguous free chunks; a new pack is created when none has.
 *
 * Returns the address of the reserved area, or NULL on failure.
 */
void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size);
	struct bpf_prog_pack *pack;
	unsigned long pos;
	void *ptr = NULL;

	mutex_lock(&pack_mutex);
	if (size > BPF_PROG_PACK_SIZE) {
		size = round_up(size, PAGE_SIZE);
		ptr = bpf_jit_alloc_exec(size);
		if (ptr) {
			int err;

			bpf_fill_ill_insns(ptr, size);
			set_vm_flush_reset_perms(ptr);
			err = set_memory_rox((unsigned long)ptr,
					     size / PAGE_SIZE);
			if (err) {
				bpf_jit_free_exec(ptr);
				ptr = NULL;
			}
		}
		goto out;
	}
	list_for_each_entry(pack, &pack_list, list) {
		pos = bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
						 nbits, 0);
		if (pos < BPF_PROG_CHUNK_COUNT)
			goto found_free_area;
	}

	pack = alloc_new_pack(bpf_fill_ill_insns);
	if (!pack)
		goto out;

	/* A fresh pack is empty, so the allocation starts at chunk 0. */
	pos = 0;

found_free_area:
	bitmap_set(pack->bitmap, pos, nbits);
	ptr = (void *)(pack->ptr) + (pos << BPF_PROG_CHUNK_SHIFT);

out:
	mutex_unlock(&pack_mutex);
	return ptr;
}

/* Release an area obtained from bpf_prog_pack_alloc().
 *
 * @size must describe the same allocation: it selects between the
 * stand-alone path (size > BPF_PROG_PACK_SIZE) and the pack path, and it
 * determines how many bitmap bits are cleared. A pack whose bitmap becomes
 * completely free is unlinked and freed. Warns once if @ptr does not fall
 * into any known pack.
 */
void bpf_prog_pack_free(void *ptr, u32 size)
{
	struct bpf_prog_pack *pack = NULL, *tmp;
	unsigned int nbits;
	unsigned long pos;

	mutex_lock(&pack_mutex);
	if (size > BPF_PROG_PACK_SIZE) {
		bpf_jit_free_exec(ptr);
		goto out;
	}

	/* Find the pack whose address range contains @ptr. */
	list_for_each_entry(tmp, &pack_list, list) {
		if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) {
			pack = tmp;
			break;
		}
	}

	if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
		goto out;

	nbits = BPF_PROG_SIZE_TO_NBITS(size);
	pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;

	WARN_ONCE(bpf_arch_text_invalidate(ptr, size),
		  "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");

	bitmap_clear(pack->bitmap, pos, nbits);
	/* A zero area spanning every chunk at offset 0 means the pack is empty. */
	if (bitmap_find_next_zero_area(pack->bitmap, BPF_PROG_CHUNK_COUNT, 0,
				       BPF_PROG_CHUNK_COUNT, 0) == 0) {
		list_del(&pack->list);
		bpf_jit_free_exec(pack->ptr);
		kfree(pack);
	}
out:
	mutex_unlock(&pack_mutex);
}

/* Running total maintained by bpf_jit_charge_modmem()/bpf_jit_uncharge_modmem(). */
static atomic_long_t bpf_jit_current;

/* Can be overridden by an arch's JIT compiler if it has a custom,
 * dedicated BPF backend memory area, or if neither of the two
 * below apply.
 */
u64 __weak bpf_jit_alloc_exec_limit(void)
{
#if defined(MODULES_VADDR)
	return MODULES_END - MODULES_VADDR;
#else
	return VMALLOC_END - VMALLOC_START;
#endif
}

static int __init bpf_jit_charge_init(void)
{
	/* Only used as heuristic here to derive limit.
*/
	/* The default limit is half of the maximum, page aligned and capped
	 * at LONG_MAX.
	 */
	bpf_jit_limit_max = bpf_jit_alloc_exec_limit();
	bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1,
					    PAGE_SIZE), LONG_MAX);
	return 0;
}
pure_initcall(bpf_jit_charge_init);

/* Account @size bytes against bpf_jit_current.
 *
 * If the new total exceeds bpf_jit_limit and the caller is not
 * bpf_capable(), the charge is undone and -EPERM is returned. A capable
 * caller keeps the charge even above the limit. Returns 0 otherwise.
 */
int bpf_jit_charge_modmem(u32 size)
{
	if (atomic_long_add_return(size, &bpf_jit_current) > READ_ONCE(bpf_jit_limit)) {
		if (!bpf_capable()) {
			atomic_long_sub(size, &bpf_jit_current);
			return -EPERM;
		}
	}

	return 0;
}

/* Undo a previous bpf_jit_charge_modmem() of @size bytes. */
void bpf_jit_uncharge_modmem(u32 size)
{
	atomic_long_sub(size, &bpf_jit_current);
}

/* Default (weak) executable-memory allocator: execmem_alloc() with the
 * EXECMEM_BPF type.
 */
void *__weak bpf_jit_alloc_exec(unsigned long size)
{
	return execmem_alloc(EXECMEM_BPF, size);
}

/* Default (weak) counterpart of bpf_jit_alloc_exec(). */
void __weak bpf_jit_free_exec(void *addr)
{
	execmem_free(addr);
}

/* Allocate a JIT image of at least @proglen bytes plus header.
 *
 * @image_ptr receives the start of the program inside the allocation; it
 * is placed at a random offset that is a multiple of @alignment (which
 * must be a power of two not larger than BPF_IMAGE_ALIGNMENT). The whole
 * area is first filled through @bpf_fill_ill_insns and the allocation
 * size is charged via bpf_jit_charge_modmem().
 *
 * Returns the header, or NULL if charging or allocating failed.
 */
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
		     unsigned int alignment,
		     bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_binary_header *hdr;
	u32 size, hole, start;

	WARN_ON_ONCE(!is_power_of_2(alignment) ||
		     alignment > BPF_IMAGE_ALIGNMENT);

	/* Most of BPF filters are really small, but if some of them
	 * fill a page, allow at least 128 extra bytes to insert a
	 * random section of illegal instructions.
	 */
	size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);

	if (bpf_jit_charge_modmem(size))
		return NULL;
	hdr = bpf_jit_alloc_exec(size);
	if (!hdr) {
		bpf_jit_uncharge_modmem(size);
		return NULL;
	}

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(hdr, size);

	hdr->size = size;
	/* The random start offset is drawn from the slack, bounded so that
	 * it stays below PAGE_SIZE - sizeof(*hdr).
	 */
	hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
		     PAGE_SIZE - sizeof(*hdr));
	start = get_random_u32_below(hole) & ~(alignment - 1);

	/* Leave a random number of instructions before BPF code. */
	*image_ptr = &hdr->image[start];

	return hdr;
}

/* Free an image from bpf_jit_binary_alloc() and return its charge.
 * hdr->size is read before the memory is released.
 */
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
	u32 size = hdr->size;

	bpf_jit_free_exec(hdr);
	bpf_jit_uncharge_modmem(size);
}

/* Allocate jit binary from bpf_prog_pack allocator.
 * Since the allocated memory is RO+X, the JIT engine cannot write directly
 * to the memory. To solve this problem, a RW buffer is also allocated at
 * as the same time.
The JIT engine should calculate offsets based on the
 * RO memory address, but write JITed program to the RW buffer. Once the
 * JIT engine finishes, it calls bpf_jit_binary_pack_finalize, which copies
 * the JITed program to the RO memory.
 */
/* On success returns the RO header and sets:
 *   @image_ptr  program start inside the RO allocation,
 *   @rw_header  the kvmalloc()ed RW shadow buffer of the same size,
 *   @rw_image   program start inside the RW buffer (same offset).
 * Only the RW header gets ->size written here. Returns NULL when charging,
 * the pack allocation or the RW allocation fails; earlier steps are undone.
 */
struct bpf_binary_header *
bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr,
			  unsigned int alignment,
			  struct bpf_binary_header **rw_header,
			  u8 **rw_image,
			  bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
	struct bpf_binary_header *ro_header;
	u32 size, hole, start;

	WARN_ON_ONCE(!is_power_of_2(alignment) ||
		     alignment > BPF_IMAGE_ALIGNMENT);

	/* add 16 bytes for a random section of illegal instructions */
	size = round_up(proglen + sizeof(*ro_header) + 16, BPF_PROG_CHUNK_SIZE);

	if (bpf_jit_charge_modmem(size))
		return NULL;
	ro_header = bpf_prog_pack_alloc(size, bpf_fill_ill_insns);
	if (!ro_header) {
		bpf_jit_uncharge_modmem(size);
		return NULL;
	}

	*rw_header = kvmalloc(size, GFP_KERNEL);
	if (!*rw_header) {
		bpf_prog_pack_free(ro_header, size);
		bpf_jit_uncharge_modmem(size);
		return NULL;
	}

	/* Fill space with illegal/arch-dep instructions. */
	bpf_fill_ill_insns(*rw_header, size);
	(*rw_header)->size = size;

	/* Same random start offset is used for the RO and the RW image. */
	hole = min_t(unsigned int, size - (proglen + sizeof(*ro_header)),
		     BPF_PROG_CHUNK_SIZE - sizeof(*ro_header));
	start = get_random_u32_below(hole) & ~(alignment - 1);

	*image_ptr = &ro_header->image[start];
	*rw_image = &(*rw_header)->image[start];

	return ro_header;
}

/* Copy JITed text from rw_header to its final location, the ro_header.
 *
 * @rw_header is freed in every case. If bpf_arch_text_copy() returns an
 * error pointer, the RO area is released through bpf_prog_pack_free() and
 * the error code is returned; otherwise 0.
 */
int bpf_jit_binary_pack_finalize(struct bpf_binary_header *ro_header,
				 struct bpf_binary_header *rw_header)
{
	void *ptr;

	ptr = bpf_arch_text_copy(ro_header, rw_header, rw_header->size);

	kvfree(rw_header);

	if (IS_ERR(ptr)) {
		bpf_prog_pack_free(ro_header, ro_header->size);
		return PTR_ERR(ptr);
	}
	return 0;
}

/* bpf_jit_binary_pack_free is called in two different scenarios:
 *   1) when the program is freed after;
 *   2) when the JIT engine fails (before bpf_jit_binary_pack_finalize).
* For case 2), we need to free both the RO memory and the RW buffer.
 *
 * bpf_jit_binary_pack_free requires proper ro_header->size. However,
 * bpf_jit_binary_pack_alloc does not set it. Therefore, ro_header->size
 * must be set with either bpf_jit_binary_pack_finalize (normal path) or
 * bpf_arch_text_copy (when jit fails).
 */
void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header,
			      struct bpf_binary_header *rw_header)
{
	/* Read the size before the RO area is handed back to the allocator. */
	u32 size = ro_header->size;

	bpf_prog_pack_free(ro_header, size);
	kvfree(rw_header);
	bpf_jit_uncharge_modmem(size);
}

/* Derive the pack header address of @fp by rounding fp->bpf_func down to a
 * BPF_PROG_CHUNK_SIZE boundary (BPF_PROG_CHUNK_MASK).
 */
struct bpf_binary_header *
bpf_jit_binary_pack_hdr(const struct bpf_prog *fp)
{
	unsigned long real_start = (unsigned long)fp->bpf_func;
	unsigned long addr;

	addr = real_start & BPF_PROG_CHUNK_MASK;
	return (void *)addr;
}

/* Derive the header address of @fp by rounding fp->bpf_func down to a page
 * boundary (PAGE_MASK).
 */
static inline struct bpf_binary_header *
bpf_jit_binary_hdr(const struct bpf_prog *fp)
{
	unsigned long real_start = (unsigned long)fp->bpf_func;
	unsigned long addr;

	addr = real_start & PAGE_MASK;
	return (void *)addr;
}

/* This symbol is only overridden by archs that have different
 * requirements than the usual eBPF JITs, f.e. when they only
 * implement cBPF JIT, do not set images read-only, etc.
 *
 * For a JITed program the binary image is freed first; the program itself
 * is then released through bpf_prog_unlock_free() in either case.
 */
void __weak bpf_jit_free(struct bpf_prog *fp)
{
	if (fp->jited) {
		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);

		bpf_jit_binary_free(hdr);
		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
	}

	bpf_prog_unlock_free(fp);
}

/* Compute the target address of the call instruction @insn.
 *
 * *@func_addr_fixed is set to false for BPF_PSEUDO_CALL (bpf-to-bpf calls)
 * and to true for everything else. On success *@func_addr holds the
 * address and 0 is returned; a negative error is returned otherwise.
 */
int bpf_jit_get_func_addr(const struct bpf_prog *prog,
			  const struct bpf_insn *insn, bool extra_pass,
			  u64 *func_addr, bool *func_addr_fixed)
{
	s16 off = insn->off;
	s32 imm = insn->imm;
	u8 *addr;
	int err;

	*func_addr_fixed = insn->src_reg != BPF_PSEUDO_CALL;
	if (!*func_addr_fixed) {
		/* Place-holder address till the last pass has collected
		 * all addresses for JITed subprograms in which case we
		 * can pick them up from prog->aux.
*/
		if (!extra_pass)
			addr = NULL;
		else if (prog->aux->func &&
			 off >= 0 && off < prog->aux->real_func_cnt)
			addr = (u8 *)prog->aux->func[off]->bpf_func;
		else
			return -EINVAL;
	} else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
		   bpf_jit_supports_far_kfunc_call()) {
		/* kfunc address is looked up from insn->imm / insn->off. */
		err = bpf_get_kfunc_addr(prog, insn->imm, insn->off, &addr);
		if (err)
			return err;
	} else {
		/* Address of a BPF helper call. Since part of the core
		 * kernel, it's always at a fixed location. __bpf_call_base
		 * and the helper with imm relative to it are both in core
		 * kernel.
		 */
		addr = (u8 *)__bpf_call_base + imm;
	}

	*func_addr = (unsigned long)addr;
	return 0;
}

/* Return the ksym name of @prog when its ksym has ->prog set, otherwise
 * the plain prog->aux->name.
 */
const char *bpf_jit_get_prog_name(struct bpf_prog *prog)
{
	if (prog->aux->ksym.prog)
		return prog->aux->ksym.name;
	return prog->aux->name;
}

/* Rewrite one instruction so that its immediate is no longer present in
 * clear: the immediate is XORed with a fresh random value, loaded into
 * BPF_REG_AX, XORed back there, and the operation is re-issued in its
 * register form.
 *
 * @from:      instruction to rewrite
 * @aux:       copy of both halves of a BPF_LD | BPF_IMM | BPF_DW pair
 * @to_buff:   output buffer for the replacement sequence
 * @emit_zext: emit an explicit zero-extension of AX for the second half
 *             of a 64-bit immediate load
 *
 * Returns the number of instructions written to @to_buff; 0 means the
 * instruction is left as is.
 */
static int bpf_jit_blind_insn(const struct bpf_insn *from,
			      const struct bpf_insn *aux,
			      struct bpf_insn *to_buff,
			      bool emit_zext)
{
	struct bpf_insn *to = to_buff;
	u32 imm_rnd = get_random_u32();
	s16 off;

	BUILD_BUG_ON(BPF_REG_AX  + 1 != MAX_BPF_JIT_REG);
	BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG);

	/* Constraints on AX register:
	 *
	 * AX register is inaccessible from user space. It is mapped in
	 * all JITs, and used here for constant blinding rewrites. It is
	 * typically "stateless" meaning its contents are only valid within
	 * the executed instruction, but not across several instructions.
	 * There are a few exceptions however which are further detailed
	 * below.
	 *
	 * Constant blinding is only used by JITs, not in the interpreter.
	 * The interpreter uses AX in some occasions as a local temporary
	 * register e.g. in DIV or MOD instructions.
	 *
	 * In restricted circumstances, the verifier can also use the AX
	 * register for rewrites as long as they do not interfere with
	 * the above cases!
*/
	/* Instructions already using AX are not rewritten. */
	if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX)
		goto out;

	/* "mov dst, 0" is replaced by "xor dst, dst"; no random value needed. */
	if (from->imm == 0 &&
	    (from->code == (BPF_ALU   | BPF_MOV | BPF_K) ||
	     from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) {
		*to++ = BPF_ALU64_REG(BPF_XOR, from->dst_reg, from->dst_reg);
		goto out;
	}

	switch (from->code) {
	/* 32-bit ALU with immediate: AX = (imm_rnd ^ imm) ^ imm_rnd, then
	 * the same operation with AX as source register.
	 */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR  | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K:
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU32_REG_OFF(from->code, from->dst_reg, BPF_REG_AX, from->off);
		break;

	/* 64-bit ALU with immediate: same scheme using 64-bit instructions. */
	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR  | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_REG_OFF(from->code, from->dst_reg, BPF_REG_AX, from->off);
		break;

	/* 64-bit conditional jumps comparing against an immediate. */
	case BPF_JMP | BPF_JEQ  | BPF_K:
	case BPF_JMP | BPF_JNE  | BPF_K:
	case BPF_JMP | BPF_JGT  | BPF_K:
	case BPF_JMP | BPF_JLT  | BPF_K:
	case BPF_JMP | BPF_JGE  | BPF_K:
	case BPF_JMP | BPF_JLE  | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
		/* Accommodate for extra offset in case of a backjump.
*/
		/* Two instructions are inserted in front of the jump, so a
		 * backward offset has to reach two instructions further.
		 */
		off = from->off;
		if (off < 0)
			off -= 2;
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_JMP_REG(from->code, from->dst_reg, BPF_REG_AX, off);
		break;

	/* 32-bit conditional jumps comparing against an immediate. */
	case BPF_JMP32 | BPF_JEQ  | BPF_K:
	case BPF_JMP32 | BPF_JNE  | BPF_K:
	case BPF_JMP32 | BPF_JGT  | BPF_K:
	case BPF_JMP32 | BPF_JLT  | BPF_K:
	case BPF_JMP32 | BPF_JGE  | BPF_K:
	case BPF_JMP32 | BPF_JLE  | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		/* Accommodate for extra offset in case of a backjump. */
		off = from->off;
		if (off < 0)
			off -= 2;
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX,
				      off);
		break;

	/* First half of a 64-bit immediate load: the upper 32 bits
	 * (aux[1].imm) are unblinded in AX, shifted left by 32 and moved
	 * into the destination register.
	 */
	case BPF_LD | BPF_IMM | BPF_DW:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[1].imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
		*to++ = BPF_ALU64_REG(BPF_MOV, aux[0].dst_reg, BPF_REG_AX);
		break;
	case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW.
*/
		/* The lower 32 bits (aux[0].imm) are unblinded in AX and
		 * ORed into the destination register.
		 */
		*to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm);
		*to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		if (emit_zext)
			*to++ = BPF_ZEXT_REG(BPF_REG_AX);
		*to++ = BPF_ALU64_REG(BPF_OR,  aux[0].dst_reg, BPF_REG_AX);
		break;

	/* Store of an immediate becomes a store of AX (BPF_STX_MEM). */
	case BPF_ST | BPF_MEM | BPF_DW:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
		*to++ = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm);
		*to++ = BPF_ALU64_IMM(BPF_XOR, BPF_REG_AX, imm_rnd);
		*to++ = BPF_STX_MEM(from->code, from->dst_reg, BPF_REG_AX, from->off);
		break;
	}
out:
	/* Number of instructions emitted; 0 when nothing was rewritten. */
	return to - to_buff;
}

/* Allocate a zeroed buffer of fp_other->pages pages and copy @fp_other
 * into it byte for byte. Returns the clone or NULL if the allocation
 * failed.
 */
static struct bpf_prog *bpf_prog_clone_create(struct bpf_prog *fp_other,
					      gfp_t gfp_extra_flags)
{
	gfp_t gfp_flags = GFP_KERNEL | __GFP_ZERO | gfp_extra_flags;
	struct bpf_prog *fp;

	fp = __vmalloc(fp_other->pages * PAGE_SIZE, gfp_flags);
	if (fp != NULL) {
		/* aux->prog still points to the fp_other one, so
		 * when promoting the clone to the real program,
		 * this still needs to be adapted.
		 */
		memcpy(fp, fp_other, fp_other->pages * PAGE_SIZE);
	}

	return fp;
}

/* Free a program copy without touching the state it shares with the
 * surviving copy: aux, stats and active are cleared before the free.
 */
static void bpf_prog_clone_free(struct bpf_prog *fp)
{
	/* aux was stolen by the other clone, so we cannot free
	 * it from this path! It will be freed eventually by the
	 * other program on release.
	 *
	 * At this point, we don't need a deferred release since
	 * clone is guaranteed to not be locked.
	 */
	fp->aux = NULL;
	fp->stats = NULL;
	fp->active = NULL;
	__bpf_prog_free(fp);
}

void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other)
{
	/* We have to repoint aux->prog to self, as we don't
	 * know whether fp here is the clone or the original.
*/
	fp->aux->prog = fp;
	bpf_prog_clone_free(fp_other);
}

/* Produce a constant-blinded copy of @prog.
 *
 * Returns @prog itself when blinding was not requested or the program is
 * already blinded. Otherwise a clone is created, every instruction is run
 * through bpf_jit_blind_insn() and patched in place, and the clone is
 * returned with ->blinded set. Returns ERR_PTR(-ENOMEM) if cloning fails,
 * or the error from bpf_patch_insn_single() after the clone was released.
 */
struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog)
{
	struct bpf_insn insn_buff[16], aux[2];
	struct bpf_prog *clone, *tmp;
	int insn_delta, insn_cnt;
	struct bpf_insn *insn;
	int i, rewritten;

	if (!prog->blinding_requested || prog->blinded)
		return prog;

	clone = bpf_prog_clone_create(prog, GFP_USER);
	if (!clone)
		return ERR_PTR(-ENOMEM);

	insn_cnt = clone->len;
	insn = clone->insnsi;

	for (i = 0; i < insn_cnt; i++, insn++) {
		if (bpf_pseudo_func(insn)) {
			/* ld_imm64 with an address of bpf subprog is not
			 * a user controlled constant. Don't randomize it,
			 * since it will conflict with jit_subprogs() logic.
			 */
			/* Skip the second half of the ld_imm64 as well. */
			insn++;
			i++;
			continue;
		}

		/* We temporarily need to hold the original ld64 insn
		 * so that we can still access the first part in the
		 * second blinding run.
		 */
		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW) &&
		    insn[1].code == 0)
			memcpy(aux, insn, sizeof(aux));

		rewritten = bpf_jit_blind_insn(insn, aux, insn_buff,
						clone->aux->verifier_zext);
		if (!rewritten)
			continue;

		tmp = bpf_patch_insn_single(clone, i, insn_buff, rewritten);
		if (IS_ERR(tmp)) {
			/* Patching may have repointed aux->prog during
			 * realloc from the original one, so we need to
			 * fix it up here on error.
			 */
			bpf_jit_prog_release_other(prog, clone);
			return tmp;
		}

		clone = tmp;
		/* One instruction was replaced by 'rewritten' instructions. */
		insn_delta = rewritten - 1;

		/* Walk new program and skip insns we just inserted. */
		insn = clone->insnsi + i + insn_delta;
		insn_cnt += insn_delta;
		i        += insn_delta;
	}

	clone->blinded = 1;
	return clone;
}
#endif /* CONFIG_BPF_JIT */

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it. This also needs
 * to go into kallsyms for correlation from e.g. bpftool, so naming
 * must not change.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	return 0;
}
EXPORT_SYMBOL_GPL(__bpf_call_base);

/* All UAPI available opcodes.
*/
#define BPF_INSN_MAP(INSN_2, INSN_3)		\
	/* 32 bit ALU operations (ALU class). */	\
	/*   Register based (X source). */	\
	INSN_3(ALU, ADD,  X),			\
	INSN_3(ALU, SUB,  X),			\
	INSN_3(ALU, AND,  X),			\
	INSN_3(ALU, OR,   X),			\
	INSN_3(ALU, LSH,  X),			\
	INSN_3(ALU, RSH,  X),			\
	INSN_3(ALU, XOR,  X),			\
	INSN_3(ALU, MUL,  X),			\
	INSN_3(ALU, MOV,  X),			\
	INSN_3(ALU, ARSH, X),			\
	INSN_3(ALU, DIV,  X),			\
	INSN_3(ALU, MOD,  X),			\
	INSN_2(ALU, NEG),			\
	INSN_3(ALU, END, TO_BE),		\
	INSN_3(ALU, END, TO_LE),		\
	/*   Immediate based (K source). */	\
	INSN_3(ALU, ADD,  K),			\
	INSN_3(ALU, SUB,  K),			\
	INSN_3(ALU, AND,  K),			\
	INSN_3(ALU, OR,   K),			\
	INSN_3(ALU, LSH,  K),			\
	INSN_3(ALU, RSH,  K),			\
	INSN_3(ALU, XOR,  K),			\
	INSN_3(ALU, MUL,  K),			\
	INSN_3(ALU, MOV,  K),			\
	INSN_3(ALU, ARSH, K),			\
	INSN_3(ALU, DIV,  K),			\
	INSN_3(ALU, MOD,  K),			\
	/* 64 bit ALU operations (ALU64 class). */	\
	/*   Register based (X source). */	\
	INSN_3(ALU64, ADD,  X),			\
	INSN_3(ALU64, SUB,  X),			\
	INSN_3(ALU64, AND,  X),			\
	INSN_3(ALU64, OR,   X),			\
	INSN_3(ALU64, LSH,  X),			\
	INSN_3(ALU64, RSH,  X),			\
	INSN_3(ALU64, XOR,  X),			\
	INSN_3(ALU64, MUL,  X),			\
	INSN_3(ALU64, MOV,  X),			\
	INSN_3(ALU64, ARSH, X),			\
	INSN_3(ALU64, DIV,  X),			\
	INSN_3(ALU64, MOD,  X),			\
	INSN_2(ALU64, NEG),			\
	INSN_3(ALU64, END, TO_LE),		\
	/*   Immediate based (K source). */	\
	INSN_3(ALU64, ADD,  K),			\
	INSN_3(ALU64, SUB,  K),			\
	INSN_3(ALU64, AND,  K),			\
	INSN_3(ALU64, OR,   K),			\
	INSN_3(ALU64, LSH,  K),			\
	INSN_3(ALU64, RSH,  K),			\
	INSN_3(ALU64, XOR,  K),			\
	INSN_3(ALU64, MUL,  K),			\
	INSN_3(ALU64, MOV,  K),			\
	INSN_3(ALU64, ARSH, K),			\
	INSN_3(ALU64, DIV,  K),			\
	INSN_3(ALU64, MOD,  K),			\
	/* Call instruction. */			\
	INSN_2(JMP, CALL),			\
	/* Exit instruction. */			\
	INSN_2(JMP, EXIT),			\
	/* 32-bit Jump instructions (JMP32 class). */	\
	/*   Register based (X source). */	\
	INSN_3(JMP32, JEQ,  X),			\
	INSN_3(JMP32, JNE,  X),			\
	INSN_3(JMP32, JGT,  X),			\
	INSN_3(JMP32, JLT,  X),			\
	INSN_3(JMP32, JGE,  X),			\
	INSN_3(JMP32, JLE,  X),			\
	INSN_3(JMP32, JSGT, X),			\
	INSN_3(JMP32, JSLT, X),			\
	INSN_3(JMP32, JSGE, X),			\
	INSN_3(JMP32, JSLE, X),			\
	INSN_3(JMP32, JSET, X),			\
	/*   Immediate based.
*/						\
	INSN_3(JMP32, JEQ,  K),			\
	INSN_3(JMP32, JNE,  K),			\
	INSN_3(JMP32, JGT,  K),			\
	INSN_3(JMP32, JLT,  K),			\
	INSN_3(JMP32, JGE,  K),			\
	INSN_3(JMP32, JLE,  K),			\
	INSN_3(JMP32, JSGT, K),			\
	INSN_3(JMP32, JSLT, K),			\
	INSN_3(JMP32, JSGE, K),			\
	INSN_3(JMP32, JSLE, K),			\
	INSN_3(JMP32, JSET, K),			\
	/* Jump instructions (JMP class). */	\
	/*   Register based (X source). */	\
	INSN_3(JMP, JEQ,  X),			\
	INSN_3(JMP, JNE,  X),			\
	INSN_3(JMP, JGT,  X),			\
	INSN_3(JMP, JLT,  X),			\
	INSN_3(JMP, JGE,  X),			\
	INSN_3(JMP, JLE,  X),			\
	INSN_3(JMP, JSGT, X),			\
	INSN_3(JMP, JSLT, X),			\
	INSN_3(JMP, JSGE, X),			\
	INSN_3(JMP, JSLE, X),			\
	INSN_3(JMP, JSET, X),			\
	/*   Immediate based (K source). */	\
	INSN_3(JMP, JEQ,  K),			\
	INSN_3(JMP, JNE,  K),			\
	INSN_3(JMP, JGT,  K),			\
	INSN_3(JMP, JLT,  K),			\
	INSN_3(JMP, JGE,  K),			\
	INSN_3(JMP, JLE,  K),			\
	INSN_3(JMP, JSGT, K),			\
	INSN_3(JMP, JSLT, K),			\
	INSN_3(JMP, JSGE, K),			\
	INSN_3(JMP, JSLE, K),			\
	INSN_3(JMP, JSET, K),			\
	INSN_2(JMP, JA),			\
	INSN_2(JMP32, JA),			\
	/* Atomic operations. */		\
	INSN_3(STX, ATOMIC, B),			\
	INSN_3(STX, ATOMIC, H),			\
	INSN_3(STX, ATOMIC, W),			\
	INSN_3(STX, ATOMIC, DW),		\
	/* Store instructions. */		\
	/*   Register based (STX). */		\
	INSN_3(STX, MEM,  B),			\
	INSN_3(STX, MEM,  H),			\
	INSN_3(STX, MEM,  W),			\
	INSN_3(STX, MEM,  DW),			\
	/*   Immediate based (ST). */		\
	INSN_3(ST, MEM, B),			\
	INSN_3(ST, MEM, H),			\
	INSN_3(ST, MEM, W),			\
	INSN_3(ST, MEM, DW),			\
	/* Load instructions. */		\
	/*   Register based (LDX). */		\
	INSN_3(LDX, MEM, B),			\
	INSN_3(LDX, MEM, H),			\
	INSN_3(LDX, MEM, W),			\
	INSN_3(LDX, MEM, DW),			\
	INSN_3(LDX, MEMSX, B),			\
	INSN_3(LDX, MEMSX, H),			\
	INSN_3(LDX, MEMSX, W),			\
	/*   Immediate based (LD). */		\
	INSN_3(LD, IMM, DW)

/* Returns true when @code is an opcode listed in the public instruction
 * table: every entry of BPF_INSN_MAP plus the explicitly added opcodes
 * below. The table is a 256-entry bool array indexed by the opcode byte.
 */
bool bpf_opcode_in_insntable(u8 code)
{
#define BPF_INSN_2_TBL(x, y)    [BPF_##x | BPF_##y] = true
#define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
	static const bool public_insntable[256] = {
		[0 ... 255] = false,
		/* Now overwrite non-defaults ... */
		BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
		/* UAPI exposed, but rewritten opcodes. cBPF carry-over.
*/
		[BPF_LD | BPF_ABS | BPF_B] = true,
		[BPF_LD | BPF_ABS | BPF_H] = true,
		[BPF_LD | BPF_ABS | BPF_W] = true,
		[BPF_LD | BPF_IND | BPF_B] = true,
		[BPF_LD | BPF_IND | BPF_H] = true,
		[BPF_LD | BPF_IND | BPF_W] = true,
		[BPF_JMP | BPF_JCOND] = true,
	};
#undef BPF_INSN_3_TBL
#undef BPF_INSN_2_TBL
	return public_insntable[code];
}

#ifndef CONFIG_BPF_JIT_ALWAYS_ON
/**
 *	___bpf_prog_run - run eBPF program on a given context
 *	@regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers
 *	@insn: is the array of eBPF instructions
 *
 * Decode and execute eBPF instructions.
 *
 * Dispatch goes through a 256-entry table of label addresses indexed by
 * insn->code; every opcode without an explicit entry lands on
 * default_label.
 *
 * Return: whatever value is in %BPF_R0 at program exit
 */
static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
{
#define BPF_INSN_2_LBL(x, y)    [BPF_##x | BPF_##y] = &&x##_##y
#define BPF_INSN_3_LBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = &&x##_##y##_##z
	static const void * const jumptable[256] __annotate_jump_table = {
		[0 ... 255] = &&default_label,
		/* Now overwrite non-defaults ... */
		BPF_INSN_MAP(BPF_INSN_2_LBL, BPF_INSN_3_LBL),
		/* Non-UAPI available opcodes. */
		[BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS,
		[BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL,
		[BPF_ST  | BPF_NOSPEC] = &&ST_NOSPEC,
		[BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B,
		[BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H,
		[BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W,
		[BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW,
		[BPF_LDX | BPF_PROBE_MEMSX | BPF_B] = &&LDX_PROBE_MEMSX_B,
		[BPF_LDX | BPF_PROBE_MEMSX | BPF_H] = &&LDX_PROBE_MEMSX_H,
		[BPF_LDX | BPF_PROBE_MEMSX | BPF_W] = &&LDX_PROBE_MEMSX_W,
	};
#undef BPF_INSN_3_LBL
#undef BPF_INSN_2_LBL
	/* Number of tail calls taken so far by this invocation. */
	u32 tail_call_cnt = 0;

	/* Both macros advance to the next instruction and re-dispatch. */
#define CONT	 ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })

select_insn:
	goto *jumptable[insn->code];

	/* Explicitly mask the register-based shift amounts with 63 or 31
	 * to avoid undefined behavior.
Normally this won't affect the
	 * generated code, for example, in case of native 64 bit archs such
	 * as x86-64 or arm64, the compiler is optimizing the AND away for
	 * the interpreter. In case of JITs, each of the JIT backends compiles
	 * the BPF shift operations to machine instructions which produce
	 * implementation-defined results in such a case; the resulting
	 * contents of the register may be arbitrary, but program behaviour
	 * as a whole remains defined. In other words, in case of JIT backends,
	 * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
	 */
	/* ALU (shifts) */
#define SHT(OPCODE, OP)					\
	ALU64_##OPCODE##_X:				\
		DST = DST OP (SRC & 63);		\
		CONT;					\
	ALU_##OPCODE##_X:				\
		DST = (u32) DST OP ((u32) SRC & 31);	\
		CONT;					\
	ALU64_##OPCODE##_K:				\
		DST = DST OP IMM;			\
		CONT;					\
	ALU_##OPCODE##_K:				\
		DST = (u32) DST OP (u32) IMM;		\
		CONT;
	/* ALU (rest) */
#define ALU(OPCODE, OP)					\
	ALU64_##OPCODE##_X:				\
		DST = DST OP SRC;			\
		CONT;					\
	ALU_##OPCODE##_X:				\
		DST = (u32) DST OP (u32) SRC;		\
		CONT;					\
	ALU64_##OPCODE##_K:				\
		DST = DST OP IMM;			\
		CONT;					\
	ALU_##OPCODE##_K:				\
		DST = (u32) DST OP (u32) IMM;		\
		CONT;
	/* Each ALU()/SHT() use below expands to four handlers: the 64-bit
	 * and 32-bit variant, each with register and immediate source.
	 */
	ALU(ADD,  +)
	ALU(SUB,  -)
	ALU(AND,  &)
	ALU(OR,   |)
	ALU(XOR,  ^)
	ALU(MUL,  *)
	SHT(LSH, <<)
	SHT(RSH, >>)
#undef SHT
#undef ALU
	ALU_NEG:
		DST = (u32) -DST;
		CONT;
	ALU64_NEG:
		DST = -DST;
		CONT;
	/* MOV with register source: OFF selects a sign-extending move from
	 * the given source width; 0 is a plain move.
	 */
	ALU_MOV_X:
		switch (OFF) {
		case 0:
			DST = (u32) SRC;
			break;
		case 8:
			DST = (u32)(s8) SRC;
			break;
		case 16:
			DST = (u32)(s16) SRC;
			break;
		}
		CONT;
	ALU_MOV_K:
		DST = (u32) IMM;
		CONT;
	ALU64_MOV_X:
		switch (OFF) {
		case 0:
			DST = SRC;
			break;
		case 8:
			DST = (s8) SRC;
			break;
		case 16:
			DST = (s16) SRC;
			break;
		case 32:
			DST = (s32) SRC;
			break;
		}
		CONT;
	ALU64_MOV_K:
		DST = IMM;
		CONT;
	/* 64-bit immediate load: combines the imm fields of two consecutive
	 * instructions and skips the second one.
	 */
	LD_IMM_DW:
		DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32;
		insn++;
		CONT;
	ALU_ARSH_X:
		DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
		CONT;
	ALU_ARSH_K:
		DST = (u64) (u32) (((s32) DST) >> IMM);
		CONT;
	ALU64_ARSH_X:
		(*(s64 *) &DST) >>= (SRC & 63);
		CONT;
	ALU64_ARSH_K:
		(*(s64 *) &DST) >>= IMM;
		CONT;
	/* MOD and DIV: OFF == 0 is the unsigned form, OFF == 1 the signed
	 * form. AX serves as a scratch register.
	 */
	ALU64_MOD_X:
		switch (OFF) {
		case 0:
			div64_u64_rem(DST, SRC, &AX);
			DST = AX;
			break;
		case 1:
			AX = div64_s64(DST, SRC);
			DST = DST - AX * SRC;
			break;
		}
		CONT;
	ALU_MOD_X:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			DST = do_div(AX, (u32) SRC);
			break;
		case 1:
			/* Remainder of the absolute values, negated when the
			 * dividend is negative.
			 */
			AX = abs((s32)DST);
			AX = do_div(AX, abs((s32)SRC));
			if ((s32)DST < 0)
				DST = (u32)-AX;
			else
				DST = (u32)AX;
			break;
		}
		CONT;
	ALU64_MOD_K:
		switch (OFF) {
		case 0:
			div64_u64_rem(DST, IMM, &AX);
			DST = AX;
			break;
		case 1:
			AX = div64_s64(DST, IMM);
			DST = DST - AX * IMM;
			break;
		}
		CONT;
	ALU_MOD_K:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			DST = do_div(AX, (u32) IMM);
			break;
		case 1:
			AX = abs((s32)DST);
			AX = do_div(AX, abs((s32)IMM));
			if ((s32)DST < 0)
				DST = (u32)-AX;
			else
				DST = (u32)AX;
			break;
		}
		CONT;
	ALU64_DIV_X:
		switch (OFF) {
		case 0:
			DST = div64_u64(DST, SRC);
			break;
		case 1:
			DST = div64_s64(DST, SRC);
			break;
		}
		CONT;
	ALU_DIV_X:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			do_div(AX, (u32) SRC);
			DST = (u32) AX;
			break;
		case 1:
			/* Quotient of the absolute values, negated when the
			 * operand signs differ.
			 */
			AX = abs((s32)DST);
			do_div(AX, abs((s32)SRC));
			if (((s32)DST < 0) == ((s32)SRC < 0))
				DST = (u32)AX;
			else
				DST = (u32)-AX;
			break;
		}
		CONT;
	ALU64_DIV_K:
		switch (OFF) {
		case 0:
			DST = div64_u64(DST, IMM);
			break;
		case 1:
			DST = div64_s64(DST, IMM);
			break;
		}
		CONT;
	ALU_DIV_K:
		switch (OFF) {
		case 0:
			AX = (u32) DST;
			do_div(AX, (u32) IMM);
			DST = (u32) AX;
			break;
		case 1:
			AX = abs((s32)DST);
			do_div(AX, abs((s32)IMM));
			if (((s32)DST < 0) == ((s32)IMM < 0))
				DST = (u32)AX;
			else
				DST = (u32)-AX;
			break;
		}
		CONT;
	/* Byte-order conversions: IMM is the operand width in bits. Other
	 * IMM values leave DST unchanged.
	 */
	ALU_END_TO_BE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_be16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_be32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_be64(DST);
			break;
		}
		CONT;
	ALU_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) cpu_to_le16(DST);
			break;
		case 32:
			DST = (__force u32) cpu_to_le32(DST);
			break;
		case 64:
			DST = (__force u64) cpu_to_le64(DST);
			break;
		}
		CONT;
	/* The ALU64 variant swaps bytes unconditionally (__swabN). */
	ALU64_END_TO_LE:
		switch (IMM) {
		case 16:
			DST = (__force u16) __swab16(DST);
			break;
		case 32:
			DST = (__force u32) __swab32(DST);
			break;
		case 64:
			DST = (__force u64) __swab64(DST);
			break;
		}
		CONT;

	/* CALL */
	JMP_CALL:
		/* Function call scratches BPF_R1-BPF_R5 registers,
		 * preserves BPF_R6-BPF_R9, and stores return value
		 * into BPF_R0.
		 */
		BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
						       BPF_R4, BPF_R5);
		CONT;

	/* Same as above, relative to __bpf_call_base_args; the callee also
	 * receives the instruction pointer insn + insn->off + 1.
	 */
	JMP_CALL_ARGS:
		BPF_R0 = (__bpf_call_base_args + insn->imm)(BPF_R1, BPF_R2,
							    BPF_R3, BPF_R4,
							    BPF_R5,
							    insn + insn->off + 1);
		CONT;

	/* Tail call: BPF_R2 holds the map pointer, BPF_R3 the index. On an
	 * out-of-range index, an exhausted tail call budget or an empty
	 * slot, execution simply continues with the next instruction.
	 */
	JMP_TAIL_CALL: {
		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
		struct bpf_array *array = container_of(map, struct bpf_array, map);
		struct bpf_prog *prog;
		u32 index = BPF_R3;

		if (unlikely(index >= array->map.max_entries))
			goto out;

		if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT))
			goto out;

		tail_call_cnt++;

		prog = READ_ONCE(array->ptrs[index]);
		if (!prog)
			goto out;

		/* ARG1 at this point is guaranteed to point to CTX from
		 * the verifier side due to the fact that the tail call is
		 * handled like a helper, that is, bpf_tail_call_proto,
		 * where arg1_type is ARG_PTR_TO_CTX.
		 */
		insn = prog->insnsi;
		goto select_insn;
out:
		CONT;
	}
	/* Unconditional jumps: JMP takes the offset from ->off, JMP32 from
	 * ->imm.
	 */
	JMP_JA:
		insn += insn->off;
		CONT;
	JMP32_JA:
		insn += insn->imm;
		CONT;
	JMP_EXIT:
		return BPF_R0;
	/* JMP */
#define COND_JMP(SIGN, OPCODE, CMP_OP)				\
	JMP_##OPCODE##_X:					\
		if ((SIGN##64) DST CMP_OP (SIGN##64) SRC) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;						\
	JMP32_##OPCODE##_X:					\
		if ((SIGN##32) DST CMP_OP (SIGN##32) SRC) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;						\
	JMP_##OPCODE##_K:					\
		if ((SIGN##64) DST CMP_OP (SIGN##64) IMM) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;						\
	JMP32_##OPCODE##_K:					\
		if ((SIGN##32) DST CMP_OP (SIGN##32) IMM) {	\
			insn += insn->off;			\
			CONT_JMP;				\
		}						\
		CONT;
	/* SIGN picks the comparison type: u -> u64/u32, s -> s64/s32. */
	COND_JMP(u, JEQ, ==)
	COND_JMP(u, JNE, !=)
	COND_JMP(u, JGT, >)
	COND_JMP(u, JLT, <)
	COND_JMP(u, JGE, >=)
	COND_JMP(u, JLE, <=)
	COND_JMP(u, JSET, &)
	COND_JMP(s, JSGT, >)
	COND_JMP(s, JSLT, <)
	COND_JMP(s, JSGE, >=)
	COND_JMP(s, JSLE, <=)
#undef COND_JMP
	/* ST, STX and LDX*/
	ST_NOSPEC:
		/* Speculation barrier for mitigating Speculative Store Bypass,
		 * Bounds-Check Bypass and Type
Confusion. In case of arm64, we
		 * rely on the firmware mitigation as controlled via the ssbd
		 * kernel parameter. Whenever the mitigation is enabled, it
		 * works for all of the kernel code with no need to provide any
		 * additional instructions here. In case of x86, we use 'lfence'
		 * insn for mitigation. We reuse preexisting logic from Spectre
		 * v1 mitigation that happens to produce the required code on
		 * x86 for v4 as well.
		 */
		barrier_nospec();
		CONT;
	/* LDST() expands to the store-register, store-immediate, load and
	 * probing-load handlers for one access size.
	 */
#define LDST(SIZEOP, SIZE)						\
	STX_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = SRC;	\
		CONT;							\
	ST_MEM_##SIZEOP:						\
		*(SIZE *)(unsigned long) (DST + insn->off) = IMM;	\
		CONT;							\
	LDX_MEM_##SIZEOP:						\
		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
		CONT;							\
	LDX_PROBE_MEM_##SIZEOP:						\
		bpf_probe_read_kernel_common(&DST, sizeof(SIZE),	\
			      (const void *)(long) (SRC + insn->off));	\
		DST = *((SIZE *)&DST);					\
		CONT;

	LDST(B,   u8)
	LDST(H,  u16)
	LDST(W,  u32)
	LDST(DW, u64)
#undef LDST

	/* LDSX() is the load-only counterpart used with signed types. */
#define LDSX(SIZEOP, SIZE)						\
	LDX_MEMSX_##SIZEOP:						\
		DST = *(SIZE *)(unsigned long) (SRC + insn->off);	\
		CONT;							\
	LDX_PROBE_MEMSX_##SIZEOP:					\
		bpf_probe_read_kernel_common(&DST, sizeof(SIZE),		\
				      (const void *)(long) (SRC + insn->off));	\
		DST = *((SIZE *)&DST);					\
		CONT;

	LDSX(B,   s8)
	LDSX(H,  s16)
	LDSX(W,  s32)
#undef LDSX

	/* ATOMIC_ALU_OP() expands to two switch cases for one operation:
	 * the plain form and the BPF_FETCH form, which writes the value
	 * returned by atomic{,64}_fetch_*() back into SRC. Only the W and
	 * DW sizes are handled; anything else goes to default_label.
	 */
#define ATOMIC_ALU_OP(BOP, KOP)						\
		case BOP:						\
			if (BPF_SIZE(insn->code) == BPF_W)		\
				atomic_##KOP((u32) SRC, (atomic_t *)(unsigned long) \
					     (DST + insn->off));	\
			else if (BPF_SIZE(insn->code) == BPF_DW)	\
				atomic64_##KOP((u64) SRC, (atomic64_t *)(unsigned long) \
					       (DST + insn->off));	\
			else						\
				goto default_label;			\
			break;						\
		case BOP | BPF_FETCH:					\
			if (BPF_SIZE(insn->code) == BPF_W)		\
				SRC = (u32) atomic_fetch_##KOP(		\
					(u32) SRC,			\
					(atomic_t *)(unsigned long) (DST + insn->off)); \
			else if (BPF_SIZE(insn->code) == BPF_DW)	\
				SRC = (u64) atomic64_fetch_##KOP(	\
					(u64) SRC,			\
					(atomic64_t *)(unsigned long) (DST + insn->off)); \
			else						\
				goto default_label;			\
			break;

	/* All four atomic size labels share one handler; the operation is
	 * selected by IMM.
	 */
	STX_ATOMIC_DW:
	STX_ATOMIC_W:
	STX_ATOMIC_H:
	STX_ATOMIC_B:
		switch (IMM) {
		/* Atomic
read-modify-write instructions support only W and DW
		 * size modifiers.
		 */
		ATOMIC_ALU_OP(BPF_ADD, add)
		ATOMIC_ALU_OP(BPF_AND, and)
		ATOMIC_ALU_OP(BPF_OR, or)
		ATOMIC_ALU_OP(BPF_XOR, xor)
#undef ATOMIC_ALU_OP

		/* Exchange: the value returned by atomic{,64}_xchg() is
		 * written to SRC.
		 */
		case BPF_XCHG:
			if (BPF_SIZE(insn->code) == BPF_W)
				SRC = (u32) atomic_xchg(
					(atomic_t *)(unsigned long) (DST + insn->off),
					(u32) SRC);
			else if (BPF_SIZE(insn->code) == BPF_DW)
				SRC = (u64) atomic64_xchg(
					(atomic64_t *)(unsigned long) (DST + insn->off),
					(u64) SRC);
			else
				goto default_label;
			break;
		/* Compare-and-exchange: BPF_R0 supplies the expected value
		 * and receives the result of atomic{,64}_cmpxchg(); SRC is
		 * the new value.
		 */
		case BPF_CMPXCHG:
			if (BPF_SIZE(insn->code) == BPF_W)
				BPF_R0 = (u32) atomic_cmpxchg(
					(atomic_t *)(unsigned long) (DST + insn->off),
					(u32) BPF_R0, (u32) SRC);
			else if (BPF_SIZE(insn->code) == BPF_DW)
				BPF_R0 = (u64) atomic64_cmpxchg(
					(atomic64_t *)(unsigned long) (DST + insn->off),
					(u64) BPF_R0, (u64) SRC);
			else
				goto default_label;
			break;
		/* Atomic load and store instructions support all size
		 * modifiers.
		 */
		/* The DW case is only compiled in with CONFIG_64BIT;
		 * otherwise it falls to default_label.
		 */
		case BPF_LOAD_ACQ:
			switch (BPF_SIZE(insn->code)) {
#define LOAD_ACQUIRE(SIZEOP, SIZE)				\
			case BPF_##SIZEOP:			\
				DST = (SIZE)smp_load_acquire(	\
					(SIZE *)(unsigned long)(SRC + insn->off));	\
				break;
			LOAD_ACQUIRE(B,   u8)
			LOAD_ACQUIRE(H,  u16)
			LOAD_ACQUIRE(W,  u32)
#ifdef CONFIG_64BIT
			LOAD_ACQUIRE(DW, u64)
#endif
#undef LOAD_ACQUIRE
			default:
				goto default_label;
			}
			break;
		case BPF_STORE_REL:
			switch (BPF_SIZE(insn->code)) {
#define STORE_RELEASE(SIZEOP, SIZE)			\
			case BPF_##SIZEOP:		\
				smp_store_release(	\
					(SIZE *)(unsigned long)(DST + insn->off), (SIZE)SRC);	\
				break;
			STORE_RELEASE(B,   u8)
			STORE_RELEASE(H,  u16)
			STORE_RELEASE(W,  u32)
#ifdef CONFIG_64BIT
			STORE_RELEASE(DW, u64)
#endif
#undef STORE_RELEASE
			default:
				goto default_label;
			}
			break;

		default:
			goto default_label;
		}
		CONT;

	default_label:
		/* If we ever reach this, we have a bug somewhere. Die hard here
		 * instead of just returning 0; we could be somewhere in a subprog,
		 * so execution could continue otherwise which we do /not/ want.
		 *
		 * Note, verifier whitelists all opcodes in bpf_opcode_in_insntable().
*/
	/* Log the offending opcode and immediate, then halt via BUG_ON(). */
	pr_warn("BPF interpreter: unknown opcode %02x (imm: 0x%x)\n",
		insn->code, insn->imm);
	BUG_ON(1);
	return 0;
}

/* Name of the generated interpreter entry point for a given stack size. */
#define PROG_NAME(stack_size) __bpf_prog_run##stack_size
/* Generate one interpreter entry point taking (ctx, insn).
 *
 * The generated function reserves stack_size bytes of stack as u64 words,
 * zero-initializes the register file, unpoisons the stack for KMSAN, sets
 * FP to the address one past the end of the stack, stores ctx in ARG1 and
 * runs ___bpf_prog_run().
 *
 * NOTE(review): FP and ARG1 are macros defined outside this excerpt;
 * presumably they alias slots of regs[] - confirm.
 */
#define DEFINE_BPF_PROG_RUN(stack_size) \
static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \
{ \
	u64 stack[stack_size / sizeof(u64)]; \
	u64 regs[MAX_BPF_EXT_REG] = {}; \
\
	kmsan_unpoison_memory(stack, sizeof(stack)); \
	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
	ARG1 = (u64) (unsigned long) ctx; \
	return ___bpf_prog_run(regs, insn); \
}

/* Name of the generated entry point that takes five explicit arguments. */
#define PROG_NAME_ARGS(stack_size) __bpf_prog_run_args##stack_size
/* Generate one interpreter entry point taking (r1..r5, insn).
 *
 * Same stack setup as DEFINE_BPF_PROG_RUN(), but the register file is not
 * zero-initialized here; only FP and BPF_R1..BPF_R5 are assigned before
 * ___bpf_prog_run() is called, and the full u64 result is returned.
 */
#define DEFINE_BPF_PROG_RUN_ARGS(stack_size) \
static u64 PROG_NAME_ARGS(stack_size)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5, \
				      const struct bpf_insn *insn) \
{ \
	u64 stack[stack_size / sizeof(u64)]; \
	u64 regs[MAX_BPF_EXT_REG]; \
\
	kmsan_unpoison_memory(stack, sizeof(stack)); \
	FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \
	BPF_R1 = r1; \
	BPF_R2 = r2; \
	BPF_R3 = r3; \
	BPF_R4 = r4; \
	BPF_R5 = r5; \
	return ___bpf_prog_run(regs, insn); \
}

/* EVALn(FN, a1, ..., an) applies FN to each argument in turn by peeling one
 * argument per level: EVAL1..EVAL5 are complete below; EVAL6 is declared
 * last and its replacement list continues past the end of this span.
 */
#define EVAL1(FN, X) FN(X)
#define EVAL2(FN, X, Y...) FN(X) EVAL1(FN, Y)
#define EVAL3(FN, X, Y...) FN(X) EVAL2(FN, Y)
#define EVAL4(FN, X, Y...) FN(X) EVAL3(FN, Y)
#define EVAL5(FN, X, Y...) FN(X) EVAL4(FN, Y)
#define EVAL6(FN, X, Y...)
FN(X) EVAL5(FN, Y)

/* Instantiate the (ctx, insn) entry points for stack sizes 32..512 bytes
 * in steps of 32 (16 variants).
 */
EVAL6(DEFINE_BPF_PROG_RUN, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN, 416, 448, 480, 512);

/* Instantiate the (r1..r5, insn) entry points for the same stack sizes. */
EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 32, 64, 96, 128, 160, 192);
EVAL6(DEFINE_BPF_PROG_RUN_ARGS, 224, 256, 288, 320, 352, 384);
EVAL4(DEFINE_BPF_PROG_RUN_ARGS, 416, 448, 480, 512);

/* Table of (ctx, insn) entry points; entry k serves stack size 32 * (k + 1). */
#define PROG_NAME_LIST(stack_size) PROG_NAME(stack_size),
static unsigned int (*interpreters[])(const void *ctx,
				      const struct bpf_insn *insn) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#undef PROG_NAME_LIST
/* Table of (r1..r5, insn) entry points, laid out like interpreters[]. */
#define PROG_NAME_LIST(stack_size) PROG_NAME_ARGS(stack_size),
static __maybe_unused
u64 (*interpreters_args[])(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5,
			   const struct bpf_insn *insn) = {
EVAL6(PROG_NAME_LIST, 32, 64, 96, 128, 160, 192)
EVAL6(PROG_NAME_LIST, 224, 256, 288, 320, 352, 384)
EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
};
#undef PROG_NAME_LIST

#ifdef CONFIG_BPF_SYSCALL
/* Rewrite a call instruction so that it targets the interpreters_args[]
 * entry matching @stack_depth.
 *
 * @insn:        instruction to patch in place
 * @stack_depth: stack depth in bytes; values below 1 are treated as 1
 *
 * The previous imm is kept (truncated to s16) in insn->off, insn->imm
 * becomes the offset of the chosen entry from __bpf_call_base_args, and
 * the opcode is set to BPF_JMP | BPF_CALL_ARGS.
 *
 * NOTE(review): the table index is not range-checked here; callers
 * presumably guarantee stack_depth <= 512 - confirm.
 */
void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth)
{
	stack_depth = max_t(u32, stack_depth, 1);
	insn->off = (s16) insn->imm;
	/* Round the depth up to a multiple of 32 to pick the table slot. */
	insn->imm = interpreters_args[(round_up(stack_depth, 32) / 32) - 1] -
		__bpf_call_base_args;
	insn->code = BPF_JMP | BPF_CALL_ARGS;
}
#endif /* CONFIG_BPF_SYSCALL */
#endif /* closes a conditional opened before this excerpt */

/* Fallback bpf_func that warns once and returns 0. */
static unsigned int __bpf_prog_ret0_warn(const void *ctx,
					 const struct bpf_insn *insn)
{
	/* If this handler ever gets executed, then BPF_JIT_ALWAYS_ON
	 * is not working properly, or interpreter is being used when
	 * prog->jit_requested is not 0, so warn about it!
	 */
	WARN_ON_ONCE(1);
	return 0;
}

/* Check whether @fp may share @map with the map's recorded owner.
 * Programs with kprobe_override set are rejected before the owner lock
 * is taken.
 */
static bool __bpf_prog_map_compatible(struct bpf_map *map,
				      const struct bpf_prog *fp)
{
	enum bpf_prog_type prog_type = resolve_prog_type(fp);
	struct bpf_prog_aux *aux = fp->aux;
	enum bpf_cgroup_storage_type i;
	bool ret = false;
	u64 cookie;

	if (fp->kprobe_override)
		return ret;

	spin_lock(&map->owner_lock);
	/* There's no owner yet where we could check for compatibility.
*/
	if (!map->owner) {
		/* Record this program's properties as the owner signature. */
		map->owner = bpf_map_owner_alloc(map);
		if (!map->owner)
			goto err;
		map->owner->type  = prog_type;
		map->owner->jited = fp->jited;
		map->owner->xdp_has_frags = aux->xdp_has_frags;
		map->owner->attach_func_proto = aux->attach_func_proto;
		/* A missing cgroup storage is recorded as cookie 0. */
		for_each_cgroup_storage_type(i) {
			map->owner->storage_cookie[i] =
				aux->cgroup_storage[i] ?
				aux->cgroup_storage[i]->cookie : 0;
		}
		ret = true;
	} else {
		/* Type, JIT state and xdp_has_frags must all match the owner. */
		ret = map->owner->type  == prog_type &&
		      map->owner->jited == fp->jited &&
		      map->owner->xdp_has_frags == aux->xdp_has_frags;
		for_each_cgroup_storage_type(i) {
			if (!ret)
				break;
			cookie = aux->cgroup_storage[i] ?
				 aux->cgroup_storage[i]->cookie : 0;
			/* A zero cookie on the program side always passes. */
			ret = map->owner->storage_cookie[i] == cookie ||
			      !cookie;
		}
		/* A differing attach_func_proto only rejects the program
		 * types listed below; all other types ignore the mismatch.
		 */
		if (ret &&
		    map->owner->attach_func_proto != aux->attach_func_proto) {
			switch (prog_type) {
			case BPF_PROG_TYPE_TRACING:
			case BPF_PROG_TYPE_LSM:
			case BPF_PROG_TYPE_EXT:
			case BPF_PROG_TYPE_STRUCT_OPS:
				ret = false;
				break;
			default:
				break;
			}
		}
	}
err:
	spin_unlock(&map->owner_lock);
	return ret;
}

bool bpf_prog_map_compatible(struct bpf_map *map, const struct bpf_prog *fp)
{
	/* XDP programs inserted into maps are not guaranteed to run on
	 * a particular netdev (and can run outside driver context entirely
	 * in the case of devmap and cpumap). Until device checks
	 * are implemented, prohibit adding dev-bound programs to program maps.
*/
	if (bpf_prog_is_dev_bound(fp->aux))
		return false;

	return __bpf_prog_map_compatible(map, fp);
}

/* Verify that @fp is compatible with every program-holding map it uses.
 *
 * Walks aux->used_maps under used_maps_mutex and, for each map for which
 * map_type_contains_progs() is true, runs __bpf_prog_map_compatible().
 *
 * Return: 0 if all such maps are compatible, -EINVAL on the first one
 * that is not.
 */
static int bpf_check_tail_call(const struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;
	int i, ret = 0;

	mutex_lock(&aux->used_maps_mutex);
	for (i = 0; i < aux->used_map_cnt; i++) {
		struct bpf_map *map = aux->used_maps[i];

		if (!map_type_contains_progs(map))
			continue;

		if (!__bpf_prog_map_compatible(map, fp)) {
			ret = -EINVAL;
			goto out;
		}
	}

out:
	mutex_unlock(&aux->used_maps_mutex);
	return ret;
}

/* Install the initial bpf_func for @fp: an interpreter entry point sized
 * for the program's stack depth, or the warning stub.
 */
static void bpf_prog_select_func(struct bpf_prog *fp)
{
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
	/* Depth 0 is treated as 1 so the index below never underflows. */
	u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
	u32 idx = (round_up(stack_depth, 32) / 32) - 1;

	/* may_goto may cause stack size > 512, leading to idx out-of-bounds.
	 * An interpreter entry is only installed when JIT was not requested
	 * and idx is inside interpreters[]; an out-of-range idx warns once.
	 * In every other case the warning stub is installed instead.
	 */
	if (!fp->jit_requested && !WARN_ON_ONCE(idx >= ARRAY_SIZE(interpreters))) {
		fp->bpf_func = interpreters[idx];
	} else {
		fp->bpf_func = __bpf_prog_ret0_warn;
	}
#else
	fp->bpf_func = __bpf_prog_ret0_warn;
#endif
}

/**
 *	bpf_prog_select_runtime - select exec runtime for BPF program
 *	@fp: bpf_prog populated with BPF program
 *	@err: pointer to error variable
 *
 * Try to JIT eBPF program, if JIT is not available, use interpreter.
 * The BPF program will be executed via bpf_prog_run() function.
 *
 * Return: the &fp argument along with &err set to 0 for success or
 * a negative errno code on failure
 */
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
{
	/* In case of BPF to BPF calls, verifier did all the prep
	 * work with regards to JITing, etc.
	 */
	bool jit_needed = fp->jit_requested;

	/* bpf_func already set: skip selection and JIT, go straight to finalize. */
	if (fp->bpf_func)
		goto finalize;

	/* JIT is mandatory with JIT_ALWAYS_ON or when kfuncs are called. */
	if (IS_ENABLED(CONFIG_BPF_JIT_ALWAYS_ON) ||
	    bpf_prog_has_kfunc_call(fp))
		jit_needed = true;

	bpf_prog_select_func(fp);

	/* eBPF JITs can rewrite the program in case constant
	 * blinding is active.
However, in case of error during * blinding, bpf_int_jit_compile() must always return a * valid program, which in this case would simply not * be JITed, but falls back to the interpreter. */ if (!bpf_prog_is_offloaded(fp->aux)) { *err = bpf_prog_alloc_jited_linfo(fp); if (*err) return fp; fp = bpf_int_jit_compile(fp); bpf_prog_jit_attempt_done(fp); if (!fp->jited && jit_needed) { *err = -ENOTSUPP; return fp; } } else { *err = bpf_prog_offload_compile(fp); if (*err) return fp; } finalize: *err = bpf_prog_lock_ro(fp); if (*err) return fp; /* The tail call compatibility check can only be done at * this late stage as we need to determine, if we deal * with JITed or non JITed program concatenations and not * all eBPF JITs might immediately support all features. */ *err = bpf_check_tail_call(fp); return fp; } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); static unsigned int __bpf_prog_ret1(const void *ctx, const struct bpf_insn *insn) { return 1; } static struct bpf_prog_dummy { struct bpf_prog prog; } dummy_bpf_prog = { .prog = { .bpf_func = __bpf_prog_ret1, }, }; struct bpf_empty_prog_array bpf_empty_prog_array = { .null_prog = NULL, }; EXPORT_SYMBOL(bpf_empty_prog_array); struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags) { struct bpf_prog_array *p; if (prog_cnt) p = kzalloc(struct_size(p, items, prog_cnt + 1), flags); else p = &bpf_empty_prog_array.hdr; return p; } void bpf_prog_array_free(struct bpf_prog_array *progs) { if (!progs || progs == &bpf_empty_prog_array.hdr) return; kfree_rcu(progs, rcu); } static void __bpf_prog_array_free_sleepable_cb(struct rcu_head *rcu) { struct bpf_prog_array *progs; /* If RCU Tasks Trace grace period implies RCU grace period, there is * no need to call kfree_rcu(), just call kfree() directly. 
*/ progs = container_of(rcu, struct bpf_prog_array, rcu); if (rcu_trace_implies_rcu_gp()) kfree(progs); else kfree_rcu(progs, rcu); } void bpf_prog_array_free_sleepable(struct bpf_prog_array *progs) { if (!progs || progs == &bpf_empty_prog_array.hdr) return; call_rcu_tasks_trace(&progs->rcu, __bpf_prog_array_free_sleepable_cb); } int bpf_prog_array_length(struct bpf_prog_array *array) { struct bpf_prog_array_item *item; u32 cnt = 0; for (item = array->items; item->prog; item++) if (item->prog != &dummy_bpf_prog.prog) cnt++; return cnt; } bool bpf_prog_array_is_empty(struct bpf_prog_array *array) { struct bpf_prog_array_item *item; for (item = array->items; item->prog; item++) if (item->prog != &dummy_bpf_prog.prog) return false; return true; } static bool bpf_prog_array_copy_core(struct bpf_prog_array *array, u32 *prog_ids, u32 request_cnt) { struct bpf_prog_array_item *item; int i = 0; for (item = array->items; item->prog; item++) { if (item->prog == &dummy_bpf_prog.prog) continue; prog_ids[i] = item->prog->aux->id; if (++i == request_cnt) { item++; break; } } return !!(item->prog); } int bpf_prog_array_copy_to_user(struct bpf_prog_array *array, __u32 __user *prog_ids, u32 cnt) { unsigned long err = 0; bool nospc; u32 *ids; /* users of this function are doing: * cnt = bpf_prog_array_length(); * if (cnt > 0) * bpf_prog_array_copy_to_user(..., cnt); * so below kcalloc doesn't need extra cnt > 0 check. 
*/
	ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN);
	if (!ids)
		return -ENOMEM;
	/* Gather the ids in a kernel buffer, then copy the buffer out. */
	nospc = bpf_prog_array_copy_core(array, ids, cnt);
	err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));
	kfree(ids);
	/* A copy fault is reported in preference to -ENOSPC. */
	if (err)
		return -EFAULT;
	if (nospc)
		return -ENOSPC;
	return 0;
}

/* Replace the first slot of @array that holds @old_prog with the dummy
 * program. The slot is overwritten in place with WRITE_ONCE(); the array
 * is neither resized nor reordered.
 */
void bpf_prog_array_delete_safe(struct bpf_prog_array *array,
				struct bpf_prog *old_prog)
{
	struct bpf_prog_array_item *item;

	for (item = array->items; item->prog; item++)
		if (item->prog == old_prog) {
			WRITE_ONCE(item->prog, &dummy_bpf_prog.prog);
			break;
		}
}

/**
 * bpf_prog_array_delete_safe_at() - Replaces the program at the given
 *                                   index into the program array with
 *                                   a dummy no-op program.
 * @array: a bpf_prog_array
 * @index: the index of the program to replace
 *
 * Skips over dummy programs, by not counting them, when calculating
 * the position of the program to replace.
 *
 * Return:
 * * 0		- Success
 * * -EINVAL	- Invalid index value. Must be a non-negative integer.
 * * -ENOENT	- Index out of range
 */
int bpf_prog_array_delete_safe_at(struct bpf_prog_array *array, int index)
{
	return bpf_prog_array_update_at(array, index, &dummy_bpf_prog.prog);
}

/**
 * bpf_prog_array_update_at() - Updates the program at the given index
 *                              into the program array.
 * @array: a bpf_prog_array
 * @index: the index of the program to update
 * @prog: the program to insert into the array
 *
 * Skips over dummy programs, by not counting them, when calculating
 * the position of the program to update.
 *
 * Return:
 * * 0		- Success
 * * -EINVAL	- Invalid index value. Must be a non-negative integer.
* * -ENOENT	- Index out of range
 */
int bpf_prog_array_update_at(struct bpf_prog_array *array, int index,
			     struct bpf_prog *prog)
{
	struct bpf_prog_array_item *item;

	if (unlikely(index < 0))
		return -EINVAL;

	/* index counts down over non-dummy slots only. */
	for (item = array->items; item->prog; item++) {
		if (item->prog == &dummy_bpf_prog.prog)
			continue;
		if (!index) {
			WRITE_ONCE(item->prog, prog);
			return 0;
		}
		index--;
	}
	/* Hit the NULL terminator before reaching the requested slot. */
	return -ENOENT;
}

/* Build a new program array from @old_array, dropping @exclude_prog and
 * appending @include_prog (with @bpf_cookie); either may be NULL.
 *
 * The result is stored in *@new_array; it is NULL when the new array
 * would hold no programs.
 *
 * Return: 0 on success, -EEXIST if @include_prog is already present,
 * -ENOENT if @exclude_prog was given but not found, -ENOMEM on
 * allocation failure.
 */
int bpf_prog_array_copy(struct bpf_prog_array *old_array,
			struct bpf_prog *exclude_prog,
			struct bpf_prog *include_prog,
			u64 bpf_cookie,
			struct bpf_prog_array **new_array)
{
	int new_prog_cnt, carry_prog_cnt = 0;
	struct bpf_prog_array_item *existing, *new;
	struct bpf_prog_array *array;
	bool found_exclude = false;

	/* Figure out how many existing progs we need to carry over to
	 * the new array.
	 */
	if (old_array) {
		existing = old_array->items;
		for (; existing->prog; existing++) {
			if (existing->prog == exclude_prog) {
				found_exclude = true;
				continue;
			}
			/* Dummy entries are never carried over. */
			if (existing->prog != &dummy_bpf_prog.prog)
				carry_prog_cnt++;
			if (existing->prog == include_prog)
				return -EEXIST;
		}
	}

	if (exclude_prog && !found_exclude)
		return -ENOENT;

	/* How many progs (not NULL) will be in the new array? */
	new_prog_cnt = carry_prog_cnt;
	if (include_prog)
		new_prog_cnt += 1;

	/* Do we have any prog (not NULL) in the new array?
*/
	if (!new_prog_cnt) {
		*new_array = NULL;
		return 0;
	}

	/* +1 as the end of prog_array is marked with NULL */
	array = bpf_prog_array_alloc(new_prog_cnt + 1, GFP_KERNEL);
	if (!array)
		return -ENOMEM;
	new = array->items;

	/* Fill in the new prog array */
	if (carry_prog_cnt) {
		existing = old_array->items;
		for (; existing->prog; existing++) {
			/* Skip the excluded program and dummy placeholders. */
			if (existing->prog == exclude_prog ||
			    existing->prog == &dummy_bpf_prog.prog)
				continue;

			new->prog = existing->prog;
			new->bpf_cookie = existing->bpf_cookie;
			new++;
		}
	}
	/* The included program, if any, goes last with its cookie. */
	if (include_prog) {
		new->prog = include_prog;
		new->bpf_cookie = bpf_cookie;
		new++;
	}
	/* Terminate the array explicitly. */
	new->prog = NULL;
	*new_array = array;
	return 0;
}

/* Report the number of programs in @array through *@prog_cnt and copy up
 * to @request_cnt program ids into @prog_ids.
 *
 * A NULL @array counts as zero programs. Returns 0 without copying when
 * @request_cnt is 0 or the array holds no programs.
 */
int bpf_prog_array_copy_info(struct bpf_prog_array *array,
			     u32 *prog_ids, u32 request_cnt,
			     u32 *prog_cnt)
{
	u32 cnt = 0;

	if (array)
		cnt = bpf_prog_array_length(array);

	*prog_cnt = cnt;

	/* return early if user requested only program count or nothing to copy */
	if (!request_cnt || !cnt)
		return 0;

	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */
	return bpf_prog_array_copy_core(array, prog_ids, request_cnt) ?
-ENOSPC : 0; } void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len) { struct bpf_map *map; bool sleepable; u32 i; sleepable = aux->prog->sleepable; for (i = 0; i < len; i++) { map = used_maps[i]; if (map->ops->map_poke_untrack) map->ops->map_poke_untrack(map, aux); if (sleepable) atomic64_dec(&map->sleepable_refcnt); bpf_map_put(map); } } static void bpf_free_used_maps(struct bpf_prog_aux *aux) { __bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt); kfree(aux->used_maps); } void __bpf_free_used_btfs(struct btf_mod_pair *used_btfs, u32 len) { #ifdef CONFIG_BPF_SYSCALL struct btf_mod_pair *btf_mod; u32 i; for (i = 0; i < len; i++) { btf_mod = &used_btfs[i]; if (btf_mod->module) module_put(btf_mod->module); btf_put(btf_mod->btf); } #endif } static void bpf_free_used_btfs(struct bpf_prog_aux *aux) { __bpf_free_used_btfs(aux->used_btfs, aux->used_btf_cnt); kfree(aux->used_btfs); } static void bpf_prog_free_deferred(struct work_struct *work) { struct bpf_prog_aux *aux; int i; aux = container_of(work, struct bpf_prog_aux, work); #ifdef CONFIG_BPF_SYSCALL bpf_free_kfunc_btf_tab(aux->kfunc_btf_tab); bpf_prog_stream_free(aux->prog); #endif #ifdef CONFIG_CGROUP_BPF if (aux->cgroup_atype != CGROUP_BPF_ATTACH_TYPE_INVALID) bpf_cgroup_atype_put(aux->cgroup_atype); #endif bpf_free_used_maps(aux); bpf_free_used_btfs(aux); if (bpf_prog_is_dev_bound(aux)) bpf_prog_dev_bound_destroy(aux->prog); #ifdef CONFIG_PERF_EVENTS if (aux->prog->has_callchain_buf) put_callchain_buffers(); #endif if (aux->dst_trampoline) bpf_trampoline_put(aux->dst_trampoline); for (i = 0; i < aux->real_func_cnt; i++) { /* We can just unlink the subprog poke descriptor table as * it was originally linked to the main program and is also * released along with it. 
*/
		aux->func[i]->aux->poke_tab = NULL;
		bpf_jit_free(aux->func[i]);
	}
	/* With subprograms, the func table is freed and the main program goes
	 * through bpf_prog_unlock_free(); otherwise it is released with
	 * bpf_jit_free().
	 */
	if (aux->real_func_cnt) {
		kfree(aux->func);
		bpf_prog_unlock_free(aux->prog);
	} else {
		bpf_jit_free(aux->prog);
	}
}

/* Start releasing @fp: drop the dst_prog reference (if any) and the token,
 * then hand the rest of the teardown to bpf_prog_free_deferred() through
 * schedule_work().
 */
void bpf_prog_free(struct bpf_prog *fp)
{
	struct bpf_prog_aux *aux = fp->aux;

	if (aux->dst_prog)
		bpf_prog_put(aux->dst_prog);
	bpf_token_put(aux->token);
	INIT_WORK(&aux->work, bpf_prog_free_deferred);
	schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);

/* RNG for unprivileged user space with separated state from prandom_u32(). */
static DEFINE_PER_CPU(struct rnd_state, bpf_user_rnd_state);

/* Seed the per-CPU bpf_user_rnd_state via prandom_init_once(). */
void bpf_user_rnd_init_once(void)
{
	prandom_init_once(&bpf_user_rnd_state);
}

/* Return a pseudo-random u32 drawn from this CPU's bpf_user_rnd_state. */
BPF_CALL_0(bpf_user_rnd_u32)
{
	/* Should someone ever have the rather unwise idea to use some
	 * of the registers passed into this function, then note that
	 * this function is called from native eBPF and classic-to-eBPF
	 * transformations. Register assignments from both sides are
	 * different, f.e. classic always sets fn(ctx, A, X) here.
	 */
	struct rnd_state *state;
	u32 res;

	/* get_cpu_var()/put_cpu_var() bracket the access to the per-CPU state. */
	state = &get_cpu_var(bpf_user_rnd_state);
	res = prandom_u32_state(state);
	put_cpu_var(bpf_user_rnd_state);

	return res;
}

/* Return raw_smp_processor_id() of the CPU executing the call. */
BPF_CALL_0(bpf_get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

/* Weak definitions of helper functions in case we don't have bpf syscall.
*/
/* Map access helpers. */
const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
const struct bpf_func_proto bpf_map_update_elem_proto __weak;
const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
const struct bpf_func_proto bpf_map_push_elem_proto __weak;
const struct bpf_func_proto bpf_map_pop_elem_proto __weak;
const struct bpf_func_proto bpf_map_peek_elem_proto __weak;
const struct bpf_func_proto bpf_map_lookup_percpu_elem_proto __weak;
/* Spin lock helpers. */
const struct bpf_func_proto bpf_spin_lock_proto __weak;
const struct bpf_func_proto bpf_spin_unlock_proto __weak;
/* Time, random number and CPU/NUMA helpers. */
const struct bpf_func_proto bpf_jiffies64_proto __weak;

const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
const struct bpf_func_proto bpf_ktime_get_tai_ns_proto __weak;

/* Current-task, cgroup, BTF printing and retval helpers. */
const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
const struct bpf_func_proto bpf_get_current_comm_proto __weak;
const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_current_ancestor_cgroup_id_proto __weak;
const struct bpf_func_proto bpf_get_local_storage_proto __weak;
const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak;
const struct bpf_func_proto bpf_snprintf_btf_proto __weak;
const struct bpf_func_proto bpf_seq_printf_btf_proto __weak;
const struct bpf_func_proto bpf_set_retval_proto __weak;
const struct bpf_func_proto bpf_get_retval_proto __weak;

/* Weak fallback: no trace_printk proto is available, so return NULL. */
const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
{
	return NULL;
}

/* Weak fallback: no trace_vprintk proto is available, so return NULL. */
const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
{
	return NULL;
}

const struct bpf_func_proto * __weak
bpf_get_perf_event_read_value_proto(void)
{
	return NULL;
}

/* Weak fallback for event output: always fails with -ENOTSUPP (returned
 * through the u64 return type).
 */
u64 __weak
bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
	return -ENOTSUPP;
}
EXPORT_SYMBOL_GPL(bpf_event_output);

/* Always built-in helper functions. */
const struct bpf_func_proto bpf_tail_call_proto = {
	/* No C implementation is attached to this proto. */
	.func		= NULL,
	.gpl_only	= false,
	.ret_type	= RET_VOID,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

/* Stub for JITs that only support cBPF. eBPF programs are interpreted.
 * It is encouraged to implement bpf_int_jit_compile() instead, so that
 * eBPF and implicitly also cBPF can get JITed!
 *
 * This weak version returns @prog unchanged.
 */
struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
{
	return prog;
}

/* Stub for JITs that support eBPF. All cBPF code gets transformed into
 * eBPF by the kernel and is later compiled by bpf_int_jit_compile().
 */
void __weak bpf_jit_compile(struct bpf_prog *prog)
{
}

/* Weak default: report that no helper changes packet data. */
bool __weak bpf_helper_changes_pkt_data(enum bpf_func_id func_id)
{
	return false;
}

/* Return TRUE if the JIT backend wants verifier to enable sub-register usage
 * analysis code and wants explicit zero extension inserted by verifier.
 * Otherwise, return FALSE.
 *
 * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
 * you don't override this. JITs that don't want these extra insns can detect
 * them using insn_is_zext.
 */
bool __weak bpf_jit_needs_zext(void)
{
	return false;
}

/* By default, enable the verifier's mitigations against Spectre v1 and v4 for
 * all archs. The value returned must not change at runtime as there is
 * currently no support for reloading programs that were loaded without
 * mitigations.
 */
bool __weak bpf_jit_bypass_spec_v1(void)
{
	return false;
}

/* Spectre v4 counterpart of bpf_jit_bypass_spec_v1(); same default. */
bool __weak bpf_jit_bypass_spec_v4(void)
{
	return false;
}

/* Return true if the JIT inlines the call to the helper corresponding to
 * the imm.
*
 * The verifier will not patch the insn->imm for the call to the helper if
 * this returns true.
 */
bool __weak bpf_jit_inlines_helper_call(s32 imm)
{
	return false;
}

/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */
bool __weak bpf_jit_supports_subprog_tailcalls(void)
{
	return false;
}

/* Weak default: per-CPU instruction support is reported as absent. */
bool __weak bpf_jit_supports_percpu_insn(void)
{
	return false;
}

/* Weak default: kfunc call support is reported as absent. */
bool __weak bpf_jit_supports_kfunc_call(void)
{
	return false;
}

/* Weak default: far kfunc call support is reported as absent. */
bool __weak bpf_jit_supports_far_kfunc_call(void)
{
	return false;
}

/* Weak default: arena support is reported as absent. */
bool __weak bpf_jit_supports_arena(void)
{
	return false;
}

/* Weak default: no instruction is reported as supported, whatever @insn
 * and @in_arena are.
 */
bool __weak bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	return false;
}

/* Weak default for the user address limit: TASK_SIZE on 64-bit kernels
 * whose architecture has a non-overlapping address space, 0 otherwise.
 */
u64 __weak bpf_arch_uaddress_limit(void)
{
#if defined(CONFIG_64BIT) && defined(CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE)
	return TASK_SIZE;
#else
	return 0;
#endif
}

/* Return TRUE if the JIT backend satisfies the following two conditions:
 * 1) JIT backend supports atomic_xchg() on pointer-sized words.
 * 2) Under the specific arch, the implementation of xchg() is the same
 *    as atomic_xchg() on pointer-sized words.
 */
bool __weak bpf_jit_supports_ptr_xchg(void)
{
	return false;
}

/* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
 * skb_copy_bits(), so provide a weak definition of it for NET-less config.
*/
int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
			 int len)
{
	return -EFAULT;
}

/* Weak default: text poking is not supported. */
int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
			      void *addr1, void *addr2)
{
	return -ENOTSUPP;
}

/* Weak default: text copy is not supported; returns an ERR_PTR. */
void * __weak bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	return ERR_PTR(-ENOTSUPP);
}

/* Weak default: text invalidation is not supported. */
int __weak bpf_arch_text_invalidate(void *dst, size_t len)
{
	return -ENOTSUPP;
}

/* Weak default: exception support is reported as absent. */
bool __weak bpf_jit_supports_exceptions(void)
{
	return false;
}

/* Weak default: private stack support is reported as absent. */
bool __weak bpf_jit_supports_private_stack(void)
{
	return false;
}

/* Weak default stack walker: never invokes @consume_fn. */
void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
{
}

/* Weak default: timed may_goto support is reported as absent. */
bool __weak bpf_jit_supports_timed_may_goto(void)
{
	return false;
}

/* Weak default: returns 0. */
u64 __weak arch_bpf_timed_may_goto(void)
{
	return 0;
}

/* Write a may_goto timeout error and a stack dump to the BPF_STDERR stream
 * of the program found on the current stack. Does nothing when no program
 * is found, or without CONFIG_BPF_SYSCALL.
 */
static noinline void bpf_prog_report_may_goto_violation(void)
{
#ifdef CONFIG_BPF_SYSCALL
	struct bpf_stream_stage ss;
	struct bpf_prog *prog;

	prog = bpf_prog_find_from_stack();
	if (!prog)
		return;
	bpf_stream_stage(ss, prog, BPF_STDERR, ({
		bpf_stream_printk(ss, "ERROR: Timeout detected for may_goto instruction\n");
		bpf_stream_dump_stack(ss);
	}));
#endif
}

/* Decide how many more loop iterations a timed may_goto may run.
 *
 * @p: per-stack-frame state; its timestamp is set on the first call
 *
 * Return: BPF_MAX_TIMED_LOOPS to refresh the loop budget, or 0 once at
 * least NSEC_PER_SEC / 4 nanoseconds have passed since the recorded
 * timestamp (a violation is reported in that case).
 */
u64 bpf_check_timed_may_goto(struct bpf_timed_may_goto *p)
{
	u64 time = ktime_get_mono_fast_ns();

	/* Populate the timestamp for this stack frame, and refresh count. */
	if (!p->timestamp) {
		p->timestamp = time;
		return BPF_MAX_TIMED_LOOPS;
	}
	/* Check if we've exhausted our time slice, and zero count. */
	if (unlikely(time - p->timestamp >= (NSEC_PER_SEC / 4))) {
		bpf_prog_report_may_goto_violation();
		return 0;
	}
	/* Refresh the count for the stack frame.
*/
	return BPF_MAX_TIMED_LOOPS;
}

/* for configs without MMU or 32-bit */
__weak const struct bpf_map_ops arena_map_ops;
/* Weak default: returns 0. */
__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
{
	return 0;
}
/* Weak default: returns 0. */
__weak u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
{
	return 0;
}

#ifdef CONFIG_BPF_SYSCALL
/* Initialize bpf_global_ma at late_initcall time and record in
 * bpf_global_ma_set whether that succeeded.
 */
static int __init bpf_global_ma_init(void)
{
	int ret;

	ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
	bpf_global_ma_set = !ret;
	return ret;
}
late_initcall(bpf_global_ma_init);
#endif

/* Static key, initially false, exported for use outside this file. */
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);

/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
#include <linux/bpf_trace.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception);
EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx);

#ifdef CONFIG_BPF_SYSCALL

/* Map an address inside @prog to source file, source line and line number.
 *
 * @prog:  program (or subprogram) whose line info is searched
 * @ip:    address to resolve
 * @filep: set to the base name of the source file
 * @linep: set to the source line text with leading whitespace skipped
 * @nump:  set to the line number
 *
 * Return: 0 on success, -EINVAL if BTF, line info or JITed line info is
 * missing, -ENOENT if no line info entry precedes @ip.
 */
int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char **filep,
			   const char **linep, int *nump)
{
	int idx = -1, insn_start, insn_end, len;
	struct bpf_line_info *linfo;
	void **jited_linfo;
	struct btf *btf;
	int nr_linfo;

	btf = prog->aux->btf;
	linfo = prog->aux->linfo;
	jited_linfo = prog->aux->jited_linfo;

	if (!btf || !linfo || !jited_linfo)
		return -EINVAL;
	/* Use the length of func[func_idx] when a func table exists. */
	len = prog->aux->func ? prog->aux->func[prog->aux->func_idx]->len : prog->len;

	/* Rebase both tables at this program's first line info entry. */
	linfo = &prog->aux->linfo[prog->aux->linfo_idx];
	jited_linfo = &prog->aux->jited_linfo[prog->aux->linfo_idx];

	insn_start = linfo[0].insn_off;
	insn_end = insn_start + len;
	nr_linfo = prog->aux->nr_linfo - prog->aux->linfo_idx;

	/* Keep the last entry inside [insn_start, insn_end) whose JITed
	 * address is still below ip.
	 */
	for (int i = 0; i < nr_linfo &&
	     linfo[i].insn_off >= insn_start && linfo[i].insn_off < insn_end; i++) {
		if (jited_linfo[i] >= (void *)ip)
			break;
		idx = i;
	}

	if (idx == -1)
		return -ENOENT;

	/* Get base component of the file path. */
	*filep = btf_name_by_offset(btf, linfo[idx].file_name_off);
	*filep = kbasename(*filep);
	/* Obtain the source line, and strip whitespace in prefix.
*/
	*linep = btf_name_by_offset(btf, linfo[idx].line_off);
	while (isspace(**linep))
		*linep += 1;

	/* Extract the line number from the packed line_col field. */
	*nump = BPF_LINE_INFO_LINE_NUM(linfo[idx].line_col);
	return 0;
}

/* Cookie passed to the stack walker; receives the program that was found. */
struct walk_stack_ctx {
	struct bpf_prog *prog;
};

/* Stack walk callback: look up the BPF program containing @ip.
 *
 * Returns true when @ip belongs to no BPF program or to a subprogram, and
 * false once a main program has been stored in the cookie.
 * NOTE(review): true presumably means "keep walking" - confirm against
 * the arch_bpf_stack_walk() implementation.
 */
static bool find_from_stack_cb(void *cookie, u64 ip, u64 sp, u64 bp)
{
	struct walk_stack_ctx *ctxp = cookie;
	struct bpf_prog *prog;

	/*
	 * The RCU read lock is held to safely traverse the latch tree, but we
	 * don't need its protection when accessing the prog, since it has an
	 * active stack frame on the current stack trace, and won't disappear.
	 */
	rcu_read_lock();
	prog = bpf_prog_ksym_find(ip);
	rcu_read_unlock();
	if (!prog)
		return true;
	/* Subprograms are skipped; only a main program is recorded. */
	if (bpf_is_subprog(prog))
		return true;
	ctxp->prog = prog;
	return false;
}

/* Walk the current stack and return the first main BPF program found on
 * it, or NULL when the walk records none.
 */
struct bpf_prog *bpf_prog_find_from_stack(void)
{
	struct walk_stack_ctx ctx = {};

	arch_bpf_stack_walk(find_from_stack_cb, &ctx);
	return ctx.prog;
}

#endif
450 450 256 255 188 1 19 776 787 249 253 252 218 776 432 832 4 6665 6440 785 54 1310 513 219 6760 6550 6768 752 750 1336 6769 510 238 3324 487 2 3322 1 2 1 252 2 558 559 2 553 342 557 556 767 769 768 770 633 592 9 253 638 4 15 17 643 642 640 642 644 252 253 415 4 641 220 6778 762 6769 6727 472 293 58 602 3316 4559 1929 672 557 673 667 400 423 467 466 111 456 472 457 53 459 455 442 2 3 471 469 38 37 1 35 1 2 3 4 3 3 628 26 412 432 422 38 252 252 2 251 252 251 255 407 412 410 415 251 251 2 252 250 251 36 37 37 36 82 247 1 27 28 83 82 82 83 83 252 253 249 253 36 252 252 253 253 253 253 253 251 251 253 366 484 252 253 251 251 253 252 251 250 252 252 250 252 85 86 253 251 96 167 134 30 168 15 253 48 232 176 59 3 59 1 59 54 8 59 3 231 4 193 42 218 4 217 218 207 749 751 502 79 750 1 1 1 1 55 19 48 55 55 55 25 44 54 253 2 253 253 55 252 238 15 252 252 253 252 253 4 251 251 250 252 252 2 250 249 251 252 249 251 252 252 251 253 13 251 45 25 55 25 44 47 19 54 54 53 1 6574 6625 6593 6099 5204 6623 6583 6439 6571 55 55 54 1 55 55 55 55 54 54 54 54 54 53 1 1 53 54 54 24 45 54 48 18 14 14 216 219 218 218 4 215 200 24 219 219 219 28 198 219 219 218 219 219 219 219 207 217 216 44 23 167 188 23 168 189 166 23 47 219 46 23 166 232 219 19 219 235 235 177 57 234 235 289 427 50 645 23 191 742 752 749 49 750 738 132 133 133 2 2 49 48 49 48 34 1 25 34 15 17 26 322 97 316 308 322 318 318 53 53 53 53 53 49 49 49 15 35 49 48 511 410 218 85 411 125 507 508 366 296 1 510 121 248 333 508 413 124 15 15 16 2 14 2 2 197 99 31 1 180 92 409 90 88 76 14 90 853 232 240 141 90 16 513 48 53 469 528 24 686 80 859 459 49 219 1 18 511 53 234 293 857 468 752 409 708 243 745 636 620 125 553 89 445 529 466 220 198 199 110 138 199 200 200 198 198 588 446 186 192 189 188 9 192 191 446 1031 1028 258 985 989 1030 189 1026 1032 1026 13 174 693 515 623 2 625 628 626 629 630 628 631 624 509 2365 2375 2250 62 808 799 121 2355 754 2352 625 2355 2354 2353 649 689 40 40 40 40 40 37 40 40 40 40 40 12 40 10 5 15 16 40 40 
40 40 18 138 138 138 138 138 137 137 3 137 3 1209 1215 258 1734 256 255 18 138 3 197 200 110 138 138 137 137 138 40 40 39 40 40 18 1 40 40 4 4 4 4 4 4 4 4 4 4 189 186 4 4 189 2 189 105 105 106 106 326 328 324 326 517 64 456 15 449 155 513 521 522 240 503 502 858 855 832 308 306 4 435 292 231 11 11 292 293 56 200 197 1033 4 1032 102 100 1025 4 1019 164 159 22 787 21 3 4 4 4 789 261 783 788 1 253 842 854 5 5 5 5 1 2 5 4 1 96 97 97 96 97 525 319 318 1 322 319 322 22 22 21 200 200 97 106 105 106 106 106 106 106 106 104 106 106 105 402 402 290 136 35 5584 5584 64 5585 5597 48 1607 5587 113 5585 515 448 511 446 514 511 515 513 451 446 440 514 510 514 445 449 449 451 450 514 448 448 451 512 449 450 450 447 448 447 448 515 514 449 450 449 447 447 451 514 515 511 509 513 513 515 515 511 515 514 513 514 511 512 515 512 446 515 513 508 512 515 512 515 515 514 515 515 515 514 515 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 
1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 
1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 
2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 
2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 
2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 
3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 
3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 
4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 
4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 
4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 
5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 
5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 
6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 
6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 
6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 7223 7224 7225 7226 7227 7228 7229 7230 7231 7232 7233 7234 7235 7236 7237 7238 7239 7240 7241 7242 7243 7244 
7245 7246 7247 7248 7249 7250 7251 7252 7253 7254 7255 7256 7257 7258 7259 7260 7261 7262 7263 7264 7265 7266 7267 7268 7269 7270 7271 7272 7273 7274 7275 7276 7277 7278 7279 7280 7281 7282 7283 7284 7285 7286 7287 7288 7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315 7316 7317 7318 7319 7320 7321 7322 7323 7324 7325 7326 7327 7328 7329 7330 7331 7332 7333 7334 7335 7336 7337 7338 7339 7340 7341 7342 7343 7344 7345 7346 7347 7348 7349 7350 7351 7352 7353 7354 7355 7356 7357 7358 7359 7360 7361 7362 7363 7364 7365 7366 7367 7368 7369 7370 7371 7372 7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388 7389 7390 7391 7392 7393 7394 7395 7396 7397 7398 7399 7400 7401 7402 7403 7404 7405 7406 7407 7408 7409 7410 7411 7412 7413 7414 7415 7416 7417 7418 7419 7420 7421 7422 7423 7424 7425 7426 7427 7428 7429 7430 7431 7432 7433 7434 7435 7436 7437 7438 7439 7440 7441 7442 7443 7444 7445 7446 7447 7448 7449 7450 7451 7452 7453 7454 7455 7456 7457 7458 7459 7460 7461 7462 7463 7464 7465 7466 7467 7468 7469 7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480 7481 7482 7483 7484 7485 7486 7487 7488 7489 7490 7491 7492 7493 7494 7495 7496 7497 7498 7499 7500 7501 7502 7503 7504 7505 7506 7507 7508 7509 7510 7511 7512 7513 7514 7515 7516 7517 7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531 7532 7533 7534 7535 7536 7537 7538 7539 7540 7541 7542 7543 7544 7545 7546 7547 7548 7549 7550 7551 7552 7553 7554 7555 7556 7557 7558 7559 7560 7561 7562 7563 7564 7565 7566 7567 7568 7569 7570 7571 7572 7573 7574 7575 7576 7577 7578 7579 7580 7581 7582 7583 7584 7585 7586 7587 7588 7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600 7601 7602 7603 7604 7605 7606 7607 7608 7609 7610 7611 7612 7613 7614 7615 7616 7617 7618 7619 7620 7621 7622 7623 7624 7625 7626 7627 7628 7629 7630 7631 7632 7633 7634 7635 7636 7637 7638 7639 7640 7641 7642 7643 7644 
7645 7646 7647 7648 7649 7650 7651 7652 7653 7654 7655 7656 7657 7658 7659 7660 7661 7662 7663 7664 7665 7666 7667 7668 7669 7670 7671 7672 7673 7674 7675 7676 7677 7678 7679 7680 7681 7682 7683 7684 7685 7686 7687 7688 7689 7690 7691 7692 7693 7694 7695 7696 // SPDX-License-Identifier: GPL-2.0+ /* * Maple Tree implementation * Copyright (c) 2018-2022 Oracle Corporation * Authors: Liam R. Howlett <Liam.Howlett@oracle.com> * Matthew Wilcox <willy@infradead.org> * Copyright (c) 2023 ByteDance * Author: Peng Zhang <zhangpeng.00@bytedance.com> */ /* * DOC: Interesting implementation details of the Maple Tree * * Each node type has a number of slots for entries and a number of slots for * pivots. In the case of dense nodes, the pivots are implied by the position * and are simply the slot index + the minimum of the node. * * In regular B-Tree terms, pivots are called keys. The term pivot is used to * indicate that the tree is specifying ranges. Pivots may appear in the * subtree with an entry attached to the value whereas keys are unique to a * specific position of a B-tree. Pivot values are inclusive of the slot with * the same index. * * * The following illustrates the layout of a range64 node's slots and pivots. * * * Slots -> | 0 | 1 | 2 | ... | 12 | 13 | 14 | 15 | * ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ ┬ * │ │ │ │ │ │ │ │ └─ Implied maximum * │ │ │ │ │ │ │ └─ Pivot 14 * │ │ │ │ │ │ └─ Pivot 13 * │ │ │ │ │ └─ Pivot 12 * │ │ │ │ └─ Pivot 11 * │ │ │ └─ Pivot 2 * │ │ └─ Pivot 1 * │ └─ Pivot 0 * └─ Implied minimum * * Slot contents: * Internal (non-leaf) nodes contain pointers to other nodes. * Leaf nodes contain entries. * * The location of interest is often referred to as an offset. All offsets have * a slot, but the last offset has an implied pivot from the node above (or * ULONG_MAX for the root node). * * Ranges complicate certain write activities. When modifying any of * the B-tree variants, it is known that one entry will either be added or * deleted. 
When modifying the Maple Tree, one store operation may overwrite
 * the entire data set, or one half of the tree, or the middle half of the tree.
 *
 */

#include <linux/maple_tree.h>
#include <linux/xarray.h>
#include <linux/types.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/limits.h>
#include <asm/barrier.h>

#define CREATE_TRACE_POINTS
#include <trace/events/maple_tree.h>

/*
 * Kernel pointer hashing renders much of the maple tree dump useless as tagged
 * pointers get hashed to arbitrary values.
 *
 * If CONFIG_DEBUG_VM_MAPLE_TREE is set we are in a debug mode where it is
 * permissible to bypass this. Otherwise remain cautious and retain the hashing.
 *
 * Userland doesn't know about %px so also use %p there.
 */
#if defined(__KERNEL__) && defined(CONFIG_DEBUG_VM_MAPLE_TREE)
#define PTR_FMT "%px"
#else
#define PTR_FMT "%p"
#endif

/* Low bit OR-ed into a tree pointer by mas_tree_parent() to tag a root parent. */
#define MA_ROOT_PARENT 1

/*
 * Maple state flags
 * * MA_STATE_BULK		- Bulk insert mode
 * * MA_STATE_REBALANCE	- Indicate a rebalance during bulk insert
 * * MA_STATE_PREALLOC		- Preallocated nodes, WARN_ON allocation
 */
#define MA_STATE_BULK 1
#define MA_STATE_REBALANCE 2
#define MA_STATE_PREALLOC 4

/* Plain casts between the pointer flavours used for parents, nodes and enodes. */
#define ma_parent_ptr(x) ((struct maple_pnode *)(x))
/* The tree pointer of maple state @x tagged with MA_ROOT_PARENT. */
#define mas_tree_parent(x) ((unsigned long)(x->tree) | MA_ROOT_PARENT)
#define ma_mnode_ptr(x) ((struct maple_node *)(x))
#define ma_enode_ptr(x) ((struct maple_enode *)(x))

/* Slab cache that backs every mt_alloc_*() / mt_free_*() helper in this file. */
static struct kmem_cache *maple_node_cache;

#ifdef CONFIG_DEBUG_MAPLE_TREE
/* Per node type maximum, indexed by enum maple_type (debug builds only). */
static const unsigned long mt_max[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= ULONG_MAX,
	[maple_range_64]	= ULONG_MAX,
	[maple_arange_64]	= ULONG_MAX,
};
#define mt_node_max(x) mt_max[mte_node_type(x)]
#endif

/* Number of slots in a node, indexed by enum maple_type. */
static const unsigned char mt_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS,
};
#define mt_slot_count(x) mt_slots[mte_node_type(x)]

/*
 * Number of stored pivots in a node, indexed by enum maple_type: one fewer
 * than the slot count, and zero for dense nodes.
 */
static const unsigned char mt_pivots[] = {
	[maple_dense]		= 0,
	[maple_leaf_64]		= MAPLE_RANGE64_SLOTS - 1,
	[maple_range_64]	= MAPLE_RANGE64_SLOTS - 1,
	[maple_arange_64]	= MAPLE_ARANGE64_SLOTS - 1,
};
#define mt_pivot_count(x) mt_pivots[mte_node_type(x)]

/*
 * Per node type minimum slot count, indexed by enum maple_type.
 * NOTE(review): presumably the occupancy below which a node is considered
 * deficient - confirm against the users of mt_min_slot_count().
 */
static const unsigned char mt_min_slots[] = {
	[maple_dense]		= MAPLE_NODE_SLOTS / 2,
	[maple_leaf_64]		= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_range_64]	= (MAPLE_RANGE64_SLOTS / 2) - 2,
	[maple_arange_64]	= (MAPLE_ARANGE64_SLOTS / 2) - 1,
};
#define mt_min_slot_count(x) mt_min_slots[mte_node_type(x)]

#define MAPLE_BIG_NODE_SLOTS	(MAPLE_RANGE64_SLOTS * 2 + 2)
#define MAPLE_BIG_NODE_GAPS	(MAPLE_ARANGE64_SLOTS * 2 + 1)

/*
 * struct maple_big_node - oversized scratch node.
 * @pivot: pivots, one fewer than the number of slots
 * @slot: slot pointers; shares storage with @padding/@gap
 * @gap: gap values, placed after @padding inside the union
 * @b_end: NOTE(review): presumably the end of the valid data - confirm
 * @type: the maple node type the contents describe
 *
 * Sized for slightly more than two range64 nodes worth of slots (see
 * MAPLE_BIG_NODE_SLOTS).
 */
struct maple_big_node {
	unsigned long pivot[MAPLE_BIG_NODE_SLOTS - 1];
	union {
		struct maple_enode *slot[MAPLE_BIG_NODE_SLOTS];
		struct {
			unsigned long padding[MAPLE_BIG_NODE_GAPS];
			unsigned long gap[MAPLE_BIG_NODE_GAPS];
		};
	};
	unsigned char b_end;
	enum maple_type type;
};

/*
 * The maple_subtree_state is used to build a tree to replace a segment of an
 * existing tree in a more atomic way.  Any walkers of the older tree will hit a
 * dead node and restart on updates.
*/
struct maple_subtree_state {
	struct ma_state *orig_l;	/* Original left side of subtree */
	struct ma_state *orig_r;	/* Original right side of subtree */
	struct ma_state *l;		/* New left side of subtree */
	struct ma_state *m;		/* New middle of subtree (rare) */
	struct ma_state *r;		/* New right side of subtree */
	struct ma_topiary *free;	/* nodes to be freed */
	struct ma_topiary *destroy;	/* Nodes to be destroyed (walked and freed) */
	struct maple_big_node *bn;	/* Scratch big node used while building */
};

#ifdef CONFIG_KASAN_STACK
/* Prevent mas_wr_bnode() from exceeding the stack frame limit */
#define noinline_for_kasan noinline_for_stack
#else
#define noinline_for_kasan inline
#endif

/* Functions */

/*
 * mt_alloc_one() - Allocate a single maple node from maple_node_cache.
 * @gfp: The GFP flags passed to the slab allocator
 *
 * Return: The result of kmem_cache_alloc() (callers check it for NULL).
 */
static inline struct maple_node *mt_alloc_one(gfp_t gfp)
{
	return kmem_cache_alloc(maple_node_cache, gfp);
}

/*
 * mt_alloc_bulk() - Allocate several maple nodes from maple_node_cache.
 * @gfp: The GFP flags passed to the slab allocator
 * @size: The number of nodes requested
 * @nodes: The array that receives the allocated node pointers
 *
 * Return: The result of kmem_cache_alloc_bulk().
 */
static inline int mt_alloc_bulk(gfp_t gfp, size_t size, void **nodes)
{
	return kmem_cache_alloc_bulk(maple_node_cache, gfp, size, nodes);
}

/*
 * mt_free_one() - Return a single maple node to maple_node_cache.
 * @node: The node to free
 */
static inline void mt_free_one(struct maple_node *node)
{
	kmem_cache_free(maple_node_cache, node);
}

/*
 * mt_free_bulk() - Return several maple nodes to maple_node_cache.
 * @size: The number of entries in @nodes
 * @nodes: The array of node pointers; the __rcu annotation is cast away
 */
static inline void mt_free_bulk(size_t size, void __rcu **nodes)
{
	kmem_cache_free_bulk(maple_node_cache, size, (void **)nodes);
}

/*
 * ma_free_rcu() - Use rcu callback to free a maple node
 * @node: The node to free
 *
 * The maple tree uses the parent pointer to indicate this node is no longer in
 * use and will be freed.
*/ static void ma_free_rcu(struct maple_node *node) { WARN_ON(node->parent != ma_parent_ptr(node)); kfree_rcu(node, rcu); } static void mt_set_height(struct maple_tree *mt, unsigned char height) { unsigned int new_flags = mt->ma_flags; new_flags &= ~MT_FLAGS_HEIGHT_MASK; MT_BUG_ON(mt, height > MAPLE_HEIGHT_MAX); new_flags |= height << MT_FLAGS_HEIGHT_OFFSET; mt->ma_flags = new_flags; } static unsigned int mas_mt_height(struct ma_state *mas) { return mt_height(mas->tree); } static inline unsigned int mt_attr(struct maple_tree *mt) { return mt->ma_flags & ~MT_FLAGS_HEIGHT_MASK; } static __always_inline enum maple_type mte_node_type( const struct maple_enode *entry) { return ((unsigned long)entry >> MAPLE_NODE_TYPE_SHIFT) & MAPLE_NODE_TYPE_MASK; } static __always_inline bool ma_is_dense(const enum maple_type type) { return type < maple_leaf_64; } static __always_inline bool ma_is_leaf(const enum maple_type type) { return type < maple_range_64; } static __always_inline bool mte_is_leaf(const struct maple_enode *entry) { return ma_is_leaf(mte_node_type(entry)); } /* * We also reserve values with the bottom two bits set to '10' which are * below 4096 */ static __always_inline bool mt_is_reserved(const void *entry) { return ((unsigned long)entry < MAPLE_RESERVED_RANGE) && xa_is_internal(entry); } static __always_inline void mas_set_err(struct ma_state *mas, long err) { mas->node = MA_ERROR(err); mas->status = ma_error; } static __always_inline bool mas_is_ptr(const struct ma_state *mas) { return mas->status == ma_root; } static __always_inline bool mas_is_start(const struct ma_state *mas) { return mas->status == ma_start; } static __always_inline bool mas_is_none(const struct ma_state *mas) { return mas->status == ma_none; } static __always_inline bool mas_is_paused(const struct ma_state *mas) { return mas->status == ma_pause; } static __always_inline bool mas_is_overflow(struct ma_state *mas) { return mas->status == ma_overflow; } static inline bool 
mas_is_underflow(struct ma_state *mas) { return mas->status == ma_underflow; } static __always_inline struct maple_node *mte_to_node( const struct maple_enode *entry) { return (struct maple_node *)((unsigned long)entry & ~MAPLE_NODE_MASK); } /* * mte_to_mat() - Convert a maple encoded node to a maple topiary node. * @entry: The maple encoded node * * Return: a maple topiary pointer */ static inline struct maple_topiary *mte_to_mat(const struct maple_enode *entry) { return (struct maple_topiary *) ((unsigned long)entry & ~MAPLE_NODE_MASK); } /* * mas_mn() - Get the maple state node. * @mas: The maple state * * Return: the maple node (not encoded - bare pointer). */ static inline struct maple_node *mas_mn(const struct ma_state *mas) { return mte_to_node(mas->node); } /* * mte_set_node_dead() - Set a maple encoded node as dead. * @mn: The maple encoded node. */ static inline void mte_set_node_dead(struct maple_enode *mn) { mte_to_node(mn)->parent = ma_parent_ptr(mte_to_node(mn)); smp_wmb(); /* Needed for RCU */ } /* Bit 1 indicates the root is a node */ #define MAPLE_ROOT_NODE 0x02 /* maple_type stored bit 3-6 */ #define MAPLE_ENODE_TYPE_SHIFT 0x03 /* Bit 2 means a NULL somewhere below */ #define MAPLE_ENODE_NULL 0x04 static inline struct maple_enode *mt_mk_node(const struct maple_node *node, enum maple_type type) { return (void *)((unsigned long)node | (type << MAPLE_ENODE_TYPE_SHIFT) | MAPLE_ENODE_NULL); } static inline void *mte_mk_root(const struct maple_enode *node) { return (void *)((unsigned long)node | MAPLE_ROOT_NODE); } static inline void *mte_safe_root(const struct maple_enode *node) { return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE); } static inline void __maybe_unused *mte_set_full(const struct maple_enode *node) { return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL); } static inline void __maybe_unused *mte_clear_full(const struct maple_enode *node) { return (void *)((unsigned long)node | MAPLE_ENODE_NULL); } static inline bool __maybe_unused 
mte_has_null(const struct maple_enode *node) { return (unsigned long)node & MAPLE_ENODE_NULL; } static __always_inline bool ma_is_root(struct maple_node *node) { return ((unsigned long)node->parent & MA_ROOT_PARENT); } static __always_inline bool mte_is_root(const struct maple_enode *node) { return ma_is_root(mte_to_node(node)); } static inline bool mas_is_root_limits(const struct ma_state *mas) { return !mas->min && mas->max == ULONG_MAX; } static __always_inline bool mt_is_alloc(struct maple_tree *mt) { return (mt->ma_flags & MT_FLAGS_ALLOC_RANGE); } /* * The Parent Pointer * Excluding root, the parent pointer is 256B aligned like all other tree nodes. * When storing a 32 or 64 bit values, the offset can fit into 5 bits. The 16 * bit values need an extra bit to store the offset. This extra bit comes from * a reuse of the last bit in the node type. This is possible by using bit 1 to * indicate if bit 2 is part of the type or the slot. * * Note types: * 0x??1 = Root * 0x?00 = 16 bit nodes * 0x010 = 32 bit nodes * 0x110 = 64 bit nodes * * Slot size and alignment * 0b??1 : Root * 0b?00 : 16 bit values, type in 0-1, slot in 2-7 * 0b010 : 32 bit values, type in 0-2, slot in 3-7 * 0b110 : 64 bit values, type in 0-2, slot in 3-7 */ #define MAPLE_PARENT_ROOT 0x01 #define MAPLE_PARENT_SLOT_SHIFT 0x03 #define MAPLE_PARENT_SLOT_MASK 0xF8 #define MAPLE_PARENT_16B_SLOT_SHIFT 0x02 #define MAPLE_PARENT_16B_SLOT_MASK 0xFC #define MAPLE_PARENT_RANGE64 0x06 #define MAPLE_PARENT_RANGE32 0x04 #define MAPLE_PARENT_NOT_RANGE16 0x02 /* * mte_parent_shift() - Get the parent shift for the slot storage. 
* @parent: The parent pointer cast as an unsigned long * Return: The shift into that pointer to the star to of the slot */ static inline unsigned long mte_parent_shift(unsigned long parent) { /* Note bit 1 == 0 means 16B */ if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) return MAPLE_PARENT_SLOT_SHIFT; return MAPLE_PARENT_16B_SLOT_SHIFT; } /* * mte_parent_slot_mask() - Get the slot mask for the parent. * @parent: The parent pointer cast as an unsigned long. * Return: The slot mask for that parent. */ static inline unsigned long mte_parent_slot_mask(unsigned long parent) { /* Note bit 1 == 0 means 16B */ if (likely(parent & MAPLE_PARENT_NOT_RANGE16)) return MAPLE_PARENT_SLOT_MASK; return MAPLE_PARENT_16B_SLOT_MASK; } /* * mas_parent_type() - Return the maple_type of the parent from the stored * parent type. * @mas: The maple state * @enode: The maple_enode to extract the parent's enum * Return: The node->parent maple_type */ static inline enum maple_type mas_parent_type(struct ma_state *mas, struct maple_enode *enode) { unsigned long p_type; p_type = (unsigned long)mte_to_node(enode)->parent; if (WARN_ON(p_type & MAPLE_PARENT_ROOT)) return 0; p_type &= MAPLE_NODE_MASK; p_type &= ~mte_parent_slot_mask(p_type); switch (p_type) { case MAPLE_PARENT_RANGE64: /* or MAPLE_PARENT_ARANGE64 */ if (mt_is_alloc(mas->tree)) return maple_arange_64; return maple_range_64; } return 0; } /* * mas_set_parent() - Set the parent node and encode the slot * @mas: The maple state * @enode: The encoded maple node. * @parent: The encoded maple node that is the parent of @enode. * @slot: The slot that @enode resides in @parent. * * Slot number is encoded in the enode->parent bit 3-6 or 2-6, depending on the * parent type. 
*/ static inline void mas_set_parent(struct ma_state *mas, struct maple_enode *enode, const struct maple_enode *parent, unsigned char slot) { unsigned long val = (unsigned long)parent; unsigned long shift; unsigned long type; enum maple_type p_type = mte_node_type(parent); MAS_BUG_ON(mas, p_type == maple_dense); MAS_BUG_ON(mas, p_type == maple_leaf_64); switch (p_type) { case maple_range_64: case maple_arange_64: shift = MAPLE_PARENT_SLOT_SHIFT; type = MAPLE_PARENT_RANGE64; break; default: case maple_dense: case maple_leaf_64: shift = type = 0; break; } val &= ~MAPLE_NODE_MASK; /* Clear all node metadata in parent */ val |= (slot << shift) | type; mte_to_node(enode)->parent = ma_parent_ptr(val); } /* * mte_parent_slot() - get the parent slot of @enode. * @enode: The encoded maple node. * * Return: The slot in the parent node where @enode resides. */ static __always_inline unsigned int mte_parent_slot(const struct maple_enode *enode) { unsigned long val = (unsigned long)mte_to_node(enode)->parent; if (unlikely(val & MA_ROOT_PARENT)) return 0; /* * Okay to use MAPLE_PARENT_16B_SLOT_MASK as the last bit will be lost * by shift if the parent shift is MAPLE_PARENT_SLOT_SHIFT */ return (val & MAPLE_PARENT_16B_SLOT_MASK) >> mte_parent_shift(val); } /* * mte_parent() - Get the parent of @node. * @enode: The encoded maple node. * * Return: The parent maple node. */ static __always_inline struct maple_node *mte_parent(const struct maple_enode *enode) { return (void *)((unsigned long) (mte_to_node(enode)->parent) & ~MAPLE_NODE_MASK); } /* * ma_dead_node() - check if the @enode is dead. * @enode: The encoded maple node * * Return: true if dead, false otherwise. 
*/ static __always_inline bool ma_dead_node(const struct maple_node *node) { struct maple_node *parent; /* Do not reorder reads from the node prior to the parent check */ smp_rmb(); parent = (void *)((unsigned long) node->parent & ~MAPLE_NODE_MASK); return (parent == node); } /* * mte_dead_node() - check if the @enode is dead. * @enode: The encoded maple node * * Return: true if dead, false otherwise. */ static __always_inline bool mte_dead_node(const struct maple_enode *enode) { struct maple_node *node; node = mte_to_node(enode); return ma_dead_node(node); } /* * mas_allocated() - Get the number of nodes allocated in a maple state. * @mas: The maple state * * The ma_state alloc member is overloaded to hold a pointer to the first * allocated node or to the number of requested nodes to allocate. If bit 0 is * set, then the alloc contains the number of requested nodes. If there is an * allocated node, then the total allocated nodes is in that node. * * Return: The total number of nodes allocated */ static inline unsigned long mas_allocated(const struct ma_state *mas) { if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) return 0; return mas->alloc->total; } /* * mas_set_alloc_req() - Set the requested number of allocations. * @mas: the maple state * @count: the number of allocations. * * The requested number of allocations is either in the first allocated node, * located in @mas->alloc->request_count, or directly in @mas->alloc if there is * no allocated node. Set the request either in the node or do the necessary * encoding to store in @mas->alloc directly. */ static inline void mas_set_alloc_req(struct ma_state *mas, unsigned long count) { if (!mas->alloc || ((unsigned long)mas->alloc & 0x1)) { if (!count) mas->alloc = NULL; else mas->alloc = (struct maple_alloc *)(((count) << 1U) | 1U); return; } mas->alloc->request_count = count; } /* * mas_alloc_req() - get the requested number of allocations. 
* @mas: The maple state * * The alloc count is either stored directly in @mas, or in * @mas->alloc->request_count if there is at least one node allocated. Decode * the request count if it's stored directly in @mas->alloc. * * Return: The allocation request count. */ static inline unsigned int mas_alloc_req(const struct ma_state *mas) { if ((unsigned long)mas->alloc & 0x1) return (unsigned long)(mas->alloc) >> 1; else if (mas->alloc) return mas->alloc->request_count; return 0; } /* * ma_pivots() - Get a pointer to the maple node pivots. * @node: the maple node * @type: the node type * * In the event of a dead node, this array may be %NULL * * Return: A pointer to the maple node pivots */ static inline unsigned long *ma_pivots(struct maple_node *node, enum maple_type type) { switch (type) { case maple_arange_64: return node->ma64.pivot; case maple_range_64: case maple_leaf_64: return node->mr64.pivot; case maple_dense: return NULL; } return NULL; } /* * ma_gaps() - Get a pointer to the maple node gaps. * @node: the maple node * @type: the node type * * Return: A pointer to the maple node gaps */ static inline unsigned long *ma_gaps(struct maple_node *node, enum maple_type type) { switch (type) { case maple_arange_64: return node->ma64.gap; case maple_range_64: case maple_leaf_64: case maple_dense: return NULL; } return NULL; } /* * mas_safe_pivot() - get the pivot at @piv or mas->max. * @mas: The maple state * @pivots: The pointer to the maple node pivots * @piv: The pivot to fetch * @type: The maple node type * * Return: The pivot at @piv within the limit of the @pivots array, @mas->max * otherwise. */ static __always_inline unsigned long mas_safe_pivot(const struct ma_state *mas, unsigned long *pivots, unsigned char piv, enum maple_type type) { if (piv >= mt_pivots[type]) return mas->max; return pivots[piv]; } /* * mas_safe_min() - Return the minimum for a given offset. 
* @mas: The maple state
 * @pivots: The pointer to the maple node pivots
 * @offset: The offset into the pivot array
 *
 * Return: The minimum range value that is contained in @offset.
 */
static inline unsigned long
mas_safe_min(struct ma_state *mas, unsigned long *pivots, unsigned char offset)
{
	/* The range of a slot starts one past the previous pivot */
	if (likely(offset))
		return pivots[offset - 1] + 1;

	/* Offset 0 has no previous pivot; the node minimum applies */
	return mas->min;
}

/*
 * mte_set_pivot() - Set a pivot to a value in an encoded maple node.
 * @mn: The encoded maple node
 * @piv: The pivot offset
 * @val: The value of the pivot
 *
 * BUGs if @piv is not below the pivot count of the node type.
 */
static inline void mte_set_pivot(struct maple_enode *mn, unsigned char piv,
				unsigned long val)
{
	struct maple_node *node = mte_to_node(mn);
	enum maple_type type = mte_node_type(mn);

	BUG_ON(piv >= mt_pivots[type]);
	switch (type) {
	case maple_range_64:
	case maple_leaf_64:
		node->mr64.pivot[piv] = val;
		break;
	case maple_arange_64:
		node->ma64.pivot[piv] = val;
		break;
	case maple_dense:
		/* Dense nodes store no pivots */
		break;
	}

}

/*
 * ma_slots() - Get a pointer to the maple node slots.
 * @mn: The maple node
 * @mt: The maple node type
 *
 * Return: A pointer to the maple node slots
 */
static inline void __rcu **ma_slots(struct maple_node *mn, enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return mn->ma64.slot;
	case maple_range_64:
	case maple_leaf_64:
		return mn->mr64.slot;
	case maple_dense:
		return mn->slot;
	}

	return NULL;
}

/*
 * mt_write_locked() - Lockdep expression for "the tree is held for writing".
 * @mt: The maple tree
 *
 * Uses the external-lock check when the tree has an external lock, the
 * tree's own ma_lock otherwise.
 */
static inline bool mt_write_locked(const struct maple_tree *mt)
{
	return mt_external_lock(mt) ? mt_write_lock_is_held(mt) :
		lockdep_is_held(&mt->ma_lock);
}

/*
 * mt_locked() - Lockdep expression for "the tree lock is held".
 * @mt: The maple tree
 *
 * Same selection as mt_write_locked(), but with mt_lock_is_held() for the
 * external-lock case.
 */
static __always_inline bool mt_locked(const struct maple_tree *mt)
{
	return mt_external_lock(mt) ? mt_lock_is_held(mt) :
		lockdep_is_held(&mt->ma_lock);
}

/* mt_slot() - rcu_dereference_check() of @slots[@offset] under mt_locked(). */
static __always_inline void *mt_slot(const struct maple_tree *mt,
		void __rcu **slots, unsigned char offset)
{
	return rcu_dereference_check(slots[offset], mt_locked(mt));
}

/*
 * mt_slot_locked() - rcu_dereference_protected() of @slots[@offset] under
 * mt_write_locked().
 */
static __always_inline void *mt_slot_locked(struct maple_tree *mt,
		void __rcu **slots, unsigned char offset)
{
	return rcu_dereference_protected(slots[offset], mt_write_locked(mt));
}

/*
 * mas_slot_locked() - Get the slot value when holding the maple tree lock.
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset.
 */
static __always_inline void *mas_slot_locked(struct ma_state *mas,
		void __rcu **slots, unsigned char offset)
{
	return mt_slot_locked(mas->tree, slots, offset);
}

/*
 * mas_slot() - Get the slot value when not holding the maple tree lock.
 * @mas: The maple state
 * @slots: The pointer to the slots
 * @offset: The offset into the slots array to fetch
 *
 * Return: The entry stored in @slots at the @offset
 */
static __always_inline void *mas_slot(struct ma_state *mas, void __rcu **slots,
		unsigned char offset)
{
	return mt_slot(mas->tree, slots, offset);
}

/*
 * mas_root() - Get the maple tree root.
 * @mas: The maple state.
 *
 * Return: The pointer to the root of the tree
 */
static __always_inline void *mas_root(struct ma_state *mas)
{
	return rcu_dereference_check(mas->tree->ma_root, mt_locked(mas->tree));
}

/* mt_root_locked() - Get the tree root under mt_write_locked(). */
static inline void *mt_root_locked(struct maple_tree *mt)
{
	return rcu_dereference_protected(mt->ma_root, mt_write_locked(mt));
}

/*
 * mas_root_locked() - Get the maple tree root when holding the maple tree lock.
 * @mas: The maple state.
*
 * Return: The pointer to the root of the tree
 */
static inline void *mas_root_locked(struct ma_state *mas)
{
	return mt_root_locked(mas->tree);
}

/*
 * ma_meta() - Get a pointer to the metadata of a node.
 * @mn: The maple node
 * @mt: The maple node type
 *
 * Return: The ma64 metadata for maple_arange_64, the mr64 metadata for every
 * other type.
 */
static inline struct maple_metadata *ma_meta(struct maple_node *mn,
					     enum maple_type mt)
{
	switch (mt) {
	case maple_arange_64:
		return &mn->ma64.meta;
	default:
		return &mn->mr64.meta;
	}
}

/*
 * ma_set_meta() - Set the metadata information of a node.
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The offset of the highest sub-gap in this node.
 * @end: The end of the data in this node.
 */
static inline void ma_set_meta(struct maple_node *mn, enum maple_type mt,
			       unsigned char offset, unsigned char end)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
	meta->end = end;
}

/*
 * mt_clear_meta() - clear the metadata information of a node, if it exists
 * @mt: The maple tree
 * @mn: The maple node
 * @type: The maple node type
 *
 * Only maple_range_64 and maple_arange_64 nodes are touched; every other type
 * returns without changes.
 */
static inline void mt_clear_meta(struct maple_tree *mt, struct maple_node *mn,
				  enum maple_type type)
{
	struct maple_metadata *meta;
	unsigned long *pivots;
	void __rcu **slots;
	void *next;

	switch (type) {
	case maple_range_64:
		pivots = mn->mr64.pivot;
		/*
		 * A non-zero last pivot means the last slot may be in use, in
		 * which case it must not be overwritten as metadata.
		 */
		if (unlikely(pivots[MAPLE_RANGE64_SLOTS - 2])) {
			slots = mn->mr64.slot;
			next = mt_slot_locked(mt, slots,
					      MAPLE_RANGE64_SLOTS - 1);
			if (unlikely((mte_to_node(next) &&
				      mte_node_type(next))))
				return; /* no metadata, could be node */
		}
		fallthrough;
	case maple_arange_64:
		meta = ma_meta(mn, type);
		break;
	default:
		return;
	}

	meta->gap = 0;
	meta->end = 0;
}

/*
 * ma_meta_end() - Get the data end of a node from the metadata
 * @mn: The maple node
 * @mt: The maple node type
 *
 * Return: The stored end value.
 */
static inline unsigned char ma_meta_end(struct maple_node *mn,
					enum maple_type mt)
{
	struct maple_metadata *meta = ma_meta(mn, mt);

	return meta->end;
}

/*
 * ma_meta_gap() - Get the largest gap location of a node from the metadata
 * @mn: The maple node
 *
 * Always reads the ma64 layout.
 */
static inline unsigned char ma_meta_gap(struct maple_node *mn)
{
	return mn->ma64.meta.gap;
}

/*
 * ma_set_meta_gap() - Set the largest gap location in a nodes metadata
 * @mn: The maple node
 * @mt: The maple node type
 * @offset: The location of the largest gap.
 */
static inline void ma_set_meta_gap(struct maple_node *mn, enum maple_type mt,
				   unsigned char offset)
{

	struct maple_metadata *meta = ma_meta(mn, mt);

	meta->gap = offset;
}

/*
 * mat_add() - Add a @dead_enode to the ma_topiary of a list of dead nodes.
 * @mat: the ma_topiary, a linked list of dead nodes.
 * @dead_enode: the node to be marked as dead and added to the tail of the list
 *
 * Add the @dead_enode to the linked list in @mat.
 */
static inline void mat_add(struct ma_topiary *mat,
			   struct maple_enode *dead_enode)
{
	mte_set_node_dead(dead_enode);
	mte_to_mat(dead_enode)->next = NULL;
	/* First entry: it is both head and tail */
	if (!mat->tail) {
		mat->tail = mat->head = dead_enode;
		return;
	}

	mte_to_mat(mat->tail)->next = dead_enode;
	mat->tail = dead_enode;
}

static void mt_free_walk(struct rcu_head *head);
static void mt_destroy_walk(struct maple_enode *enode, struct maple_tree *mt,
			    bool free);
/*
 * mas_mat_destroy() - Free all nodes and subtrees in a dead list.
 * @mas: the maple state
 * @mat: the ma_topiary linked list of dead nodes to free.
 *
 * Destroy walk a dead list.
 */
static void mas_mat_destroy(struct ma_state *mas, struct ma_topiary *mat)
{
	struct maple_enode *next;
	struct maple_node *node;
	bool in_rcu = mt_in_rcu(mas->tree);

	while (mat->head) {
		/* Read the link and bare node before the walk runs */
		next = mte_to_mat(mat->head)->next;
		node = mte_to_node(mat->head);
		/* In RCU mode the walk does not free; call_rcu() does it later */
		mt_destroy_walk(mat->head, mas->tree, !in_rcu);
		if (in_rcu)
			call_rcu(&node->rcu, mt_free_walk);
		mat->head = next;
	}
}
/*
 * mas_descend() - Descend into the slot stored in the ma_state.
 * @mas: the maple state.
 *
 * Note: Not RCU safe, only use in write side or debug code.
*/
static inline void mas_descend(struct ma_state *mas)
{
	enum maple_type type;
	unsigned long *pivots;
	struct maple_node *node;
	void __rcu **slots;

	node = mas_mn(mas);
	type = mte_node_type(mas->node);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);

	/* Offset 0 keeps the current minimum; others start past the prior pivot */
	if (mas->offset)
		mas->min = pivots[mas->offset - 1] + 1;
	mas->max = mas_safe_pivot(mas, pivots, mas->offset, type);
	mas->node = mas_slot(mas, slots, mas->offset);
}

/*
 * mte_set_gap() - Set a maple node gap.
 * @mn: The encoded maple node
 * @gap: The offset of the gap to set
 * @val: The gap value
 *
 * Does nothing unless the node type is maple_arange_64.
 */
static inline void mte_set_gap(const struct maple_enode *mn,
				 unsigned char gap, unsigned long val)
{
	switch (mte_node_type(mn)) {
	default:
		break;
	case maple_arange_64:
		mte_to_node(mn)->ma64.gap[gap] = val;
		break;
	}
}

/*
 * mas_ascend() - Walk up a level of the tree.
 * @mas: The maple state
 *
 * Sets the @mas->max and @mas->min for the parent node of mas->node.  This
 * may cause several levels of walking up to find the correct min and max.
 * May find a dead node which will cause a premature return.
 * Return: 1 on dead node, 0 otherwise
 */
static int mas_ascend(struct ma_state *mas)
{
	struct maple_enode *p_enode; /* parent enode. */
	struct maple_enode *a_enode; /* ancestor enode. */
	struct maple_node *a_node; /* ancestor node. */
	struct maple_node *p_node; /* parent node. */
	unsigned char a_slot;
	enum maple_type a_type;
	unsigned long min, max;
	unsigned long *pivots;
	bool set_max = false, set_min = false;

	/* Already at the root: nothing above, only reset the offset */
	a_node = mas_mn(mas);
	if (ma_is_root(a_node)) {
		mas->offset = 0;
		return 0;
	}

	/* A node whose parent is itself has been marked dead */
	p_node = mte_parent(mas->node);
	if (unlikely(a_node == p_node))
		return 1;

	a_type = mas_parent_type(mas, mas->node);
	mas->offset = mte_parent_slot(mas->node);
	a_enode = mt_mk_node(p_node, a_type);

	/* Check to make sure all parent information is still accurate */
	if (p_node != mte_parent(mas->node))
		return 1;

	mas->node = a_enode;

	/* The root always spans the full index range */
	if (mte_is_root(a_enode)) {
		mas->max = ULONG_MAX;
		mas->min = 0;
		return 0;
	}

	min = 0;
	max = ULONG_MAX;

	/*
	 * !mas->offset implies that parent node min == mas->min.
	 * mas->offset > 0 implies that we need to walk up to find the
	 * implied pivot min.
	 */
	if (!mas->offset) {
		min = mas->min;
		set_min = true;
	}

	if (mas->max == ULONG_MAX)
		set_max = true;

	/* Climb until both limits are known or the root is reached */
	do {
		p_enode = a_enode;
		a_type = mas_parent_type(mas, p_enode);
		a_node = mte_parent(p_enode);
		a_slot = mte_parent_slot(p_enode);
		a_enode = mt_mk_node(a_node, a_type);
		pivots = ma_pivots(a_node, a_type);

		/* Check before trusting the pivots read from this ancestor */
		if (unlikely(ma_dead_node(a_node)))
			return 1;

		if (!set_min && a_slot) {
			set_min = true;
			min = pivots[a_slot - 1] + 1;
		}

		if (!set_max && a_slot < mt_pivots[a_type]) {
			set_max = true;
			max = pivots[a_slot];
		}

		/* Check again: the node may have died while it was being read */
		if (unlikely(ma_dead_node(a_node)))
			return 1;

		if (unlikely(ma_is_root(a_node)))
			break;

	} while (!set_min || !set_max);

	mas->max = max;
	mas->min = min;
	return 0;
}

/*
 * mas_pop_node() - Get a previously allocated maple node from the maple state.
 * @mas: The maple state
 *
 * Return: A pointer to a zeroed maple node, or NULL (with a warning) when no
 * node is allocated.
 */
static inline struct maple_node *mas_pop_node(struct ma_state *mas)
{
	struct maple_alloc *ret, *node = mas->alloc;
	unsigned long total = mas_allocated(mas);
	unsigned int req = mas_alloc_req(mas);

	/* nothing or a request pending. */
	if (WARN_ON(!total))
		return NULL;

	if (total == 1) {
		/* single allocation in this ma_state */
		mas->alloc = NULL;
		ret = node;
		goto single_node;
	}

	if (node->node_count == 1) {
		/* Single allocation in this node. */
		mas->alloc = node->slot[0];
		mas->alloc->total = node->total - 1;
		ret = node;
		goto new_head;
	}
	/* Otherwise hand out the last node stored in the head's slots */
	node->total--;
	ret = node->slot[--node->node_count];
	node->slot[node->node_count] = NULL;

single_node:
new_head:
	/* An outstanding request grows by one for the node just handed out */
	if (req) {
		req++;
		mas_set_alloc_req(mas, req);
	}

	memset(ret, 0, sizeof(*ret));
	return (struct maple_node *)ret;
}

/*
 * mas_push_node() - Push a node back on the maple state allocation.
 * @mas: The maple state
 * @used: The used maple node
 *
 * Stores the maple node back into @mas->alloc for reuse.  Updates allocated and
 * requested node count as necessary.
 */
static inline void mas_push_node(struct ma_state *mas, struct maple_node *used)
{
	struct maple_alloc *reuse = (struct maple_alloc *)used;
	struct maple_alloc *head = mas->alloc;
	unsigned long count;
	unsigned int requested = mas_alloc_req(mas);

	count = mas_allocated(mas);

	reuse->request_count = 0;
	reuse->node_count = 0;
	if (count) {
		/* Room in the head: store the node in its next free slot */
		if (head->node_count < MAPLE_ALLOC_SLOTS) {
			head->slot[head->node_count++] = reuse;
			head->total++;
			goto done;
		}
		/* Head is full: the pushed node becomes the new head */
		reuse->slot[0] = head;
		reuse->node_count = 1;
	}

	reuse->total = count + 1;
	mas->alloc = reuse;
done:
	if (requested > 1)
		mas_set_alloc_req(mas, requested - 1);
}

/*
 * mas_alloc_nodes() - Allocate nodes into a maple state
 * @mas: The maple state
 * @gfp: The GFP Flags
 *
 * Allocates the number of nodes currently requested in @mas.  On failure the
 * outstanding request is stored back and @mas is set to the -ENOMEM error.
 */
static inline void mas_alloc_nodes(struct ma_state *mas, gfp_t gfp)
{
	struct maple_alloc *node;
	unsigned long allocated = mas_allocated(mas);
	unsigned int requested = mas_alloc_req(mas);
	unsigned int count;
	void **slots = NULL;
	unsigned int max_req = 0;

	if (!requested)
		return;

	mas_set_alloc_req(mas, 0);
	if (mas->mas_flags & MA_STATE_PREALLOC) {
		if (allocated)
			return;
		/* Reached only with nothing allocated, so this always warns */
		WARN_ON(!allocated);
	}

	/* A new head is needed when there is none or the current one is full */
	if (!allocated || mas->alloc->node_count == MAPLE_ALLOC_SLOTS) {
		node = (struct maple_alloc *)mt_alloc_one(gfp);
		if (!node)
			goto nomem_one;

		if (allocated) {
			node->slot[0] = mas->alloc;
			node->node_count = 1;
		} else {
			node->node_count = 0;
		}

		mas->alloc = node;
		node->total = ++allocated;
		node->request_count = 0;
		requested--;
	}

	node = mas->alloc;
	while (requested) {
		/* Fill the free slots of this node with a bulk allocation */
		max_req = MAPLE_ALLOC_SLOTS - node->node_count;
		slots = (void **)&node->slot[node->node_count];
		max_req = min(requested, max_req);
		count = mt_alloc_bulk(gfp, max_req, slots);
		if (!count)
			goto nomem_bulk;

		/* slot[0] may be walked into below; give it sane counters */
		if (node->node_count == 0) {
			node->slot[0]->node_count = 0;
			node->slot[0]->request_count = 0;
		}

		node->node_count += count;
		allocated += count;
		/* find a non-full node */
		do {
			node = node->slot[0];
		} while (unlikely(node->node_count == MAPLE_ALLOC_SLOTS));
		requested -= count;
	}
	mas->alloc->total = allocated;
	return;

nomem_bulk:
	/* Clean up potential freed allocations on bulk failure */
	memset(slots, 0, max_req * sizeof(unsigned long));
	mas->alloc->total = allocated;
nomem_one:
	mas_set_alloc_req(mas, requested);
	mas_set_err(mas, -ENOMEM);
}

/*
 * mas_free() - Free an encoded maple node
 * @mas: The maple state
 * @used: The encoded maple node to free.
 *
 * Uses rcu free if necessary, pushes @used back on the maple state allocations
 * otherwise.
 */
static inline void mas_free(struct ma_state *mas, struct maple_enode *used)
{
	struct maple_node *tmp = mte_to_node(used);

	if (mt_in_rcu(mas->tree))
		ma_free_rcu(tmp);
	else
		mas_push_node(mas, tmp);
}

/*
 * mas_node_count_gfp() - Check if enough nodes are allocated and request more
 * if there is not enough nodes.
 * @mas: The maple state
 * @count: The number of nodes needed
 * @gfp: the gfp flags
 */
static void mas_node_count_gfp(struct ma_state *mas, int count, gfp_t gfp)
{
	unsigned long allocated = mas_allocated(mas);

	/* Request only the shortfall */
	if (allocated < count) {
		mas_set_alloc_req(mas, count - allocated);
		mas_alloc_nodes(mas, gfp);
	}
}

/*
 * mas_node_count() - Check if enough nodes are allocated and request more if
 * there is not enough nodes.
* @mas: The maple state * @count: The number of nodes needed * * Note: Uses GFP_NOWAIT for gfp flags. */ static void mas_node_count(struct ma_state *mas, int count) { return mas_node_count_gfp(mas, count, GFP_NOWAIT); } /* * mas_start() - Sets up maple state for operations. * @mas: The maple state. * * If mas->status == ma_start, then set the min, max and depth to * defaults. * * Return: * - If mas->node is an error or not mas_start, return NULL. * - If it's an empty tree: NULL & mas->status == ma_none * - If it's a single entry: The entry & mas->status == ma_root * - If it's a tree: NULL & mas->status == ma_active */ static inline struct maple_enode *mas_start(struct ma_state *mas) { if (likely(mas_is_start(mas))) { struct maple_enode *root; mas->min = 0; mas->max = ULONG_MAX; retry: mas->depth = 0; root = mas_root(mas); /* Tree with nodes */ if (likely(xa_is_node(root))) { mas->depth = 0; mas->status = ma_active; mas->node = mte_safe_root(root); mas->offset = 0; if (mte_dead_node(mas->node)) goto retry; return NULL; } mas->node = NULL; /* empty tree */ if (unlikely(!root)) { mas->status = ma_none; mas->offset = MAPLE_NODE_SLOTS; return NULL; } /* Single entry tree */ mas->status = ma_root; mas->offset = MAPLE_NODE_SLOTS; /* Single entry tree. */ if (mas->index > 0) return NULL; return root; } return NULL; } /* * ma_data_end() - Find the end of the data in a node. * @node: The maple node * @type: The maple node type * @pivots: The array of pivots in the node * @max: The maximum value in the node * * Uses metadata to find the end of the data when possible. * Return: The zero indexed last slot with data (may be null). 
*/ static __always_inline unsigned char ma_data_end(struct maple_node *node, enum maple_type type, unsigned long *pivots, unsigned long max) { unsigned char offset; if (!pivots) return 0; if (type == maple_arange_64) return ma_meta_end(node, type); offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); if (likely(pivots[offset] == max)) return offset; return mt_pivots[type]; } /* * mas_data_end() - Find the end of the data (slot). * @mas: the maple state * * This method is optimized to check the metadata of a node if the node type * supports data end metadata. * * Return: The zero indexed last slot with data (may be null). */ static inline unsigned char mas_data_end(struct ma_state *mas) { enum maple_type type; struct maple_node *node; unsigned char offset; unsigned long *pivots; type = mte_node_type(mas->node); node = mas_mn(mas); if (type == maple_arange_64) return ma_meta_end(node, type); pivots = ma_pivots(node, type); if (unlikely(ma_dead_node(node))) return 0; offset = mt_pivots[type] - 1; if (likely(!pivots[offset])) return ma_meta_end(node, type); if (likely(pivots[offset] == mas->max)) return offset; return mt_pivots[type]; } /* * mas_leaf_max_gap() - Returns the largest gap in a leaf node * @mas: the maple state * * Return: The maximum gap in the leaf. */ static unsigned long mas_leaf_max_gap(struct ma_state *mas) { enum maple_type mt; unsigned long pstart, gap, max_gap; struct maple_node *mn; unsigned long *pivots; void __rcu **slots; unsigned char i; unsigned char max_piv; mt = mte_node_type(mas->node); mn = mas_mn(mas); slots = ma_slots(mn, mt); max_gap = 0; if (unlikely(ma_is_dense(mt))) { gap = 0; for (i = 0; i < mt_slots[mt]; i++) { if (slots[i]) { if (gap > max_gap) max_gap = gap; gap = 0; } else { gap++; } } if (gap > max_gap) max_gap = gap; return max_gap; } /* * Check the first implied pivot optimizes the loop below and slot 1 may * be skipped if there is a gap in slot 0. 
*/ pivots = ma_pivots(mn, mt); if (likely(!slots[0])) { max_gap = pivots[0] - mas->min + 1; i = 2; } else { i = 1; } /* reduce max_piv as the special case is checked before the loop */ max_piv = ma_data_end(mn, mt, pivots, mas->max) - 1; /* * Check end implied pivot which can only be a gap on the right most * node. */ if (unlikely(mas->max == ULONG_MAX) && !slots[max_piv + 1]) { gap = ULONG_MAX - pivots[max_piv]; if (gap > max_gap) max_gap = gap; if (max_gap > pivots[max_piv] - mas->min) return max_gap; } for (; i <= max_piv; i++) { /* data == no gap. */ if (likely(slots[i])) continue; pstart = pivots[i - 1]; gap = pivots[i] - pstart; if (gap > max_gap) max_gap = gap; /* There cannot be two gaps in a row. */ i++; } return max_gap; } /* * ma_max_gap() - Get the maximum gap in a maple node (non-leaf) * @node: The maple node * @gaps: The pointer to the gaps * @mt: The maple node type * @off: Pointer to store the offset location of the gap. * * Uses the metadata data end to scan backwards across set gaps. * * Return: The maximum gap value */ static inline unsigned long ma_max_gap(struct maple_node *node, unsigned long *gaps, enum maple_type mt, unsigned char *off) { unsigned char offset, i; unsigned long max_gap = 0; i = offset = ma_meta_end(node, mt); do { if (gaps[i] > max_gap) { max_gap = gaps[i]; offset = i; } } while (i--); *off = offset; return max_gap; } /* * mas_max_gap() - find the largest gap in a non-leaf node and set the slot. * @mas: The maple state. * * Return: The gap value. 
*/ static inline unsigned long mas_max_gap(struct ma_state *mas) { unsigned long *gaps; unsigned char offset; enum maple_type mt; struct maple_node *node; mt = mte_node_type(mas->node); if (ma_is_leaf(mt)) return mas_leaf_max_gap(mas); node = mas_mn(mas); MAS_BUG_ON(mas, mt != maple_arange_64); offset = ma_meta_gap(node); gaps = ma_gaps(node, mt); return gaps[offset]; } /* * mas_parent_gap() - Set the parent gap and any gaps above, as needed * @mas: The maple state * @offset: The gap offset in the parent to set * @new: The new gap value. * * Set the parent gap then continue to set the gap upwards, using the metadata * of the parent to see if it is necessary to check the node above. */ static inline void mas_parent_gap(struct ma_state *mas, unsigned char offset, unsigned long new) { unsigned long meta_gap = 0; struct maple_node *pnode; struct maple_enode *penode; unsigned long *pgaps; unsigned char meta_offset; enum maple_type pmt; pnode = mte_parent(mas->node); pmt = mas_parent_type(mas, mas->node); penode = mt_mk_node(pnode, pmt); pgaps = ma_gaps(pnode, pmt); ascend: MAS_BUG_ON(mas, pmt != maple_arange_64); meta_offset = ma_meta_gap(pnode); meta_gap = pgaps[meta_offset]; pgaps[offset] = new; if (meta_gap == new) return; if (offset != meta_offset) { if (meta_gap > new) return; ma_set_meta_gap(pnode, pmt, offset); } else if (new < meta_gap) { new = ma_max_gap(pnode, pgaps, pmt, &meta_offset); ma_set_meta_gap(pnode, pmt, meta_offset); } if (ma_is_root(pnode)) return; /* Go to the parent node. */ pnode = mte_parent(penode); pmt = mas_parent_type(mas, penode); pgaps = ma_gaps(pnode, pmt); offset = mte_parent_slot(penode); penode = mt_mk_node(pnode, pmt); goto ascend; } /* * mas_update_gap() - Update a nodes gaps and propagate up if necessary. * @mas: the maple state. 
*/ static inline void mas_update_gap(struct ma_state *mas) { unsigned char pslot; unsigned long p_gap; unsigned long max_gap; if (!mt_is_alloc(mas->tree)) return; if (mte_is_root(mas->node)) return; max_gap = mas_max_gap(mas); pslot = mte_parent_slot(mas->node); p_gap = ma_gaps(mte_parent(mas->node), mas_parent_type(mas, mas->node))[pslot]; if (p_gap != max_gap) mas_parent_gap(mas, pslot, max_gap); } /* * mas_adopt_children() - Set the parent pointer of all nodes in @parent to * @parent with the slot encoded. * @mas: the maple state (for the tree) * @parent: the maple encoded node containing the children. */ static inline void mas_adopt_children(struct ma_state *mas, struct maple_enode *parent) { enum maple_type type = mte_node_type(parent); struct maple_node *node = mte_to_node(parent); void __rcu **slots = ma_slots(node, type); unsigned long *pivots = ma_pivots(node, type); struct maple_enode *child; unsigned char offset; offset = ma_data_end(node, type, pivots, mas->max); do { child = mas_slot_locked(mas, slots, offset); mas_set_parent(mas, child, parent, offset); } while (offset--); } /* * mas_put_in_tree() - Put a new node in the tree, smp_wmb(), and mark the old * node as dead. * @mas: the maple state with the new node * @old_enode: The old maple encoded node to replace. 
* @new_height: if we are inserting a root node, update the height of the tree
 */
/* NOTE(review): @new_height is char here but unsigned char in mas_replace_node(). */
static inline void mas_put_in_tree(struct ma_state *mas,
		struct maple_enode *old_enode, char new_height)
	__must_hold(mas->tree->ma_lock)
{
	unsigned char offset;
	void __rcu **slots;

	if (mte_is_root(mas->node)) {
		/*
		 * New root: set its parent pointer first, then publish it as
		 * the tree root and record the new height.
		 */
		mas_mn(mas)->parent = ma_parent_ptr(mas_tree_parent(mas));
		rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
		mt_set_height(mas->tree, new_height);
	} else {

		/* Publish the new node in the parent slot it reports. */
		offset = mte_parent_slot(mas->node);
		slots = ma_slots(mte_parent(mas->node),
				 mas_parent_type(mas, mas->node));
		rcu_assign_pointer(slots[offset], mas->node);
	}

	/* The old node is marked dead only after the new one is in place. */
	mte_set_node_dead(old_enode);
}

/*
 * mas_replace_node() - Replace a node by putting it in the tree, marking it
 * dead, and freeing it.
 * @mas: the ma_state with @mas->node pointing to the new node.
 * @old_enode: The old maple encoded node.
 * @new_height: The new height of the tree as a result of the operation
 *
 * Calls mas_put_in_tree() to install @mas->node, then mas_free() to dispose of
 * @old_enode.
 */
static inline void mas_replace_node(struct ma_state *mas,
		struct maple_enode *old_enode, unsigned char new_height)
	__must_hold(mas->tree->ma_lock)
{
	mas_put_in_tree(mas, old_enode, new_height);
	mas_free(mas, old_enode);
}

/*
 * mas_find_child() - Find a child who has the parent @mas->node.
 * @mas: the maple state with the parent.
 * @child: the maple state to store the child.
*/ static inline bool mas_find_child(struct ma_state *mas, struct ma_state *child) __must_hold(mas->tree->ma_lock) { enum maple_type mt; unsigned char offset; unsigned char end; unsigned long *pivots; struct maple_enode *entry; struct maple_node *node; void __rcu **slots; mt = mte_node_type(mas->node); node = mas_mn(mas); slots = ma_slots(node, mt); pivots = ma_pivots(node, mt); end = ma_data_end(node, mt, pivots, mas->max); for (offset = mas->offset; offset <= end; offset++) { entry = mas_slot_locked(mas, slots, offset); if (mte_parent(entry) == node) { *child = *mas; mas->offset = offset + 1; child->offset = offset; mas_descend(child); child->offset = 0; return true; } } return false; } /* * mab_shift_right() - Shift the data in mab right. Note, does not clean out the * old data or set b_node->b_end. * @b_node: the maple_big_node * @shift: the shift count */ static inline void mab_shift_right(struct maple_big_node *b_node, unsigned char shift) { unsigned long size = b_node->b_end * sizeof(unsigned long); memmove(b_node->pivot + shift, b_node->pivot, size); memmove(b_node->slot + shift, b_node->slot, size); if (b_node->type == maple_arange_64) memmove(b_node->gap + shift, b_node->gap, size); } /* * mab_middle_node() - Check if a middle node is needed (unlikely) * @b_node: the maple_big_node that contains the data. * @split: the potential split location * @slot_count: the size that can be stored in a single node being considered. * * Return: true if a middle node is required. */ static inline bool mab_middle_node(struct maple_big_node *b_node, int split, unsigned char slot_count) { unsigned char size = b_node->b_end; if (size >= 2 * slot_count) return true; if (!b_node->slot[split] && (size >= 2 * slot_count - 1)) return true; return false; } /* * mab_no_null_split() - ensure the split doesn't fall on a NULL * @b_node: the maple_big_node with the data * @split: the suggested split location * @slot_count: the number of slots in the node being considered. 
*
 * Return: the split location.
 */
static inline int mab_no_null_split(struct maple_big_node *b_node,
				    unsigned char split, unsigned char slot_count)
{
	if (!b_node->slot[split]) {
		/*
		 * If the split is less than the max slot && the right side will
		 * still be sufficient, then increment the split on NULL.
		 * Otherwise move the split one slot to the left.
		 */
		if ((split < slot_count - 1) &&
		    (b_node->b_end - split) > (mt_min_slots[b_node->type]))
			split++;
		else
			split--;
	}
	return split;
}

/*
 * mab_calc_split() - Calculate the split location and if there needs to be two
 * splits.
 * @mas: The maple state
 * @bn: The maple_big_node with the data
 * @mid_split: The second split, if required.  0 otherwise.
 *
 * In bulk mode (MA_STATE_BULK) on a leaf this also sets MA_STATE_REBALANCE in
 * @mas->mas_flags.
 *
 * Return: The first split location.  The middle split is set in @mid_split.
 */
static inline int mab_calc_split(struct ma_state *mas,
	 struct maple_big_node *bn, unsigned char *mid_split)
{
	unsigned char b_end = bn->b_end;
	int split = b_end / 2; /* Assume equal split. */
	unsigned char slot_count = mt_slots[bn->type];

	/*
	 * To support gap tracking, all NULL entries are kept together and a node cannot
	 * end on a NULL entry, with the exception of the left-most leaf.  The
	 * limitation means that the split of a node must be checked for this condition
	 * and be able to put more data in one direction or the other.
	 */
	if (unlikely((mas->mas_flags & MA_STATE_BULK))) {
		/* Bulk mode: leave only the minimum slot count on the right. */
		*mid_split = 0;
		split = b_end - mt_min_slots[bn->type];

		if (!ma_is_leaf(bn->type))
			return split;

		mas->mas_flags |= MA_STATE_REBALANCE;
		/* Do not end the left node on a NULL entry. */
		if (!bn->slot[split])
			split--;
		return split;
	}

	/*
	 * Although extremely rare, it is possible to enter what is known as the 3-way
	 * split scenario.  The 3-way split comes about by means of a store of a range
	 * that overwrites the end and beginning of two full nodes.  The result is a set
	 * of entries that cannot be stored in 2 nodes.  Sometimes, these two nodes can
	 * also be located in different parent nodes which are also full.  This can
	 * carry upwards all the way to the root in the worst case.
	 */
	if (unlikely(mab_middle_node(bn, split, slot_count))) {
		split = b_end / 3;
		*mid_split = split * 2;
	} else {
		*mid_split = 0;
	}

	/* Avoid ending a node on a NULL entry */
	split = mab_no_null_split(bn, split, slot_count);

	/* The same adjustment applies to the middle split when there is one. */
	if (unlikely(*mid_split))
		*mid_split = mab_no_null_split(bn, *mid_split, slot_count);

	return split;
}

/*
 * mas_mab_cp() - Copy data from a maple state inclusively to a maple_big_node
 * and set @b_node->b_end to the next free slot.
 * @mas: The maple state
 * @mas_start: The starting slot to copy
 * @mas_end: The end slot to copy (inclusively)
 * @b_node: The maple_big_node to place the data
 * @mab_start: The starting location in maple_big_node to store the data.
 *
 * Pivots are copied one at a time and copying stops early at a zero pivot or
 * at a pivot equal to @mas->max.  Slots are then copied in one memcpy(), and
 * gaps as well for non-leaf nodes of an allocation tree.
 */
static inline void mas_mab_cp(struct ma_state *mas, unsigned char mas_start,
			unsigned char mas_end, struct maple_big_node *b_node,
			unsigned char mab_start)
{
	enum maple_type mt;
	struct maple_node *node;
	void __rcu **slots;
	unsigned long *pivots, *gaps;
	/* i indexes the source node, j indexes the big node */
	int i = mas_start, j = mab_start;
	unsigned char piv_end;

	node = mas_mn(mas);
	mt = mte_node_type(mas->node);
	pivots = ma_pivots(node, mt);
	if (!i) {
		/* Pivot 0 is copied without the zero/max checks used below. */
		b_node->pivot[j] = pivots[i++];
		if (unlikely(i > mas_end))
			goto complete;
		j++;
	}

	/* Only indices below mt_pivots[mt] are read from the pivot array. */
	piv_end = min(mas_end, mt_pivots[mt]);
	for (; i < piv_end; i++, j++) {
		b_node->pivot[j] = pivots[i];
		/* A zero pivot ends the copy. */
		if (unlikely(!b_node->pivot[j]))
			goto complete;

		/* So does a pivot that reaches the node maximum. */
		if (unlikely(mas->max == b_node->pivot[j]))
			goto complete;
	}

	/* The final pivot goes through mas_safe_pivot(). */
	b_node->pivot[j] = mas_safe_pivot(mas, pivots, i, mt);

complete:
	b_node->b_end = ++j;
	/* j becomes the number of entries copied */
	j -= mab_start;
	slots = ma_slots(node, mt);
	memcpy(b_node->slot + mab_start, slots + mas_start, sizeof(void *) * j);
	if (!ma_is_leaf(mt) && mt_is_alloc(mas->tree)) {
		gaps = ma_gaps(node, mt);
		memcpy(b_node->gap + mab_start, gaps + mas_start,
		       sizeof(unsigned long) * j);
	}
}

/*
 * mas_leaf_set_meta() - Set the metadata of a leaf if possible.
* @node: The maple node
 * @mt: The maple type
 * @end: The node end
 */
static inline void mas_leaf_set_meta(struct maple_node *node,
		enum maple_type mt, unsigned char end)
{
	/* The metadata is only written when @end is below the last slot. */
	if (end < mt_slots[mt] - 1)
		ma_set_meta(node, mt, 0, end);
}

/*
 * mab_mas_cp() - Copy data from maple_big_node to a maple encoded node.
 * @b_node: the maple_big_node that has the data
 * @mab_start: the start location in @b_node.
 * @mab_end: The end location in @b_node (inclusively)
 * @mas: The maple state with the maple encoded node.
 * @new_max: When true, set @mas->max to the last pivot copied.
 *
 * Copies pivots and slots into @mas->node and writes the node metadata.  For
 * non-leaf nodes of an allocation tree the gaps are copied too and the offset
 * of the largest gap is stored in the metadata.
 */
static inline void mab_mas_cp(struct maple_big_node *b_node,
			      unsigned char mab_start, unsigned char mab_end,
			      struct ma_state *mas, bool new_max)
{
	/* i indexes the big node, j indexes the destination node */
	int i, j = 0;
	enum maple_type mt = mte_node_type(mas->node);
	struct maple_node *node = mte_to_node(mas->node);
	void __rcu **slots = ma_slots(node, mt);
	unsigned long *pivots = ma_pivots(node, mt);
	unsigned long *gaps = NULL;
	unsigned char end;

	/* More entries than pivots: the last entry has no stored pivot. */
	if (mab_end - mab_start > mt_pivots[mt])
		mab_end--;

	/* Clear the final slot when the final pivot is unset. */
	if (!pivots[mt_pivots[mt] - 1])
		slots[mt_pivots[mt]] = NULL;

	i = mab_start;
	/* Copy pivots up to @mab_end, stopping early at a zero pivot. */
	do {
		pivots[j++] = b_node->pivot[i++];
	} while (i <= mab_end && likely(b_node->pivot[i]));

	memcpy(slots, b_node->slot + mab_start,
	       sizeof(void *) * (i - mab_start));

	if (new_max)
		mas->max = b_node->pivot[i - 1];

	end = j - 1;
	if (likely(!ma_is_leaf(mt) && mt_is_alloc(mas->tree))) {
		unsigned long max_gap = 0;
		unsigned char offset = 0;

		gaps = ma_gaps(node, mt);
		/* Copy the gaps backwards while tracking the largest one. */
		do {
			gaps[--j] = b_node->gap[--i];
			if (gaps[j] > max_gap) {
				offset = j;
				max_gap = gaps[j];
			}
		} while (j);

		ma_set_meta(node, mt, offset, end);
	} else {
		mas_leaf_set_meta(node, mt, end);
	}
}

/*
 * mas_bulk_rebalance() - Rebalance the end of a tree after a bulk insert.
* @mas: The maple state
 * @end: The maple node end
 * @mt: The maple node type
 */
static inline void mas_bulk_rebalance(struct ma_state *mas, unsigned char end,
				      enum maple_type mt)
{
	/* Only relevant in bulk mode and for non-root nodes. */
	if (!(mas->mas_flags & MA_STATE_BULK))
		return;

	if (mte_is_root(mas->node))
		return;

	/* Above the minimum slot count: clear the rebalance flag. */
	if (end > mt_min_slots[mt]) {
		mas->mas_flags &= ~MA_STATE_REBALANCE;
		return;
	}
}

/*
 * mas_store_b_node() - Store an @entry into the b_node while also copying the
 * data from a maple encoded node.
 * @wr_mas: the maple write state
 * @b_node: the maple_big_node to fill with data
 * @offset_end: the offset to end copying
 *
 * Nothing is returned; the end of the stored data is left in @b_node->b_end
 * and @mas->offset is set to the big-node offset of the new entry.
 */
static noinline_for_kasan void mas_store_b_node(struct ma_wr_state *wr_mas,
		struct maple_big_node *b_node, unsigned char offset_end)
{
	unsigned char slot;
	unsigned char b_end;
	/* Possible underflow of piv will wrap back to 0 before use. */
	unsigned long piv;
	struct ma_state *mas = wr_mas->mas;

	b_node->type = wr_mas->type;
	b_end = 0;
	slot = mas->offset;
	if (slot) {
		/* Copy start data up to insert. */
		mas_mab_cp(mas, 0, slot - 1, b_node, 0);
		b_end = b_node->b_end;
		piv = b_node->pivot[b_end - 1];
	} else
		piv = mas->min - 1;

	if (piv + 1 < mas->index) {
		/* Handle range starting after old range */
		b_node->slot[b_end] = wr_mas->content;
		if (!wr_mas->content)
			b_node->gap[b_end] = mas->index - 1 - piv;
		b_node->pivot[b_end++] = mas->index - 1;
	}

	/* Store the new entry. */
	mas->offset = b_end;
	b_node->slot[b_end] = wr_mas->entry;
	b_node->pivot[b_end] = mas->last;

	/* Appended. */
	if (mas->last >= mas->max)
		goto b_end;

	/* Handle new range ending before old range ends */
	piv = mas_safe_pivot(mas, wr_mas->pivots, offset_end, wr_mas->type);
	if (piv > mas->last) {
		if (piv == ULONG_MAX)
			mas_bulk_rebalance(mas, b_node->b_end, wr_mas->type);

		/* The remainder belongs to the entry found at @offset_end. */
		if (offset_end != slot)
			wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
							  offset_end);

		b_node->slot[++b_end] = wr_mas->content;
		/*
		 * NOTE(review): the earlier gap is the exact size of its range,
		 * but this one is one larger than the size of [last + 1, piv].
		 * Confirm whether the "+ 1" is intended.
		 */
		if (!wr_mas->content)
			b_node->gap[b_end] = piv - mas->last + 1;
		b_node->pivot[b_end] = piv;
	}

	slot = offset_end + 1;
	if (slot > mas->end)
		goto b_end;

	/* Copy end data to the end of the node. */
	mas_mab_cp(mas, slot, mas->end + 1, b_node, ++b_end);
	/* mas_mab_cp() set b_end to the next free slot; step back by one. */
	b_node->b_end--;
	return;

b_end:
	b_node->b_end = b_end;
}

/*
 * mas_prev_sibling() - Find the previous node with the same parent.
 * @mas: the maple state
 *
 * On success @mas is moved to the sibling; otherwise it is left unchanged.
 *
 * Return: True if there is a previous sibling, false otherwise.
 */
static inline bool mas_prev_sibling(struct ma_state *mas)
{
	unsigned int p_slot = mte_parent_slot(mas->node);

	/* For root node, p_slot is set to 0 by mte_parent_slot(). */
	if (!p_slot)
		return false;

	mas_ascend(mas);
	mas->offset = p_slot - 1;
	mas_descend(mas);
	return true;
}

/*
 * mas_next_sibling() - Find the next node with the same parent.
 * @mas: the maple state
 *
 * Works on a copy of @mas so that @mas is only modified when a next sibling
 * exists.
 *
 * Return: true if there is a next sibling, false otherwise.
 */
static inline bool mas_next_sibling(struct ma_state *mas)
{
	MA_STATE(parent, mas->tree, mas->index, mas->last);

	if (mte_is_root(mas->node))
		return false;

	parent = *mas;
	mas_ascend(&parent);
	parent.offset = mte_parent_slot(mas->node) + 1;
	/* Past the parent's last slot: there is no next sibling. */
	if (parent.offset > mas_data_end(&parent))
		return false;

	*mas = parent;
	mas_descend(mas);
	return true;
}

/*
 * mas_node_or_none() - Set the enode and state.
 * @mas: the maple state
 * @enode: The encoded maple node.
 *
 * Set the node to the enode and the status.
*/ static inline void mas_node_or_none(struct ma_state *mas, struct maple_enode *enode) { if (enode) { mas->node = enode; mas->status = ma_active; } else { mas->node = NULL; mas->status = ma_none; } } /* * mas_wr_node_walk() - Find the correct offset for the index in the @mas. * If @mas->index cannot be found within the containing * node, we traverse to the last entry in the node. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. */ static inline void mas_wr_node_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; unsigned char count, offset; if (unlikely(ma_is_dense(wr_mas->type))) { wr_mas->r_max = wr_mas->r_min = mas->index; mas->offset = mas->index = mas->min; return; } wr_mas->node = mas_mn(wr_mas->mas); wr_mas->pivots = ma_pivots(wr_mas->node, wr_mas->type); count = mas->end = ma_data_end(wr_mas->node, wr_mas->type, wr_mas->pivots, mas->max); offset = mas->offset; while (offset < count && mas->index > wr_mas->pivots[offset]) offset++; wr_mas->r_max = offset < count ? 
wr_mas->pivots[offset] : mas->max; wr_mas->r_min = mas_safe_min(mas, wr_mas->pivots, offset); wr_mas->offset_end = mas->offset = offset; } /* * mast_rebalance_next() - Rebalance against the next node * @mast: The maple subtree state */ static inline void mast_rebalance_next(struct maple_subtree_state *mast) { unsigned char b_end = mast->bn->b_end; mas_mab_cp(mast->orig_r, 0, mt_slot_count(mast->orig_r->node), mast->bn, b_end); mast->orig_r->last = mast->orig_r->max; } /* * mast_rebalance_prev() - Rebalance against the previous node * @mast: The maple subtree state */ static inline void mast_rebalance_prev(struct maple_subtree_state *mast) { unsigned char end = mas_data_end(mast->orig_l) + 1; unsigned char b_end = mast->bn->b_end; mab_shift_right(mast->bn, end); mas_mab_cp(mast->orig_l, 0, end - 1, mast->bn, 0); mast->l->min = mast->orig_l->min; mast->orig_l->index = mast->orig_l->min; mast->bn->b_end = end + b_end; mast->l->offset += end; } /* * mast_spanning_rebalance() - Rebalance nodes with nearest neighbour favouring * the node to the right. Checking the nodes to the right then the left at each * level upwards until root is reached. * Data is copied into the @mast->bn. * @mast: The maple_subtree_state. 
*/
static inline
bool mast_spanning_rebalance(struct maple_subtree_state *mast)
{
	/* Saved copies so the side that is not used can be restored. */
	struct ma_state r_tmp = *mast->orig_r;
	struct ma_state l_tmp = *mast->orig_l;
	/* Number of levels ascended, and so the number to descend again. */
	unsigned char depth = 0;

	do {
		mas_ascend(mast->orig_r);
		mas_ascend(mast->orig_l);
		depth++;
		if (mast->orig_r->offset < mas_data_end(mast->orig_r)) {
			/* A node exists to the right: descend along its left edge. */
			mast->orig_r->offset++;
			do {
				mas_descend(mast->orig_r);
				mast->orig_r->offset = 0;
			} while (--depth);

			mast_rebalance_next(mast);
			*mast->orig_l = l_tmp;
			return true;
		} else if (mast->orig_l->offset != 0) {
			/* A node exists to the left: descend along its right edge. */
			mast->orig_l->offset--;
			do {
				mas_descend(mast->orig_l);
				mast->orig_l->offset =
					mas_data_end(mast->orig_l);
			} while (--depth);

			mast_rebalance_prev(mast);
			*mast->orig_r = r_tmp;
			return true;
		}
	} while (!mte_is_root(mast->orig_r->node));

	/* No neighbour on either side at any level: restore both states. */
	*mast->orig_r = r_tmp;
	*mast->orig_l = l_tmp;
	return false;
}

/*
 * mast_ascend() - Ascend the original left and right maple states.
 * @mast: the maple subtree state.
 *
 * Ascend the original left and right sides.  Set the offsets to point to the
 * data already in the new tree (@mast->l and @mast->r).
 *
 * A single ma_wr_state is used for both walks, and @mast->bn->type is set to
 * the type of the left node.
 */
static inline void mast_ascend(struct maple_subtree_state *mast)
{
	MA_WR_STATE(wr_mas, mast->orig_r,  NULL);
	mas_ascend(mast->orig_l);
	mas_ascend(mast->orig_r);

	/* Walk the right side to the slot that covers @mast->r->max. */
	mast->orig_r->offset = 0;
	mast->orig_r->index = mast->r->max;
	/* last should be larger than or equal to index */
	if (mast->orig_r->last < mast->orig_r->index)
		mast->orig_r->last = mast->orig_r->index;

	wr_mas.type = mte_node_type(mast->orig_r->node);
	mas_wr_node_walk(&wr_mas);
	/* Set up the left side of things */
	mast->orig_l->offset = 0;
	mast->orig_l->index = mast->l->min;
	wr_mas.mas = mast->orig_l;
	wr_mas.type = mte_node_type(mast->orig_l->node);
	mas_wr_node_walk(&wr_mas);

	mast->bn->type = wr_mas.type;
}

/*
 * mas_new_ma_node() - Create and return a new maple node.  Helper function.
 * @mas: the maple state with the allocations.
 * @b_node: the maple_big_node with the type encoding.
 *
 * Use the node type from the maple_big_node to allocate a new node from the
 * ma_state.
This function exists mainly for code readability. * * Return: A new maple encoded node */ static inline struct maple_enode *mas_new_ma_node(struct ma_state *mas, struct maple_big_node *b_node) { return mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)), b_node->type); } /* * mas_mab_to_node() - Set up right and middle nodes * * @mas: the maple state that contains the allocations. * @b_node: the node which contains the data. * @left: The pointer which will have the left node * @right: The pointer which may have the right node * @middle: the pointer which may have the middle node (rare) * @mid_split: the split location for the middle node * * Return: the split of left. */ static inline unsigned char mas_mab_to_node(struct ma_state *mas, struct maple_big_node *b_node, struct maple_enode **left, struct maple_enode **right, struct maple_enode **middle, unsigned char *mid_split) { unsigned char split = 0; unsigned char slot_count = mt_slots[b_node->type]; *left = mas_new_ma_node(mas, b_node); *right = NULL; *middle = NULL; *mid_split = 0; if (b_node->b_end < slot_count) { split = b_node->b_end; } else { split = mab_calc_split(mas, b_node, mid_split); *right = mas_new_ma_node(mas, b_node); } if (*mid_split) *middle = mas_new_ma_node(mas, b_node); return split; } /* * mab_set_b_end() - Add entry to b_node at b_node->b_end and increment the end * pointer. * @b_node: the big node to add the entry * @mas: the maple state to get the pivot (mas->max) * @entry: the entry to add, if NULL nothing happens. */ static inline void mab_set_b_end(struct maple_big_node *b_node, struct ma_state *mas, void *entry) { if (!entry) return; b_node->slot[b_node->b_end] = entry; if (mt_is_alloc(mas->tree)) b_node->gap[b_node->b_end] = mas_max_gap(mas); b_node->pivot[b_node->b_end++] = mas->max; } /* * mas_set_split_parent() - combine_then_separate helper function. 
Sets the parent * of @mas->node to either @left or @right, depending on @slot and @split * * @mas: the maple state with the node that needs a parent * @left: possible parent 1 * @right: possible parent 2 * @slot: the slot the mas->node was placed * @split: the split location between @left and @right */ static inline void mas_set_split_parent(struct ma_state *mas, struct maple_enode *left, struct maple_enode *right, unsigned char *slot, unsigned char split) { if (mas_is_none(mas)) return; if ((*slot) <= split) mas_set_parent(mas, mas->node, left, *slot); else if (right) mas_set_parent(mas, mas->node, right, (*slot) - split - 1); (*slot)++; } /* * mte_mid_split_check() - Check if the next node passes the mid-split * @l: Pointer to left encoded maple node. * @m: Pointer to middle encoded maple node. * @r: Pointer to right encoded maple node. * @slot: The offset * @split: The split location. * @mid_split: The middle split. */ static inline void mte_mid_split_check(struct maple_enode **l, struct maple_enode **r, struct maple_enode *right, unsigned char slot, unsigned char *split, unsigned char mid_split) { if (*r == right) return; if (slot < mid_split) return; *l = *r; *r = right; *split = mid_split; } /* * mast_set_split_parents() - Helper function to set three nodes parents. Slot * is taken from @mast->l. * @mast: the maple subtree state * @left: the left node * @right: the right node * @split: the split location. 
*/
static inline void mast_set_split_parents(struct maple_subtree_state *mast,
					  struct maple_enode *left,
					  struct maple_enode *middle,
					  struct maple_enode *right,
					  unsigned char split,
					  unsigned char mid_split)
{
	struct ma_state *children[3];
	struct maple_enode *l = left;
	struct maple_enode *r = middle ? middle : right;
	unsigned char slot;
	int i;

	if (mas_is_none(mast->l))
		return;

	slot = mast->l->offset;

	/* The three new nodes occupy consecutive slots, in this order. */
	children[0] = mast->l;
	children[1] = mast->m;
	children[2] = mast->r;

	for (i = 0; i < 3; i++) {
		mte_mid_split_check(&l, &r, right, slot, &split, mid_split);
		mas_set_split_parent(children[i], l, r, &slot, split);
	}
}

/*
 * mas_topiary_node() - Mark one node dead and dispose of it.
 * @mas: The maple state for pushing nodes
 * @tmp_mas: The maple state whose node is disposed of; ignored when its
 *           status is ma_none.
 * @in_rcu: If the tree is in rcu mode
 *
 * The node will either be RCU freed or pushed back on the maple state.
 */
static inline void mas_topiary_node(struct ma_state *mas,
		struct ma_state *tmp_mas, bool in_rcu)
{
	struct maple_enode *dead;
	struct maple_node *raw;

	if (mas_is_none(tmp_mas))
		return;

	dead = tmp_mas->node;
	raw = mte_to_node(dead);
	mte_set_node_dead(dead);

	if (!in_rcu) {
		mas_push_node(mas, raw);
		return;
	}

	ma_free_rcu(raw);
}

/*
 * mas_topiary_replace() - Replace the data with new data, then repair the
 * parent links within the new tree.  Iterate over the dead sub-tree and collect
 * the dead subtrees and topiary the nodes that are no longer of use.
 *
 * The new tree will have up to three children with the correct parent.  Keep
 * track of the new entries as they need to be followed to find the next level
 * of new entries.
 *
 * The old tree will have up to three children with the old parent.  Keep track
 * of the old entries as they may have more nodes below replaced.  Nodes within
 * [index, last] are dead subtrees, others need to be freed and followed.
* * @mas: The maple state pointing at the new data * @old_enode: The maple encoded node being replaced * @new_height: The new height of the tree as a result of the operation * */ static inline void mas_topiary_replace(struct ma_state *mas, struct maple_enode *old_enode, unsigned char new_height) { struct ma_state tmp[3], tmp_next[3]; MA_TOPIARY(subtrees, mas->tree); bool in_rcu; int i, n; /* Place data in tree & then mark node as old */ mas_put_in_tree(mas, old_enode, new_height); /* Update the parent pointers in the tree */ tmp[0] = *mas; tmp[0].offset = 0; tmp[1].status = ma_none; tmp[2].status = ma_none; while (!mte_is_leaf(tmp[0].node)) { n = 0; for (i = 0; i < 3; i++) { if (mas_is_none(&tmp[i])) continue; while (n < 3) { if (!mas_find_child(&tmp[i], &tmp_next[n])) break; n++; } mas_adopt_children(&tmp[i], tmp[i].node); } if (MAS_WARN_ON(mas, n == 0)) break; while (n < 3) tmp_next[n++].status = ma_none; for (i = 0; i < 3; i++) tmp[i] = tmp_next[i]; } /* Collect the old nodes that need to be discarded */ if (mte_is_leaf(old_enode)) return mas_free(mas, old_enode); tmp[0] = *mas; tmp[0].offset = 0; tmp[0].node = old_enode; tmp[1].status = ma_none; tmp[2].status = ma_none; in_rcu = mt_in_rcu(mas->tree); do { n = 0; for (i = 0; i < 3; i++) { if (mas_is_none(&tmp[i])) continue; while (n < 3) { if (!mas_find_child(&tmp[i], &tmp_next[n])) break; if ((tmp_next[n].min >= tmp_next->index) && (tmp_next[n].max <= tmp_next->last)) { mat_add(&subtrees, tmp_next[n].node); tmp_next[n].status = ma_none; } else { n++; } } } if (MAS_WARN_ON(mas, n == 0)) break; while (n < 3) tmp_next[n++].status = ma_none; for (i = 0; i < 3; i++) { mas_topiary_node(mas, &tmp[i], in_rcu); tmp[i] = tmp_next[i]; } } while (!mte_is_leaf(tmp[0].node)); for (i = 0; i < 3; i++) mas_topiary_node(mas, &tmp[i], in_rcu); mas_mat_destroy(mas, &subtrees); } /* * mas_wmb_replace() - Write memory barrier and replace * @mas: The maple state * @old_enode: The old maple encoded node that is being replaced. 
* @new_height: The new height of the tree as a result of the operation
 *
 * Updates gap as necessary.
 */
static inline void mas_wmb_replace(struct ma_state *mas,
		struct maple_enode *old_enode, unsigned char new_height)
{
	/*
	 * NOTE(review): no barrier is issued in this body; presumably the
	 * ordering is handled inside mas_topiary_replace() - confirm.
	 */
	/* Insert the new data in the tree */
	mas_topiary_replace(mas, old_enode, new_height);

	/* The gap update is skipped when the new node is a leaf. */
	if (mte_is_leaf(mas->node))
		return;

	mas_update_gap(mas);
}

/*
 * mast_cp_to_nodes() - Copy data out to nodes.
 * @mast: The maple subtree state
 * @left: The left encoded maple node
 * @middle: The middle encoded maple node
 * @right: The right encoded maple node
 * @split: The location to split between left and (middle ? middle : right)
 * @mid_split: The location to split between middle and right.
 */
static inline void mast_cp_to_nodes(struct maple_subtree_state *mast,
	struct maple_enode *left, struct maple_enode *middle,
	struct maple_enode *right, unsigned char split, unsigned char mid_split)
{
	bool new_lmax = true;

	mas_node_or_none(mast->l, left);
	mas_node_or_none(mast->m, middle);
	mas_node_or_none(mast->r, right);

	mast->l->min = mast->orig_l->min;
	/*
	 * When the split is at the end of the big node, the left node takes
	 * the original right max and mab_mas_cp() is told not to set a new one.
	 */
	if (split == mast->bn->b_end) {
		mast->l->max = mast->orig_r->max;
		new_lmax = false;
	}

	mab_mas_cp(mast->bn, 0, split, mast->l, new_lmax);

	if (middle) {
		mab_mas_cp(mast->bn, 1 + split, mid_split, mast->m, true);
		mast->m->min = mast->bn->pivot[split] + 1;
		/* The right node now starts after the middle split. */
		split = mid_split;
	}

	mast->r->max = mast->orig_r->max;
	if (right) {
		mab_mas_cp(mast->bn, 1 + split, mast->bn->b_end, mast->r, false);
		mast->r->min = mast->bn->pivot[split] + 1;
	}
}

/*
 * mast_combine_cp_left - Copy in the original left side of the tree into the
 * combined data set in the maple subtree state big node.
* @mast: The maple subtree state */ static inline void mast_combine_cp_left(struct maple_subtree_state *mast) { unsigned char l_slot = mast->orig_l->offset; if (!l_slot) return; mas_mab_cp(mast->orig_l, 0, l_slot - 1, mast->bn, 0); } /* * mast_combine_cp_right: Copy in the original right side of the tree into the * combined data set in the maple subtree state big node. * @mast: The maple subtree state */ static inline void mast_combine_cp_right(struct maple_subtree_state *mast) { if (mast->bn->pivot[mast->bn->b_end - 1] >= mast->orig_r->max) return; mas_mab_cp(mast->orig_r, mast->orig_r->offset + 1, mt_slot_count(mast->orig_r->node), mast->bn, mast->bn->b_end); mast->orig_r->last = mast->orig_r->max; } /* * mast_sufficient: Check if the maple subtree state has enough data in the big * node to create at least one sufficient node * @mast: the maple subtree state */ static inline bool mast_sufficient(struct maple_subtree_state *mast) { if (mast->bn->b_end > mt_min_slot_count(mast->orig_l->node)) return true; return false; } /* * mast_overflow: Check if there is too much data in the subtree state for a * single node. 
* @mast: The maple subtree state */ static inline bool mast_overflow(struct maple_subtree_state *mast) { if (mast->bn->b_end > mt_slot_count(mast->orig_l->node)) return true; return false; } static inline void *mtree_range_walk(struct ma_state *mas) { unsigned long *pivots; unsigned char offset; struct maple_node *node; struct maple_enode *next, *last; enum maple_type type; void __rcu **slots; unsigned char end; unsigned long max, min; unsigned long prev_max, prev_min; next = mas->node; min = mas->min; max = mas->max; do { last = next; node = mte_to_node(next); type = mte_node_type(next); pivots = ma_pivots(node, type); end = ma_data_end(node, type, pivots, max); prev_min = min; prev_max = max; if (pivots[0] >= mas->index) { offset = 0; max = pivots[0]; goto next; } offset = 1; while (offset < end) { if (pivots[offset] >= mas->index) { max = pivots[offset]; break; } offset++; } min = pivots[offset - 1] + 1; next: slots = ma_slots(node, type); next = mt_slot(mas->tree, slots, offset); if (unlikely(ma_dead_node(node))) goto dead_node; } while (!ma_is_leaf(type)); mas->end = end; mas->offset = offset; mas->index = min; mas->last = max; mas->min = prev_min; mas->max = prev_max; mas->node = last; return (void *)next; dead_node: mas_reset(mas); return NULL; } /* * mas_spanning_rebalance() - Rebalance across two nodes which may not be peers. * @mas: The starting maple state * @mast: The maple_subtree_state, keeps track of 4 maple states. * @count: The estimated count of iterations needed. * * Follow the tree upwards from @l_mas and @r_mas for @count, or until the root * is hit. First @b_node is split into two entries which are inserted into the * next iteration of the loop. @b_node is returned populated with the final * iteration. @mas is used to obtain allocations. orig_l_mas keeps track of the * nodes that will remain active by using orig_l_mas->index and orig_l_mas->last * to account of what has been copied into the new sub-tree. 
The update of
 * orig_l_mas->last is used in mas_consume to find the slots that will need to
 * be either freed or destroyed. orig_l_mas->depth keeps track of the height of
 * the new sub-tree in case the sub-tree becomes the full tree.
 */
static void mas_spanning_rebalance(struct ma_state *mas,
		struct maple_subtree_state *mast, unsigned char count)
{
	/*
	 * NOTE(review): the header comment names @l_mas, @r_mas, @b_node and
	 * orig_l_mas; this body works on mast->orig_l, mast->orig_r and
	 * mast->bn, with l_mas/m_mas/r_mas being the local states below.
	 */
	unsigned char split, mid_split;
	unsigned char slot = 0;
	unsigned char new_height = 0; /* used if node is a new root */
	struct maple_enode *left = NULL, *middle = NULL, *right = NULL;
	struct maple_enode *old_enode;

	MA_STATE(l_mas, mas->tree, mas->index, mas->index);
	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
	MA_STATE(m_mas, mas->tree, mas->index, mas->index);

	/*
	 * The tree needs to be rebalanced and leaves need to be kept at the same level.
	 * Rebalancing is done by use of the ``struct maple_topiary``.
	 */
	mast->l = &l_mas;
	mast->m = &m_mas;
	mast->r = &r_mas;
	l_mas.status = r_mas.status = m_mas.status = ma_none;

	/* Check if this is not root and has sufficient data. */
	if (((mast->orig_l->min != 0) || (mast->orig_r->max != ULONG_MAX)) &&
	    unlikely(mast->bn->b_end <= mt_min_slots[mast->bn->type]))
		mast_spanning_rebalance(mast);

	/*
	 * Each level of the tree is examined and balanced, pushing data to the left or
	 * right, or rebalancing against left or right nodes is employed to avoid
	 * rippling up the tree to limit the amount of churn. Once a new sub-section of
	 * the tree is created, there may be a mix of new and old nodes. The old nodes
	 * will have the incorrect parent pointers and currently be in two trees: the
	 * original tree and the partially new tree. To remedy the parent pointers in
	 * the old tree, the new data is swapped into the active tree and a walk down
	 * the tree is performed and the parent pointers are updated.
	 * See mas_topiary_replace() for more information.
	 */
	while (count--) {
		mast->bn->b_end--;
		mast->bn->type = mte_node_type(mast->orig_l->node);
		split = mas_mab_to_node(mas, mast->bn, &left, &right, &middle,
					&mid_split);
		mast_set_split_parents(mast, left, middle, right, split,
				       mid_split);
		mast_cp_to_nodes(mast, left, middle, right, split, mid_split);
		new_height++;

		/*
		 * Copy data from next level in the tree to mast->bn from next
		 * iteration
		 */
		memset(mast->bn, 0, sizeof(struct maple_big_node));
		mast->bn->type = mte_node_type(left);

		/* Root already stored in l->node. */
		if (mas_is_root_limits(mast->l))
			goto new_root;

		mast_ascend(mast);
		mast_combine_cp_left(mast);
		l_mas.offset = mast->bn->b_end;
		mab_set_b_end(mast->bn, &l_mas, left);
		mab_set_b_end(mast->bn, &m_mas, middle);
		mab_set_b_end(mast->bn, &r_mas, right);

		/* Copy anything necessary out of the right node. */
		mast_combine_cp_right(mast);
		mast->orig_l->last = mast->orig_l->max;

		if (mast_sufficient(mast)) {
			/* Too much for one node: go around again. */
			if (mast_overflow(mast))
				continue;

			if (mast->orig_l->node == mast->orig_r->node) {
			       /*
				* The data in b_node should be stored in one
				* node and in the tree
				*/
				slot = mast->l->offset;
				break;
			}

			continue;
		}

		/* May be a new root stored in mast->bn */
		if (mas_is_root_limits(mast->orig_l))
			break;

		mast_spanning_rebalance(mast);

		/* rebalancing from other nodes may require another loop. */
		if (!count)
			count++;
	}

	/* Build the node that holds the final contents of mast->bn. */
	l_mas.node = mt_mk_node(ma_mnode_ptr(mas_pop_node(mas)),
				mte_node_type(mast->orig_l->node));

	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, &l_mas, true);
	new_height++;
	mas_set_parent(mas, left, l_mas.node, slot);
	if (middle)
		mas_set_parent(mas, middle, l_mas.node, ++slot);

	if (right)
		mas_set_parent(mas, right, l_mas.node, ++slot);

	if (mas_is_root_limits(mast->l)) {
		/* Also entered from inside the loop via goto new_root. */
new_root:
		mas_mn(mast->l)->parent = ma_parent_ptr(mas_tree_parent(mas));
		while (!mte_is_root(mast->orig_l->node))
			mast_ascend(mast);
	} else {
		mas_mn(&l_mas)->parent = mas_mn(mast->orig_l)->parent;
	}

	/* Point @mas at the new subtree before swapping it into the tree. */
	old_enode = mast->orig_l->node;
	mas->depth = l_mas.depth;
	mas->node = l_mas.node;
	mas->min = l_mas.min;
	mas->max = l_mas.max;
	mas->offset = l_mas.offset;
	mas_wmb_replace(mas, old_enode, new_height);
	mtree_range_walk(mas);
	return;
}

/*
 * mas_rebalance() - Rebalance a given node.
 * @mas: The maple state
 * @b_node: The big maple node.
 *
 * Rebalance two nodes into a single node or two new nodes that are sufficient.
 * Continue upwards until tree is sufficient.
 */
static inline void mas_rebalance(struct ma_state *mas,
				struct maple_big_node *b_node)
{
	/*
	 * NOTE(review): plain char here, but the value is passed on as the
	 * unsigned char count of mas_spanning_rebalance().
	 */
	char empty_count = mas_mt_height(mas);
	struct maple_subtree_state mast;
	unsigned char shift, b_end = ++b_node->b_end;

	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
	MA_STATE(r_mas, mas->tree, mas->index, mas->last);

	trace_ma_op(__func__, mas);

	/*
	 * Rebalancing occurs if a node is insufficient. Data is rebalanced
	 * against the node to the right if it exists, otherwise the node to the
	 * left of this node is rebalanced against this node. If rebalancing
	 * causes just one node to be produced instead of two, then the parent
	 * is also examined and rebalanced if it is insufficient. Every level
	 * tries to combine the data in the same way. If one node contains the
	 * entire range of the tree, then that node is used as a new root node.
	 */

	mast.orig_l = &l_mas;
	mast.orig_r = &r_mas;
	mast.bn = b_node;
	mast.bn->type = mte_node_type(mas->node);

	l_mas = r_mas = *mas;

	if (mas_next_sibling(&r_mas)) {
		/* Append the right sibling's data after the existing data. */
		mas_mab_cp(&r_mas, 0, mt_slot_count(r_mas.node), b_node, b_end);
		r_mas.last = r_mas.index = r_mas.max;
	} else {
		/*
		 * No right sibling: shift the big node right and copy the left
		 * sibling in front.
		 * NOTE(review): the mas_prev_sibling() result is not checked
		 * here - confirm a left sibling always exists on this path.
		 */
		mas_prev_sibling(&l_mas);
		shift = mas_data_end(&l_mas) + 1;
		mab_shift_right(b_node, shift);
		mas->offset += shift;
		mas_mab_cp(&l_mas, 0, shift - 1, b_node, 0);
		b_node->b_end = shift + b_end;
		l_mas.index = l_mas.last = l_mas.min;
	}

	return mas_spanning_rebalance(mas, &mast, empty_count);
}

/*
 * mas_destroy_rebalance() - Rebalance left-most node while destroying the maple
 * state.
 * @mas: The maple state
 * @end: The end of the left-most node.
 *
 * During a mass-insert event (such as forking), it may be necessary to
 * rebalance the left-most node when it is not sufficient.
 */
static inline void mas_destroy_rebalance(struct ma_state *mas, unsigned char end)
{
	enum maple_type mt = mte_node_type(mas->node);
	struct maple_node reuse, *newnode, *parent, *new_left, *left, *node;
	struct maple_enode *eparent, *old_eparent;
	unsigned char offset, tmp, split = mt_slots[mt] / 2;
	void __rcu **l_slots, **slots;
	unsigned long *l_pivs, *pivs, gap;
	bool in_rcu = mt_in_rcu(mas->tree);
	unsigned char new_height = mas_mt_height(mas);

	MA_STATE(l_mas, mas->tree, mas->index, mas->last);

	l_mas = *mas;
	mas_prev_sibling(&l_mas);

	/* set up node. */
	if (in_rcu) {
		newnode = mas_pop_node(mas);
	} else {
		/* Not in RCU mode: build on the stack, copy back in place. */
		newnode = &reuse;
	}

	node = mas_mn(mas);
	newnode->parent = node->parent;
	slots = ma_slots(newnode, mt);
	pivs = ma_pivots(newnode, mt);
	left = mas_mn(&l_mas);
	l_slots = ma_slots(left, mt);
	l_pivs = ma_pivots(left, mt);
	/* Move the split point up by one when the slot at split is empty. */
	if (!l_slots[split])
		split++;
	tmp = mas_data_end(&l_mas) - split;

	/* Data after the split in the left node goes first ... */
	memcpy(slots, l_slots + split + 1, sizeof(void *) * tmp);
	memcpy(pivs, l_pivs + split + 1, sizeof(unsigned long) * tmp);
	pivs[tmp] = l_mas.max;
	/* ... followed by @end entries from the current node. */
	memcpy(slots + tmp, ma_slots(node, mt), sizeof(void *) * end);
	memcpy(pivs + tmp, ma_pivots(node, mt), sizeof(unsigned long) * end);

	l_mas.max = l_pivs[split];
	mas->min = l_mas.max + 1;
	old_eparent = mt_mk_node(mte_parent(l_mas.node),
			     mas_parent_type(&l_mas, l_mas.node));
	tmp += end;
	if (!in_rcu) {
		unsigned char max_p = mt_pivots[mt];
		unsigned char max_s = mt_slots[mt];

		/* Zero the unused tail; reuse is an uninitialised local. */
		if (tmp < max_p)
			memset(pivs + tmp, 0,
			       sizeof(unsigned long) * (max_p - tmp));

		/* NOTE(review): mt_slots[mt] is the value held in max_s. */
		if (tmp < mt_slots[mt])
			memset(slots + tmp, 0, sizeof(void *) * (max_s - tmp));

		memcpy(node, newnode, sizeof(struct maple_node));
		ma_set_meta(node, mt, 0, tmp - 1);
		mte_set_pivot(old_eparent, mte_parent_slot(l_mas.node),
			      l_pivs[split]);

		/* Remove data from l_pivs. */
		tmp = split + 1;
		memset(l_pivs + tmp, 0, sizeof(unsigned long) * (max_p - tmp));
		memset(l_slots + tmp, 0, sizeof(void *) * (max_s - tmp));
		ma_set_meta(left, mt, 0, split);
		eparent = old_eparent;

		goto done;
	}

	/* RCU requires replacing both l_mas, mas, and parent. */
	mas->node = mt_mk_node(newnode, mt);
	/*
	 * NOTE(review): the non-RCU path above records the end as tmp - 1
	 * while this path records tmp - confirm which is intended.
	 */
	ma_set_meta(newnode, mt, 0, tmp);

	new_left = mas_pop_node(mas);
	new_left->parent = left->parent;
	mt = mte_node_type(l_mas.node);
	slots = ma_slots(new_left, mt);
	pivs = ma_pivots(new_left, mt);
	memcpy(slots, l_slots, sizeof(void *) * split);
	memcpy(pivs, l_pivs, sizeof(unsigned long) * split);
	ma_set_meta(new_left, mt, 0, split);
	l_mas.node = mt_mk_node(new_left, mt);

	/* replace parent. */
	offset = mte_parent_slot(mas->node);
	mt = mas_parent_type(&l_mas, l_mas.node);
	parent = mas_pop_node(mas);
	slots = ma_slots(parent, mt);
	pivs = ma_pivots(parent, mt);
	/* Start from a copy of the old parent, then patch the two slots. */
	memcpy(parent, mte_to_node(old_eparent), sizeof(struct maple_node));
	rcu_assign_pointer(slots[offset], mas->node);
	rcu_assign_pointer(slots[offset - 1], l_mas.node);
	pivs[offset - 1] = l_mas.max;
	eparent = mt_mk_node(parent, mt);
done:
	/* Record the gap of both children in the (possibly new) parent. */
	gap = mas_leaf_max_gap(mas);
	mte_set_gap(eparent, mte_parent_slot(mas->node), gap);
	gap = mas_leaf_max_gap(&l_mas);
	mte_set_gap(eparent, mte_parent_slot(l_mas.node), gap);
	mas_ascend(mas);

	if (in_rcu) {
		mas_replace_node(mas, old_eparent, new_height);
		mas_adopt_children(mas, mas->node);
	}

	mas_update_gap(mas);
}

/*
 * mas_split_final_node() - Split the final node in a subtree operation.
 * @mast: the maple subtree state
 * @mas: The maple state
 */
static inline void mas_split_final_node(struct maple_subtree_state *mast,
					struct ma_state *mas)
{
	struct maple_enode *ancestor;

	/* At the root the big node type depends on mt_is_alloc(). */
	if (mte_is_root(mas->node)) {
		if (mt_is_alloc(mas->tree))
			mast->bn->type = maple_arange_64;
		else
			mast->bn->type = maple_range_64;
	}
	/*
	 * Only a single node is used here, could be root.
	 * The Big_node data should just fit in a single node.
	 */
	ancestor = mas_new_ma_node(mas, mast->bn);
	mas_set_parent(mas, mast->l->node, ancestor, mast->l->offset);
	mas_set_parent(mas, mast->r->node, ancestor, mast->r->offset);
	mte_to_node(ancestor)->parent = mas_mn(mas)->parent;

	mast->l->node = ancestor;
	mab_mas_cp(mast->bn, 0, mt_slots[mast->bn->type] - 1, mast->l, true);
	mas->offset = mast->bn->b_end - 1;
}

/*
 * mast_fill_bnode() - Copy data into the big node in the subtree state
 * @mast: The maple subtree state
 * @mas: the maple state
 * @skip: The number of entries to skip for new nodes insertion.
*/
static inline void mast_fill_bnode(struct maple_subtree_state *mast,
					 struct ma_state *mas,
					 unsigned char skip)
{
	/* cp: whether data from the parent node is copied into the big node. */
	bool cp = true;
	unsigned char split;

	memset(mast->bn, 0, sizeof(struct maple_big_node));

	if (mte_is_root(mas->node)) {
		/* At the root there is no parent to copy from. */
		cp = false;
	} else {
		mas_ascend(mas);
		mas->offset = mte_parent_slot(mas->node);
	}

	/* Parent entries before the left node's offset go in first. */
	if (cp && mast->l->offset)
		mas_mab_cp(mas, 0, mast->l->offset - 1, mast->bn, 0);

	split = mast->bn->b_end;
	mab_set_b_end(mast->bn, mast->l, mast->l->node);
	mast->r->offset = mast->bn->b_end;
	mab_set_b_end(mast->bn, mast->r, mast->r->node);
	/* Nothing follows when the last pivot already equals mas->max. */
	if (mast->bn->pivot[mast->bn->b_end - 1] == mas->max)
		cp = false;

	if (cp)
		mas_mab_cp(mas, split + skip, mt_slot_count(mas->node) - 1,
			   mast->bn, mast->bn->b_end);

	mast->bn->b_end--;
	mast->bn->type = mte_node_type(mas->node);
}

/*
 * mast_split_data() - Split the data in the subtree state big node into regular
 * nodes.
 * @mast: The maple subtree state
 * @mas: The maple state
 * @split: The location to split the big node
 */
static inline void mast_split_data(struct maple_subtree_state *mast,
	   struct ma_state *mas, unsigned char split)
{
	unsigned char p_slot;

	/* [0, split] goes to the left node, the remainder to the right node. */
	mab_mas_cp(mast->bn, 0, split, mast->l, true);
	mte_set_pivot(mast->r->node, 0, mast->r->max);
	mab_mas_cp(mast->bn, split + 1, mast->bn->b_end, mast->r, false);
	mast->l->offset = mte_parent_slot(mas->node);
	mast->l->max = mast->bn->pivot[split];
	mast->r->min = mast->l->max + 1;
	/* Leaves have no children whose parent needs updating. */
	if (mte_is_leaf(mas->node))
		return;

	p_slot = mast->orig_l->offset;
	mas_set_split_parent(mast->orig_l, mast->l->node, mast->r->node,
			     &p_slot, split);
	mas_set_split_parent(mast->orig_r, mast->l->node, mast->r->node,
			     &p_slot, split);
}

/*
 * mas_push_data() - Instead of splitting a node, it is beneficial to push the
 * data to the right or left node if there is room.
 * @mas: The maple state
 * @mast: The maple subtree state
 * @left: Push left or not.
 *
 * Keeping the height of the tree low means faster lookups.
 *
 * Return: True if pushed, false otherwise.
*/
static inline bool mas_push_data(struct ma_state *mas,
				struct maple_subtree_state *mast, bool left)
{
	unsigned char slot_total = mast->bn->b_end;
	unsigned char end, space, split;

	MA_STATE(tmp_mas, mas->tree, mas->index, mas->last);
	tmp_mas = *mas;
	tmp_mas.depth = mast->l->depth;

	/* Pushing is only possible when the chosen sibling exists. */
	if (left && !mas_prev_sibling(&tmp_mas))
		return false;
	else if (!left && !mas_next_sibling(&tmp_mas))
		return false;

	end = mas_data_end(&tmp_mas);
	slot_total += end;
	space = 2 * mt_slot_count(mas->node) - 2;
	/* -2 instead of -1 to ensure there isn't a triple split */
	if (ma_is_leaf(mast->bn->type))
		space--;

	if (mas->max == ULONG_MAX)
		space--;

	/* The combined data must fit within the space of two nodes. */
	if (slot_total >= space)
		return false;

	/* Get the data; Fill mast->bn */
	mast->bn->b_end++;
	if (left) {
		mab_shift_right(mast->bn, end + 1);
		mas_mab_cp(&tmp_mas, 0, end, mast->bn, 0);
		mast->bn->b_end = slot_total + 1;
	} else {
		mas_mab_cp(&tmp_mas, 0, end, mast->bn, mast->bn->b_end);
	}

	/* Configure mast for splitting of mast->bn */
	split = mt_slots[mast->bn->type] - 2;
	if (left) {
		/*  Switch mas to prev node  */
		*mas = tmp_mas;
		/* Start using mast->l for the left side. */
		tmp_mas.node = mast->l->node;
		*mast->l = tmp_mas;
	} else {
		tmp_mas.node = mast->r->node;
		*mast->r = tmp_mas;
		split = slot_total - split;
	}
	split = mab_no_null_split(mast->bn, split, mt_slots[mast->bn->type]);
	/* Update parent slot for split calculation. */
	if (left)
		mast->orig_l->offset += end + 1;

	mast_split_data(mast, mas, split);
	mast_fill_bnode(mast, mas, 2);
	mas_split_final_node(mast, mas);
	return true;
}

/*
 * mas_split() - Split data that is too big for one node into two.
 * @mas: The maple state
 * @b_node: The maple big node
 */
static void mas_split(struct ma_state *mas, struct maple_big_node *b_node)
{
	struct maple_subtree_state mast;
	/*
	 * NOTE(review): height is int while orig_height is unsigned int, so
	 * the loop condition below compares them as unsigned.
	 */
	int height = 0;
	unsigned int orig_height = mas_mt_height(mas);
	unsigned char mid_split, split = 0;
	struct maple_enode *old;

	/*
	 * Splitting is handled differently from any other B-tree; the Maple
	 * Tree splits upwards. Splitting up means that the split operation
	 * occurs when the walk of the tree hits the leaves and not on the way
	 * down. The reason for splitting up is that it is impossible to know
	 * how much space will be needed until the leaf is (or leaves are)
	 * reached. Since overwriting data is allowed and a range could
	 * overwrite more than one range or result in changing one entry into 3
	 * entries, it is impossible to know if a split is required until the
	 * data is examined.
	 *
	 * Splitting is a balancing act between keeping allocations to a minimum
	 * and avoiding a 'jitter' event where a tree is expanded to make room
	 * for an entry followed by a contraction when the entry is removed. To
	 * accomplish the balance, there are empty slots remaining in both left
	 * and right nodes after a split.
	 */
	MA_STATE(l_mas, mas->tree, mas->index, mas->last);
	MA_STATE(r_mas, mas->tree, mas->index, mas->last);
	MA_STATE(prev_l_mas, mas->tree, mas->index, mas->last);
	MA_STATE(prev_r_mas, mas->tree, mas->index, mas->last);

	trace_ma_op(__func__, mas);

	mast.l = &l_mas;
	mast.r = &r_mas;
	mast.orig_l = &prev_l_mas;
	mast.orig_r = &prev_r_mas;
	mast.bn = b_node;

	while (height++ <= orig_height) {
		/* The data now fits in one node: finish with a single node. */
		if (mt_slots[b_node->type] > b_node->b_end) {
			mas_split_final_node(&mast, mas);
			break;
		}

		l_mas = r_mas = *mas;
		l_mas.node = mas_new_ma_node(mas, b_node);
		r_mas.node = mas_new_ma_node(mas, b_node);
		/*
		 * Another way that 'jitter' is avoided is to terminate a split up early if the
		 * left or right node has space to spare. This is referred to as "pushing left"
		 * or "pushing right" and is similar to the B* tree, except the nodes left or
		 * right can rarely be reused due to RCU, but the ripple upwards is halted which
		 * is a significant savings.
		 */
		/* Try to push left. */
		if (mas_push_data(mas, &mast, true)) {
			height++;
			break;
		}
		/* Try to push right. */
		if (mas_push_data(mas, &mast, false)) {
			height++;
			break;
		}

		split = mab_calc_split(mas, b_node, &mid_split);
		mast_split_data(&mast, mas, split);
		/*
		 * Usually correct, mab_mas_cp in the above call overwrites
		 * r->max.
		 */
		mast.r->max = mas->max;
		mast_fill_bnode(&mast, mas, 1);
		/* Keep this level's nodes for the next level's parent fix-up. */
		prev_l_mas = *mast.l;
		prev_r_mas = *mast.r;
	}

	/* Set the original node as dead */
	old = mas->node;
	mas->node = l_mas.node;
	mas_wmb_replace(mas, old, height);
	mtree_range_walk(mas);
	return;
}

/*
 * mas_commit_b_node() - Commit the big node into the tree.
 * @wr_mas: The maple write state
 * @b_node: The maple big node
 *
 * Only the wr_rebalance and wr_split_store store types are expected; any other
 * type triggers a one-time warning and is handled as a split.
 */
static noinline_for_kasan void mas_commit_b_node(struct ma_wr_state *wr_mas,
			    struct maple_big_node *b_node)
{
	enum store_type type = wr_mas->mas->store_type;

	WARN_ON_ONCE(type != wr_rebalance && type != wr_split_store);

	if (type == wr_rebalance)
		return mas_rebalance(wr_mas->mas, b_node);

	return mas_split(wr_mas->mas, b_node);
}

/*
 * mas_root_expand() - Expand a root to a node
 * @mas: The maple state
 * @entry: The entry to store into the tree
 */
static inline void mas_root_expand(struct ma_state *mas, void *entry)
{
	void *contents = mas_root_locked(mas);
	enum maple_type type = maple_leaf_64;
	struct maple_node *node;
	void __rcu **slots;
	unsigned long *pivots;
	int slot = 0;

	node = mas_pop_node(mas);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);
	node->parent = ma_parent_ptr(mas_tree_parent(mas));
	mas->node = mt_mk_node(node, type);
	mas->status = ma_active;

	if (mas->index) {
		/* The old root contents, if any, are kept in slot 0. */
		if (contents) {
			rcu_assign_pointer(slots[slot], contents);
			if (likely(mas->index > 1))
				slot++;
		}
		/* The range before the new entry ends at index - 1. */
		pivots[slot++] = mas->index - 1;
	}

	rcu_assign_pointer(slots[slot], entry);
	mas->offset = slot;
	pivots[slot] = mas->last;
	/* Terminate the node with a range reaching ULONG_MAX. */
	if (mas->last != ULONG_MAX)
		pivots[++slot] = ULONG_MAX;

	mt_set_height(mas->tree, 1);
	/* NOTE(review): maple_leaf_64 is the value held in type. */
	ma_set_meta(node, maple_leaf_64, 0, slot);
	/* swap the new root into the tree */
	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));
	return;
}

/*
 * mas_store_root() - Storing value into
root. * @mas: The maple state * @entry: The entry to store. * * There is no root node now and we are storing a value into the root - this * function either assigns the pointer or expands into a node. */ static inline void mas_store_root(struct ma_state *mas, void *entry) { if (!entry) { if (!mas->index) rcu_assign_pointer(mas->tree->ma_root, NULL); } else if (likely((mas->last != 0) || (mas->index != 0))) mas_root_expand(mas, entry); else if (((unsigned long) (entry) & 3) == 2) mas_root_expand(mas, entry); else { rcu_assign_pointer(mas->tree->ma_root, entry); mas->status = ma_start; } } /* * mas_is_span_wr() - Check if the write needs to be treated as a write that * spans the node. * @wr_mas: The maple write state * * Spanning writes are writes that start in one node and end in another OR if * the write of a %NULL will cause the node to end with a %NULL. * * Return: True if this is a spanning write, false otherwise. */ static bool mas_is_span_wr(struct ma_wr_state *wr_mas) { unsigned long max = wr_mas->r_max; unsigned long last = wr_mas->mas->last; enum maple_type type = wr_mas->type; void *entry = wr_mas->entry; /* Contained in this pivot, fast path */ if (last < max) return false; if (ma_is_leaf(type)) { max = wr_mas->mas->max; if (last < max) return false; } if (last == max) { /* * The last entry of leaf node cannot be NULL unless it is the * rightmost node (writing ULONG_MAX), otherwise it spans slots. 
*/ if (entry || last == ULONG_MAX) return false; } trace_ma_write(__func__, wr_mas->mas, wr_mas->r_max, entry); return true; } static inline void mas_wr_walk_descend(struct ma_wr_state *wr_mas) { wr_mas->type = mte_node_type(wr_mas->mas->node); mas_wr_node_walk(wr_mas); wr_mas->slots = ma_slots(wr_mas->node, wr_mas->type); } static inline void mas_wr_walk_traverse(struct ma_wr_state *wr_mas) { wr_mas->mas->max = wr_mas->r_max; wr_mas->mas->min = wr_mas->r_min; wr_mas->mas->node = wr_mas->content; wr_mas->mas->offset = 0; wr_mas->mas->depth++; } /* * mas_wr_walk() - Walk the tree for a write. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. * * Return: True if it's contained in a node, false on spanning write. */ static bool mas_wr_walk(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; while (true) { mas_wr_walk_descend(wr_mas); if (unlikely(mas_is_span_wr(wr_mas))) return false; wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) return true; if (mas->end < mt_slots[wr_mas->type] - 1) wr_mas->vacant_height = mas->depth + 1; if (ma_is_root(mas_mn(mas))) { /* root needs more than 2 entries to be sufficient + 1 */ if (mas->end > 2) wr_mas->sufficient_height = 1; } else if (mas->end > mt_min_slots[wr_mas->type] + 1) wr_mas->sufficient_height = mas->depth + 1; mas_wr_walk_traverse(wr_mas); } return true; } static void mas_wr_walk_index(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; while (true) { mas_wr_walk_descend(wr_mas); wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) return; mas_wr_walk_traverse(wr_mas); } } /* * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. 
* @l_wr_mas: The left maple write state
 * @r_wr_mas: The right maple write state
 */
static inline void mas_extend_spanning_null(struct ma_wr_state *l_wr_mas,
					    struct ma_wr_state *r_wr_mas)
{
	struct ma_state *r_mas = r_wr_mas->mas;
	struct ma_state *l_mas = l_wr_mas->mas;
	unsigned char l_slot;

	l_slot = l_mas->offset;
	/* The left slot is already empty: start at its beginning. */
	if (!l_wr_mas->content)
		l_mas->index = l_wr_mas->r_min;

	/* Also absorb the slot before it when that one is empty as well. */
	if ((l_mas->index == l_wr_mas->r_min) &&
		 (l_slot &&
		  !mas_slot_locked(l_mas, l_wr_mas->slots, l_slot - 1))) {
		if (l_slot > 1)
			l_mas->index = l_wr_mas->pivots[l_slot - 2] + 1;
		else
			l_mas->index = l_mas->min;

		l_mas->offset = l_slot - 1;
	}

	if (!r_wr_mas->content) {
		/* The right slot is empty: extend to the end of it. */
		if (r_mas->last < r_wr_mas->r_max)
			r_mas->last = r_wr_mas->r_max;
		r_mas->offset++;
	} else if ((r_mas->last == r_wr_mas->r_max) &&
	    (r_mas->last < r_mas->max) &&
	    !mas_slot_locked(r_mas, r_wr_mas->slots, r_mas->offset + 1)) {
		/* The store ends on a slot boundary and the next slot is empty. */
		r_mas->last = mas_safe_pivot(r_mas, r_wr_mas->pivots,
					     r_wr_mas->type, r_mas->offset + 1);
		r_mas->offset++;
	}
}

/*
 * mas_state_walk() - Start the maple state and walk to mas->index.
 * @mas: The maple state
 *
 * Return: NULL when the state is none after mas_start(), the value returned by
 * mas_start() when the state is a pointer, otherwise the result of
 * mtree_range_walk().
 */
static inline void *mas_state_walk(struct ma_state *mas)
{
	void *entry;

	entry = mas_start(mas);
	if (mas_is_none(mas))
		return NULL;

	if (mas_is_ptr(mas))
		return entry;

	return mtree_range_walk(mas);
}

/*
 * mtree_lookup_walk() - Internal quick lookup that does not keep maple state up
 * to date.
 *
 * @mas: The maple state.
 *
 * Note: Leaves mas in undesirable state.
 * Return: The entry for @mas->index or %NULL on dead node.
*/
static inline void *mtree_lookup_walk(struct ma_state *mas)
{
	unsigned long *pivots;
	unsigned char offset;
	struct maple_node *node;
	struct maple_enode *next;
	enum maple_type type;
	void __rcu **slots;
	unsigned char end;

	next = mas->node;
	do {
		node = mte_to_node(next);
		type = mte_node_type(next);
		pivots = ma_pivots(node, type);
		/* Scan up to the pivot count of the node type. */
		end = mt_pivots[type];
		offset = 0;
		do {
			if (pivots[offset] >= mas->index)
				break;
		} while (++offset < end);

		slots = ma_slots(node, type);
		next = mt_slot(mas->tree, slots, offset);
		/* The dead check comes after the slot has been read. */
		if (unlikely(ma_dead_node(node)))
			goto dead_node;
	} while (!ma_is_leaf(type));

	return (void *)next;

dead_node:
	mas_reset(mas);
	return NULL;
}

/* Defined later in the file; used by mas_new_root() below. */
static void mte_destroy_walk(struct maple_enode *, struct maple_tree *);
/*
 * mas_new_root() - Create a new root node that only contains the entry passed
 * in.
 * @mas: The maple state
 * @entry: The entry to store.
 *
 * Only valid when the index == 0 and the last == ULONG_MAX
 */
static inline void mas_new_root(struct ma_state *mas, void *entry)
{
	/* Read the old root first so it can be destroyed at the end. */
	struct maple_enode *root = mas_root_locked(mas);
	enum maple_type type = maple_leaf_64;
	struct maple_node *node;
	void __rcu **slots;
	unsigned long *pivots;

	WARN_ON_ONCE(mas->index || mas->last != ULONG_MAX);

	/* Storing NULL over everything leaves a tree of height 0. */
	if (!entry) {
		mt_set_height(mas->tree, 0);
		rcu_assign_pointer(mas->tree->ma_root, entry);
		mas->status = ma_start;
		goto done;
	}

	node = mas_pop_node(mas);
	pivots = ma_pivots(node, type);
	slots = ma_slots(node, type);
	node->parent = ma_parent_ptr(mas_tree_parent(mas));
	mas->node = mt_mk_node(node, type);
	mas->status = ma_active;
	rcu_assign_pointer(slots[0], entry);
	pivots[0] = mas->last;
	mt_set_height(mas->tree, 1);
	rcu_assign_pointer(mas->tree->ma_root, mte_mk_root(mas->node));

done:
	/* The old tree is only walked when the old root was a node. */
	if (xa_is_node(root))
		mte_destroy_walk(root, mas->tree);

	return;
}
/*
 * mas_wr_spanning_store() - Create a subtree with the store operation completed
 * and new nodes where necessary, then place the sub-tree in the actual tree.
* Note that mas is expected to point to the node which caused the store to
 * span.
 * @wr_mas: The maple write state
 */
static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
{
	struct maple_subtree_state mast;
	struct maple_big_node b_node;
	struct ma_state *mas;
	unsigned char height;

	/* Left and Right side of spanning store */
	MA_STATE(l_mas, NULL, 0, 0);
	MA_STATE(r_mas, NULL, 0, 0);
	MA_WR_STATE(r_wr_mas, &r_mas, wr_mas->entry);
	MA_WR_STATE(l_wr_mas, &l_mas, wr_mas->entry);

	/*
	 * A store operation that spans multiple nodes is called a spanning
	 * store and is handled early in the store call stack by the function
	 * mas_is_span_wr().  When a spanning store is identified, the maple
	 * state is duplicated.  The first maple state walks the left tree path
	 * to ``index``, the duplicate walks the right tree path to ``last``.
	 * The data in the two nodes are combined into a single node, two nodes,
	 * or possibly three nodes (see the 3-way split above).  A ``NULL``
	 * written to the last entry of a node is considered a spanning store as
	 * a rebalance is required for the operation to complete and an overflow
	 * of data may happen.
	 */
	mas = wr_mas->mas;
	trace_ma_op(__func__, mas);

	/* A store over the whole range simply replaces the root. */
	if (unlikely(!mas->index && mas->last == ULONG_MAX))
		return mas_new_root(mas, wr_mas->entry);
	/*
	 * Node rebalancing may occur due to this store, so there may be three new
	 * entries per level plus a new root.
	 */
	height = mas_mt_height(mas);

	/*
	 * Set up right side.  Need to get to the next offset after the spanning
	 * store to ensure it's not NULL and to combine both the next node and
	 * the node with the start together.
	 */
	r_mas = *mas;
	/* Avoid overflow, walk to next slot in the tree. */
	if (r_mas.last + 1)
		r_mas.last++;

	r_mas.index = r_mas.last;
	mas_wr_walk_index(&r_wr_mas);
	/* Restore the real end of the store after the walk. */
	r_mas.last = r_mas.index = mas->last;

	/* Set up left side. */
	l_mas = *mas;
	mas_wr_walk_index(&l_wr_mas);

	if (!wr_mas->entry) {
		/* A NULL store is widened over any neighbouring NULLs. */
		mas_extend_spanning_null(&l_wr_mas, &r_wr_mas);
		mas->offset = l_mas.offset;
		mas->index = l_mas.index;
		mas->last = l_mas.last = r_mas.last;
	}

	/* expanding NULLs may make this cover the entire range */
	if (!l_mas.index && r_mas.last == ULONG_MAX) {
		mas_set_range(mas, 0, ULONG_MAX);
		return mas_new_root(mas, wr_mas->entry);
	}

	memset(&b_node, 0, sizeof(struct maple_big_node));
	/* Copy l_mas and store the value in b_node. */
	mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
	/* Copy r_mas into b_node if there is anything to copy. */
	if (r_mas.max > r_mas.last)
		mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
			   &b_node, b_node.b_end + 1);
	else
		b_node.b_end++;

	/* Stop spanning searches by searching for just index. */
	l_mas.index = l_mas.last = mas->index;

	mast.bn = &b_node;
	mast.orig_l = &l_mas;
	mast.orig_r = &r_mas;
	/* Combine l_mas and r_mas and split them up evenly again. */
	return mas_spanning_rebalance(mas, &mast, height + 1);
}

/*
 * mas_wr_node_store() - Attempt to store the value in a node
 * @wr_mas: The maple write state
 * @new_end: The end offset recorded in the node metadata and in mas->end once
 *           the store is complete.
 *
 * Attempts to reuse the node, but may allocate.
 */
static inline void mas_wr_node_store(struct ma_wr_state *wr_mas,
				     unsigned char new_end)
{
	struct ma_state *mas = wr_mas->mas;
	void __rcu **dst_slots;
	unsigned long *dst_pivots;
	unsigned char dst_offset, offset_end = wr_mas->offset_end;
	struct maple_node reuse, *newnode;
	unsigned char copy_size, node_pivots = mt_pivots[wr_mas->type];
	bool in_rcu = mt_in_rcu(mas->tree);
	unsigned char height = mas_mt_height(mas);

	if (mas->last == wr_mas->end_piv)
		offset_end++; /* don't copy this offset */
	else if (unlikely(wr_mas->r_max == ULONG_MAX))
		mas_bulk_rebalance(mas, mas->end, wr_mas->type);

	/* set up node. */
	if (in_rcu) {
		newnode = mas_pop_node(mas);
	} else {
		/* Not in RCU mode: build on the stack, copy back in place. */
		memset(&reuse, 0, sizeof(struct maple_node));
		newnode = &reuse;
	}

	newnode->parent = mas_mn(mas)->parent;
	dst_pivots = ma_pivots(newnode, wr_mas->type);
	dst_slots = ma_slots(newnode, wr_mas->type);
	/* Copy from start to insert point */
	memcpy(dst_pivots, wr_mas->pivots, sizeof(unsigned long) * mas->offset);
	memcpy(dst_slots, wr_mas->slots, sizeof(void *) * mas->offset);

	/* Handle insert of new range starting after old range */
	if (wr_mas->r_min < mas->index) {
		rcu_assign_pointer(dst_slots[mas->offset], wr_mas->content);
		dst_pivots[mas->offset++] = mas->index - 1;
	}

	/* Store the new entry and range end. */
	if (mas->offset < node_pivots)
		dst_pivots[mas->offset] = mas->last;
	rcu_assign_pointer(dst_slots[mas->offset], wr_mas->entry);

	/*
	 * this range wrote to the end of the node or it overwrote the rest of
	 * the data
	 */
	if (offset_end > mas->end)
		goto done;

	dst_offset = mas->offset + 1;
	/* Copy to the end of node if necessary. */
	copy_size = mas->end - offset_end + 1;
	memcpy(dst_slots + dst_offset, wr_mas->slots + offset_end,
	       sizeof(void *) * copy_size);
	/* One pivot fewer than slots is copied. */
	memcpy(dst_pivots + dst_offset, wr_mas->pivots + offset_end,
	       sizeof(unsigned long) * (copy_size - 1));

	if (new_end < node_pivots)
		dst_pivots[new_end] = mas->max;

done:
	/*
	 * NOTE(review): the type is the literal maple_leaf_64 here while the
	 * rest of the function uses wr_mas->type - confirm they always agree.
	 */
	mas_leaf_set_meta(newnode, maple_leaf_64, new_end);
	if (in_rcu) {
		struct maple_enode *old_enode = mas->node;

		mas->node = mt_mk_node(newnode, wr_mas->type);
		mas_replace_node(mas, old_enode, height);
	} else {
		memcpy(wr_mas->node, newnode, sizeof(struct maple_node));
	}
	trace_ma_write(__func__, mas, 0, wr_mas->entry);
	mas_update_gap(mas);
	mas->end = new_end;
	return;
}

/*
 * mas_wr_slot_store: Attempt to store a value in a slot.
* @wr_mas: the maple write state
 */
static inline void mas_wr_slot_store(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char offset = mas->offset;
	void __rcu **slots = wr_mas->slots;
	bool gap = false;

	/*
	 * Remember whether either of the first two slots involved is empty
	 * before they are rewritten; this decides below whether the gap needs
	 * to be updated.
	 */
	gap |= !mt_slot_locked(mas->tree, slots, offset);
	gap |= !mt_slot_locked(mas->tree, slots, offset + 1);

	/* The write ends exactly one offset after the one it starts in. */
	if (wr_mas->offset_end - offset == 1) {
		if (mas->index == wr_mas->r_min) {
			/* Overwriting the range and a part of the next one */
			rcu_assign_pointer(slots[offset], wr_mas->entry);
			wr_mas->pivots[offset] = mas->last;
		} else {
			/* Overwriting a part of the range and the next one */
			rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
			wr_mas->pivots[offset] = mas->index - 1;
			mas->offset++; /* Keep mas accurate. */
		}
	} else {
		/* This path is not expected in RCU mode; warn once if it is hit. */
		WARN_ON_ONCE(mt_in_rcu(mas->tree));
		/*
		 * Expand the range, only partially overwriting the previous and
		 * next ranges
		 */
		gap |= !mt_slot_locked(mas->tree, slots, offset + 2);
		rcu_assign_pointer(slots[offset + 1], wr_mas->entry);
		wr_mas->pivots[offset] = mas->index - 1;
		wr_mas->pivots[offset + 1] = mas->last;
		mas->offset++; /* Keep mas accurate. */
	}

	trace_ma_write(__func__, mas, 0, wr_mas->entry);
	/*
	 * Only update gap when the new entry is empty or there is an empty
	 * entry in the original two ranges.
	 */
	if (!wr_mas->entry || gap)
		mas_update_gap(mas);

	return;
}

/*
 * mas_wr_extend_null: Widen the write range over adjacent NULL slots.
 * @wr_mas: the maple write state
 *
 * Adjusts mas->last (and wr_mas->offset_end / wr_mas->end_piv) at the end of
 * the write, and mas->index (and mas->offset / wr_mas->r_min / wr_mas->r_max)
 * at the start of the write, so that the range absorbs a NULL slot it touches
 * or directly borders within this node.
 *
 * NOTE(review): presumably only called when the entry being stored is NULL;
 * confirm against the callers.
 */
static inline void mas_wr_extend_null(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;

	if (!wr_mas->slots[wr_mas->offset_end]) {
		/* If this one is null, the next and prev are not */
		mas->last = wr_mas->end_piv;
	} else {
		/* Check next slot(s) if we are overwriting the end */
		if ((mas->last == wr_mas->end_piv) &&
		    (mas->end != wr_mas->offset_end) &&
		    !wr_mas->slots[wr_mas->offset_end + 1]) {
			wr_mas->offset_end++;
			/* The last offset of the node ends at the node's max. */
			if (wr_mas->offset_end == mas->end)
				mas->last = mas->max;
			else
				mas->last = wr_mas->pivots[wr_mas->offset_end];
			wr_mas->end_piv = mas->last;
		}
	}

	if (!wr_mas->content) {
		/* If this one is null, the next and prev are not */
		mas->index = wr_mas->r_min;
	} else {
		/* Check prev slot if we are overwriting the start */
		if (mas->index == wr_mas->r_min && mas->offset &&
		    !wr_mas->slots[mas->offset - 1]) {
			/* Step back one offset and take over that range's bounds. */
			mas->offset--;
			wr_mas->r_min = mas->index =
				mas_safe_min(mas, wr_mas->pivots, mas->offset);
			wr_mas->r_max = wr_mas->pivots[mas->offset];
		}
	}
}

/*
 * mas_wr_end_piv: Locate the offset and pivot where the write ends.
 * @wr_mas: the maple write state
 *
 * Advances wr_mas->offset_end while it is below mas->end and mas->last lies
 * beyond the pivot at that offset.  wr_mas->end_piv is then set to the pivot
 * at offset_end, or to mas->max when offset_end has reached mas->end.
 */
static inline void mas_wr_end_piv(struct ma_wr_state *wr_mas)
{
	while ((wr_mas->offset_end < wr_mas->mas->end) &&
	       (wr_mas->mas->last > wr_mas->pivots[wr_mas->offset_end]))
		wr_mas->offset_end++;

	if (wr_mas->offset_end < wr_mas->mas->end)
		wr_mas->end_piv = wr_mas->pivots[wr_mas->offset_end];
	else
		wr_mas->end_piv = wr_mas->mas->max;
}

/*
 * mas_wr_new_end: Compute the node's end offset after the write.
 * @wr_mas: the maple write state
 *
 * Starts from mas->end + 2, subtracts the number of offsets between
 * mas->offset and wr_mas->offset_end, and subtracts one more for each side of
 * the write that lines up exactly with an existing boundary (index == r_min,
 * last == end_piv).
 *
 * Return: the computed end offset.
 */
static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
{
	struct ma_state *mas = wr_mas->mas;
	unsigned char new_end = mas->end + 2;

	new_end -= wr_mas->offset_end - mas->offset;
	/* The write starts exactly at the start of the existing range. */
	if (wr_mas->r_min == mas->index)
		new_end--;

	/* The write ends exactly at the end of the existing range. */
	if (wr_mas->end_piv == mas->last)
		new_end--;

	return new_end;
}

/*
 * mas_wr_append: Attempt to append
 * @wr_mas: the maple write state
 * @new_end: The end of the node after the modification
 *
 * This is currently unsafe in rcu mode since the end of the node may be cached
 * by readers while the node contents may be updated which could result in
 * inaccurate information.
*/ static inline void mas_wr_append(struct ma_wr_state *wr_mas, unsigned char new_end) { struct ma_state *mas = wr_mas->mas; void __rcu **slots; unsigned char end = mas->end; if (new_end < mt_pivots[wr_mas->type]) { wr_mas->pivots[new_end] = wr_mas->pivots[end]; ma_set_meta(wr_mas->node, wr_mas->type, 0, new_end); } slots = wr_mas->slots; if (new_end == end + 1) { if (mas->last == wr_mas->r_max) { /* Append to end of range */ rcu_assign_pointer(slots[new_end], wr_mas->entry); wr_mas->pivots[end] = mas->index - 1; mas->offset = new_end; } else { /* Append to start of range */ rcu_assign_pointer(slots[new_end], wr_mas->content); wr_mas->pivots[end] = mas->last; rcu_assign_pointer(slots[end], wr_mas->entry); } } else { /* Append to the range without touching any boundaries. */ rcu_assign_pointer(slots[new_end], wr_mas->content); wr_mas->pivots