Total coverage: 15424 (2%)of 1335772
50 49 18 18 59 58 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/locks.c * * We implement four types of file locks: BSD locks, posix locks, open * file description locks, and leases. For details about BSD locks, * see the flock(2) man page; for details about the other three, see * fcntl(2). * * * Locking conflicts and dependencies: * If multiple threads attempt to lock the same byte (or flock the same file) * only one can be granted the lock, and other must wait their turn. * The first lock has been "applied" or "granted", the others are "waiting" * and are "blocked" by the "applied" lock.. * * Waiting and applied locks are all kept in trees whose properties are: * * - the root of a tree may be an applied or waiting lock. * - every other node in the tree is a waiting lock that * conflicts with every ancestor of that node. * * Every such tree begins life as a waiting singleton which obviously * satisfies the above properties. * * The only ways we modify trees preserve these properties: * * 1. We may add a new leaf node, but only after first verifying that it * conflicts with all of its ancestors. * 2. We may remove the root of a tree, creating a new singleton * tree from the root and N new trees rooted in the immediate * children. * 3. If the root of a tree is not currently an applied lock, we may * apply it (if possible). * 4. We may upgrade the root of the tree (either extend its range, * or upgrade its entire range from read to write). * * When an applied lock is modified in a way that reduces or downgrades any * part of its range, we remove all its children (2 above). This particularly * happens when a lock is unlocked. * * For each of those child trees we "wake up" the thread which is * waiting for the lock so it can continue handling as follows: if the * root of the tree applies, we do so (3). If it doesn't, it must * conflict with some applied lock. We remove (wake up) all of its children * (2), and add it is a new leaf to the tree rooted in the applied * lock (1). We then repeat the process recursively with those * children. * */ #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/filelock.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/security.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/time.h> #include <linux/rcupdate.h> #include <linux/pid_namespace.h> #include <linux/hashtable.h> #include <linux/percpu.h> #include <linux/sysctl.h> #define CREATE_TRACE_POINTS #include <trace/events/filelock.h> #include <linux/uaccess.h> static struct file_lock *file_lock(struct file_lock_core *flc) { return container_of(flc, struct file_lock, c); } static struct file_lease *file_lease(struct file_lock_core *flc) { return container_of(flc, struct file_lease, c); } static bool lease_breaking(struct file_lease *fl) { return fl->c.flc_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING); } static int target_leasetype(struct file_lease *fl) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) return F_UNLCK; if (fl->c.flc_flags & FL_DOWNGRADE_PENDING) return F_RDLCK; return fl->c.flc_type; } static int leases_enable = 1; static int lease_break_time = 45; #ifdef CONFIG_SYSCTL static struct ctl_table locks_sysctls[] = { { .procname = "leases-enable", .data = &leases_enable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #ifdef CONFIG_MMU { .procname = "lease-break-time", .data = &lease_break_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, #endif /* CONFIG_MMU */ }; static int __init init_fs_locks_sysctls(void) { register_sysctl_init("fs", locks_sysctls); return 0; } early_initcall(init_fs_locks_sysctls); #endif /* CONFIG_SYSCTL */ /* * The global file_lock_list is only used for displaying /proc/locks, so we * keep a list on each CPU, with each list protected by its own spinlock. * Global serialization is done using file_rwsem. * * Note that alterations to the list also require that the relevant flc_lock is * held. */ struct file_lock_list_struct { spinlock_t lock; struct hlist_head hlist; }; static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list); DEFINE_STATIC_PERCPU_RWSEM(file_rwsem); /* * The blocked_hash is used to find POSIX lock loops for deadlock detection. * It is protected by blocked_lock_lock. * * We hash locks by lockowner in order to optimize searching for the lock a * particular lockowner is waiting on. * * FIXME: make this value scale via some heuristic? We generally will want more * buckets when we have more lockowners holding locks, but that's a little * difficult to determine without knowing what the workload will look like. */ #define BLOCKED_HASH_BITS 7 static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS); /* * This lock protects the blocked_hash. Generally, if you're accessing it, you * want to be holding this lock. * * In addition, it also protects the fl->fl_blocked_requests list, and the * fl->fl_blocker pointer for file_lock structures that are acting as lock * requests (in contrast to those that are acting as records of acquired locks). * * Note that when we acquire this lock in order to change the above fields, * we often hold the flc_lock as well. In certain cases, when reading the fields * protected by this lock, we can skip acquiring it iff we already hold the * flc_lock. */ static DEFINE_SPINLOCK(blocked_lock_lock); static struct kmem_cache *flctx_cache __ro_after_init; static struct kmem_cache *filelock_cache __ro_after_init; static struct kmem_cache *filelease_cache __ro_after_init; static struct file_lock_context * locks_get_lock_context(struct inode *inode, int type) { struct file_lock_context *ctx; /* paired with cmpxchg() below */ ctx = locks_inode_context(inode); if (likely(ctx) || type == F_UNLCK) goto out; ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL); if (!ctx) goto out; spin_lock_init(&ctx->flc_lock); INIT_LIST_HEAD(&ctx->flc_flock); INIT_LIST_HEAD(&ctx->flc_posix); INIT_LIST_HEAD(&ctx->flc_lease); /* * Assign the pointer if it's not already assigned. If it is, then * free the context we just allocated. */ if (cmpxchg(&inode->i_flctx, NULL, ctx)) { kmem_cache_free(flctx_cache, ctx); ctx = locks_inode_context(inode); } out: trace_locks_get_lock_context(inode, type, ctx); return ctx; } static void locks_dump_ctx_list(struct list_head *list, char *list_type) { struct file_lock_core *flc; list_for_each_entry(flc, list, flc_list) pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, flc->flc_owner, flc->flc_flags, flc->flc_type, flc->flc_pid); } static void locks_check_ctx_lists(struct inode *inode) { struct file_lock_context *ctx = inode->i_flctx; if (unlikely(!list_empty(&ctx->flc_flock) || !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_lease))) { pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n", MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); locks_dump_ctx_list(&ctx->flc_flock, "FLOCK"); locks_dump_ctx_list(&ctx->flc_posix, "POSIX"); locks_dump_ctx_list(&ctx->flc_lease, "LEASE"); } } static void locks_check_ctx_file_list(struct file *filp, struct list_head *list, char *list_type) { struct file_lock_core *flc; struct inode *inode = file_inode(filp); list_for_each_entry(flc, list, flc_list) if (flc->flc_file == filp) pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx " " fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino, flc->flc_owner, flc->flc_flags, flc->flc_type, flc->flc_pid); } void locks_free_lock_context(struct inode *inode) { struct file_lock_context *ctx = locks_inode_context(inode); if (unlikely(ctx)) { locks_check_ctx_lists(inode); kmem_cache_free(flctx_cache, ctx); } } static void locks_init_lock_heads(struct file_lock_core *flc) { INIT_HLIST_NODE(&flc->flc_link); INIT_LIST_HEAD(&flc->flc_list); INIT_LIST_HEAD(&flc->flc_blocked_requests); INIT_LIST_HEAD(&flc->flc_blocked_member); init_waitqueue_head(&flc->flc_wait); } /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); if (fl) locks_init_lock_heads(&fl->c); return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lock); /* Allocate an empty lock structure. */ struct file_lease *locks_alloc_lease(void) { struct file_lease *fl = kmem_cache_zalloc(filelease_cache, GFP_KERNEL); if (fl) locks_init_lock_heads(&fl->c); return fl; } EXPORT_SYMBOL_GPL(locks_alloc_lease); void locks_release_private(struct file_lock *fl) { struct file_lock_core *flc = &fl->c; BUG_ON(waitqueue_active(&flc->flc_wait)); BUG_ON(!list_empty(&flc->flc_list)); BUG_ON(!list_empty(&flc->flc_blocked_requests)); BUG_ON(!list_empty(&flc->flc_blocked_member)); BUG_ON(!hlist_unhashed(&flc->flc_link)); if (fl->fl_ops) { if (fl->fl_ops->fl_release_private) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } if (fl->fl_lmops) { if (fl->fl_lmops->lm_put_owner) { fl->fl_lmops->lm_put_owner(flc->flc_owner); flc->flc_owner = NULL; } fl->fl_lmops = NULL; } } EXPORT_SYMBOL_GPL(locks_release_private); /** * locks_owner_has_blockers - Check for blocking lock requests * @flctx: file lock context * @owner: lock owner * * Return values: * %true: @owner has at least one blocker * %false: @owner has no blockers */ bool locks_owner_has_blockers(struct file_lock_context *flctx, fl_owner_t owner) { struct file_lock_core *flc; spin_lock(&flctx->flc_lock); list_for_each_entry(flc, &flctx->flc_posix, flc_list) { if (flc->flc_owner != owner) continue; if (!list_empty(&flc->flc_blocked_requests)) { spin_unlock(&flctx->flc_lock); return true; } } spin_unlock(&flctx->flc_lock); return false; } EXPORT_SYMBOL_GPL(locks_owner_has_blockers); /* Free a lock which is not in use. */ void locks_free_lock(struct file_lock *fl) { locks_release_private(fl); kmem_cache_free(filelock_cache, fl); } EXPORT_SYMBOL(locks_free_lock); /* Free a lease which is not in use. */ void locks_free_lease(struct file_lease *fl) { kmem_cache_free(filelease_cache, fl); } EXPORT_SYMBOL(locks_free_lease); static void locks_dispose_list(struct list_head *dispose) { struct file_lock_core *flc; while (!list_empty(dispose)) { flc = list_first_entry(dispose, struct file_lock_core, flc_list); list_del_init(&flc->flc_list); if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) locks_free_lease(file_lease(flc)); else locks_free_lock(file_lock(flc)); } } void locks_init_lock(struct file_lock *fl) { memset(fl, 0, sizeof(struct file_lock)); locks_init_lock_heads(&fl->c); } EXPORT_SYMBOL(locks_init_lock); void locks_init_lease(struct file_lease *fl) { memset(fl, 0, sizeof(*fl)); locks_init_lock_heads(&fl->c); } EXPORT_SYMBOL(locks_init_lease); /* * Initialize a new lock from an existing file_lock structure. */ void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->c.flc_owner = fl->c.flc_owner; new->c.flc_pid = fl->c.flc_pid; new->c.flc_file = NULL; new->c.flc_flags = fl->c.flc_flags; new->c.flc_type = fl->c.flc_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; new->fl_lmops = fl->fl_lmops; new->fl_ops = NULL; if (fl->fl_lmops) { if (fl->fl_lmops->lm_get_owner) fl->fl_lmops->lm_get_owner(fl->c.flc_owner); } } EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { /* "new" must be a freshly-initialized lock */ WARN_ON_ONCE(new->fl_ops); locks_copy_conflock(new, fl); new->c.flc_file = fl->c.flc_file; new->fl_ops = fl->fl_ops; if (fl->fl_ops) { if (fl->fl_ops->fl_copy_lock) fl->fl_ops->fl_copy_lock(new, fl); } } EXPORT_SYMBOL(locks_copy_lock); static void locks_move_blocks(struct file_lock *new, struct file_lock *fl) { struct file_lock *f; /* * As ctx->flc_lock is held, new requests cannot be added to * ->flc_blocked_requests, so we don't need a lock to check if it * is empty. */ if (list_empty(&fl->c.flc_blocked_requests)) return; spin_lock(&blocked_lock_lock); list_splice_init(&fl->c.flc_blocked_requests, &new->c.flc_blocked_requests); list_for_each_entry(f, &new->c.flc_blocked_requests, c.flc_blocked_member) f->c.flc_blocker = &new->c; spin_unlock(&blocked_lock_lock); } static inline int flock_translate_cmd(int cmd) { switch (cmd) { case LOCK_SH: return F_RDLCK; case LOCK_EX: return F_WRLCK; case LOCK_UN: return F_UNLCK; } return -EINVAL; } /* Fill in a file_lock structure with an appropriate FLOCK lock. */ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type) { locks_init_lock(fl); fl->c.flc_file = filp; fl->c.flc_owner = filp; fl->c.flc_pid = current->tgid; fl->c.flc_flags = FL_FLOCK; fl->c.flc_type = type; fl->fl_end = OFFSET_MAX; } static int assign_type(struct file_lock_core *flc, int type) { switch (type) { case F_RDLCK: case F_WRLCK: case F_UNLCK: flc->flc_type = type; break; default: return -EINVAL; } return 0; } static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock64 *l) { switch (l->l_whence) { case SEEK_SET: fl->fl_start = 0; break; case SEEK_CUR: fl->fl_start = filp->f_pos; break; case SEEK_END: fl->fl_start = i_size_read(file_inode(filp)); break; default: return -EINVAL; } if (l->l_start > OFFSET_MAX - fl->fl_start) return -EOVERFLOW; fl->fl_start += l->l_start; if (fl->fl_start < 0) return -EINVAL; /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ if (l->l_len > 0) { if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) return -EOVERFLOW; fl->fl_end = fl->fl_start + (l->l_len - 1); } else if (l->l_len < 0) { if (fl->fl_start + l->l_len < 0) return -EINVAL; fl->fl_end = fl->fl_start - 1; fl->fl_start += l->l_len; } else fl->fl_end = OFFSET_MAX; fl->c.flc_owner = current->files; fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; fl->c.flc_flags = FL_POSIX; fl->fl_ops = NULL; fl->fl_lmops = NULL; return assign_type(&fl->c, l->l_type); } /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX * style lock. */ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, struct flock *l) { struct flock64 ll = { .l_type = l->l_type, .l_whence = l->l_whence, .l_start = l->l_start, .l_len = l->l_len, }; return flock64_to_posix_lock(filp, fl, &ll); } /* default lease lock manager operations */ static bool lease_break_callback(struct file_lease *fl) { kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG); return false; } static void lease_setup(struct file_lease *fl, void **priv) { struct file *filp = fl->c.flc_file; struct fasync_struct *fa = *priv; /* * fasync_insert_entry() returns the old entry if any. If there was no * old entry, then it used "priv" and inserted it into the fasync list. * Clear the pointer to indicate that it shouldn't be freed. */ if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa)) *priv = NULL; __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0); } static const struct lease_manager_operations lease_manager_ops = { .lm_break = lease_break_callback, .lm_change = lease_modify, .lm_setup = lease_setup, }; /* * Initialize a lease, use the default lock manager operations */ static int lease_init(struct file *filp, int type, struct file_lease *fl) { if (assign_type(&fl->c, type) != 0) return -EINVAL; fl->c.flc_owner = filp; fl->c.flc_pid = current->tgid; fl->c.flc_file = filp; fl->c.flc_flags = FL_LEASE; fl->fl_lmops = &lease_manager_ops; return 0; } /* Allocate a file_lock initialised to this type of lease */ static struct file_lease *lease_alloc(struct file *filp, int type) { struct file_lease *fl = locks_alloc_lease(); int error = -ENOMEM; if (fl == NULL) return ERR_PTR(error); error = lease_init(filp, type, fl); if (error) { locks_free_lease(fl); return ERR_PTR(error); } return fl; } /* Check if two locks overlap each other. */ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) { return ((fl1->fl_end >= fl2->fl_start) && (fl2->fl_end >= fl1->fl_start)); } /* * Check whether two locks have the same owner. */ static int posix_same_owner(struct file_lock_core *fl1, struct file_lock_core *fl2) { return fl1->flc_owner == fl2->flc_owner; } /* Must be called with the flc_lock held! */ static void locks_insert_global_locks(struct file_lock_core *flc) { struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list); percpu_rwsem_assert_held(&file_rwsem); spin_lock(&fll->lock); flc->flc_link_cpu = smp_processor_id(); hlist_add_head(&flc->flc_link, &fll->hlist); spin_unlock(&fll->lock); } /* Must be called with the flc_lock held! */ static void locks_delete_global_locks(struct file_lock_core *flc) { struct file_lock_list_struct *fll; percpu_rwsem_assert_held(&file_rwsem); /* * Avoid taking lock if already unhashed. This is safe since this check * is done while holding the flc_lock, and new insertions into the list * also require that it be held. */ if (hlist_unhashed(&flc->flc_link)) return; fll = per_cpu_ptr(&file_lock_list, flc->flc_link_cpu); spin_lock(&fll->lock); hlist_del_init(&flc->flc_link); spin_unlock(&fll->lock); } static unsigned long posix_owner_key(struct file_lock_core *flc) { return (unsigned long) flc->flc_owner; } static void locks_insert_global_blocked(struct file_lock_core *waiter) { lockdep_assert_held(&blocked_lock_lock); hash_add(blocked_hash, &waiter->flc_link, posix_owner_key(waiter)); } static void locks_delete_global_blocked(struct file_lock_core *waiter) { lockdep_assert_held(&blocked_lock_lock); hash_del(&waiter->flc_link); } /* Remove waiter from blocker's block list. * When blocker ends up pointing to itself then the list is empty. * * Must be called with blocked_lock_lock held. */ static void __locks_unlink_block(struct file_lock_core *waiter) { locks_delete_global_blocked(waiter); list_del_init(&waiter->flc_blocked_member); } static void __locks_wake_up_blocks(struct file_lock_core *blocker) { while (!list_empty(&blocker->flc_blocked_requests)) { struct file_lock_core *waiter; struct file_lock *fl; waiter = list_first_entry(&blocker->flc_blocked_requests, struct file_lock_core, flc_blocked_member); fl = file_lock(waiter); __locks_unlink_block(waiter); if ((waiter->flc_flags & (FL_POSIX | FL_FLOCK)) && fl->fl_lmops && fl->fl_lmops->lm_notify) fl->fl_lmops->lm_notify(fl); else locks_wake_up(fl); /* * The setting of flc_blocker to NULL marks the "done" * point in deleting a block. Paired with acquire at the top * of locks_delete_block(). */ smp_store_release(&waiter->flc_blocker, NULL); } } static int __locks_delete_block(struct file_lock_core *waiter) { int status = -ENOENT; /* * If fl_blocker is NULL, it won't be set again as this thread "owns" * the lock and is the only one that might try to claim the lock. * * We use acquire/release to manage fl_blocker so that we can * optimize away taking the blocked_lock_lock in many cases. * * The smp_load_acquire guarantees two things: * * 1/ that fl_blocked_requests can be tested locklessly. If something * was recently added to that list it must have been in a locked region * *before* the locked region when fl_blocker was set to NULL. * * 2/ that no other thread is accessing 'waiter', so it is safe to free * it. __locks_wake_up_blocks is careful not to touch waiter after * fl_blocker is released. * * If a lockless check of fl_blocker shows it to be NULL, we know that * no new locks can be inserted into its fl_blocked_requests list, and * can avoid doing anything further if the list is empty. */ if (!smp_load_acquire(&waiter->flc_blocker) && list_empty(&waiter->flc_blocked_requests)) return status; spin_lock(&blocked_lock_lock); if (waiter->flc_blocker) status = 0; __locks_wake_up_blocks(waiter); __locks_unlink_block(waiter); /* * The setting of fl_blocker to NULL marks the "done" point in deleting * a block. Paired with acquire at the top of this function. */ smp_store_release(&waiter->flc_blocker, NULL); spin_unlock(&blocked_lock_lock); return status; } /** * locks_delete_block - stop waiting for a file lock * @waiter: the lock which was waiting * * lockd/nfsd need to disconnect the lock while working on it. */ int locks_delete_block(struct file_lock *waiter) { return __locks_delete_block(&waiter->c); } EXPORT_SYMBOL(locks_delete_block); /* Insert waiter into blocker's block list. * We use a circular list so that processes can be easily woken up in * the order they blocked. The documentation doesn't require this but * it seems like the reasonable thing to do. * * Must be called with both the flc_lock and blocked_lock_lock held. The * fl_blocked_requests list itself is protected by the blocked_lock_lock, * but by ensuring that the flc_lock is also held on insertions we can avoid * taking the blocked_lock_lock in some cases when we see that the * fl_blocked_requests list is empty. * * Rather than just adding to the list, we check for conflicts with any existing * waiters, and add beneath any waiter that blocks the new waiter. * Thus wakeups don't happen until needed. */ static void __locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *)) { struct file_lock_core *flc; BUG_ON(!list_empty(&waiter->flc_blocked_member)); new_blocker: list_for_each_entry(flc, &blocker->flc_blocked_requests, flc_blocked_member) if (conflict(flc, waiter)) { blocker = flc; goto new_blocker; } waiter->flc_blocker = blocker; list_add_tail(&waiter->flc_blocked_member, &blocker->flc_blocked_requests); if ((blocker->flc_flags & (FL_POSIX|FL_OFDLCK)) == FL_POSIX) locks_insert_global_blocked(waiter); /* The requests in waiter->flc_blocked are known to conflict with * waiter, but might not conflict with blocker, or the requests * and lock which block it. So they all need to be woken. */ __locks_wake_up_blocks(waiter); } /* Must be called with flc_lock held. */ static void locks_insert_block(struct file_lock_core *blocker, struct file_lock_core *waiter, bool conflict(struct file_lock_core *, struct file_lock_core *)) { spin_lock(&blocked_lock_lock); __locks_insert_block(blocker, waiter, conflict); spin_unlock(&blocked_lock_lock); } /* * Wake up processes blocked waiting for blocker. * * Must be called with the inode->flc_lock held! */ static void locks_wake_up_blocks(struct file_lock_core *blocker) { /* * Avoid taking global lock if list is empty. This is safe since new * blocked requests are only added to the list under the flc_lock, and * the flc_lock is always held here. Note that removal from the * fl_blocked_requests list does not require the flc_lock, so we must * recheck list_empty() after acquiring the blocked_lock_lock. */ if (list_empty(&blocker->flc_blocked_requests)) return; spin_lock(&blocked_lock_lock); __locks_wake_up_blocks(blocker); spin_unlock(&blocked_lock_lock); } static void locks_insert_lock_ctx(struct file_lock_core *fl, struct list_head *before) { list_add_tail(&fl->flc_list, before); locks_insert_global_locks(fl); } static void locks_unlink_lock_ctx(struct file_lock_core *fl) { locks_delete_global_locks(fl); list_del_init(&fl->flc_list); locks_wake_up_blocks(fl); } static void locks_delete_lock_ctx(struct file_lock_core *fl, struct list_head *dispose) { locks_unlink_lock_ctx(fl); if (dispose) list_add(&fl->flc_list, dispose); else locks_free_lock(file_lock(fl)); } /* Determine if lock sys_fl blocks lock caller_fl. Common functionality * checks for shared/exclusive status of overlapping locks. */ static bool locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { if (sys_flc->flc_type == F_WRLCK) return true; if (caller_flc->flc_type == F_WRLCK) return true; return false; } /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific * checking before calling the locks_conflict(). */ static bool posix_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { struct file_lock *caller_fl = file_lock(caller_flc); struct file_lock *sys_fl = file_lock(sys_flc); /* POSIX locks owned by the same process do not conflict with * each other. */ if (posix_same_owner(caller_flc, sys_flc)) return false; /* Check whether they overlap */ if (!locks_overlap(caller_fl, sys_fl)) return false; return locks_conflict(caller_flc, sys_flc); } /* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK * path so checks for additional GETLK-specific things like F_UNLCK. */ static bool posix_test_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl) { struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *sys = &sys_fl->c; /* F_UNLCK checks any locks on the same fd. */ if (lock_is_unlock(caller_fl)) { if (!posix_same_owner(caller, sys)) return false; return locks_overlap(caller_fl, sys_fl); } return posix_locks_conflict(caller, sys); } /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific * checking before calling the locks_conflict(). */ static bool flock_locks_conflict(struct file_lock_core *caller_flc, struct file_lock_core *sys_flc) { /* FLOCK locks referring to the same filp do not conflict with * each other. */ if (caller_flc->flc_file == sys_flc->flc_file) return false; return locks_conflict(caller_flc, sys_flc); } void posix_test_lock(struct file *filp, struct file_lock *fl) { struct file_lock *cfl; struct file_lock_context *ctx; struct inode *inode = file_inode(filp); void *owner; void (*func)(void); ctx = locks_inode_context(inode); if (!ctx || list_empty_careful(&ctx->flc_posix)) { fl->c.flc_type = F_UNLCK; return; } retry: spin_lock(&ctx->flc_lock); list_for_each_entry(cfl, &ctx->flc_posix, c.flc_list) { if (!posix_test_locks_conflict(fl, cfl)) continue; if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable && (*cfl->fl_lmops->lm_lock_expirable)(cfl)) { owner = cfl->fl_lmops->lm_mod_owner; func = cfl->fl_lmops->lm_expire_lock; __module_get(owner); spin_unlock(&ctx->flc_lock); (*func)(); module_put(owner); goto retry; } locks_copy_conflock(fl, cfl); goto out; } fl->c.flc_type = F_UNLCK; out: spin_unlock(&ctx->flc_lock); return; } EXPORT_SYMBOL(posix_test_lock); /* * Deadlock detection: * * We attempt to detect deadlocks that are due purely to posix file * locks. * * We assume that a task can be waiting for at most one lock at a time. * So for any acquired lock, the process holding that lock may be * waiting on at most one other lock. That lock in turns may be held by * someone waiting for at most one other lock. Given a requested lock * caller_fl which is about to wait for a conflicting lock block_fl, we * follow this chain of waiters to ensure we are not about to create a * cycle. * * Since we do this before we ever put a process to sleep on a lock, we * are ensured that there is never a cycle; that is what guarantees that * the while() loop in posix_locks_deadlock() eventually completes. * * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. * To handle those cases, we just bail out after a few iterations. * * For FL_OFDLCK locks, the owner is the filp, not the files_struct. * Because the owner is not even nominally tied to a thread of * execution, the deadlock detection below can't reasonably work well. Just * skip it for those. * * In principle, we could do a more limited deadlock detection on FL_OFDLCK * locks that just checks for the case where two tasks are attempting to * upgrade from read to write locks on the same inode. */ #define MAX_DEADLK_ITERATIONS 10 /* Find a lock that the owner of the given @blocker is blocking on. */ static struct file_lock_core *what_owner_is_waiting_for(struct file_lock_core *blocker) { struct file_lock_core *flc; hash_for_each_possible(blocked_hash, flc, flc_link, posix_owner_key(blocker)) { if (posix_same_owner(flc, blocker)) { while (flc->flc_blocker) flc = flc->flc_blocker; return flc; } } return NULL; } /* Must be called with the blocked_lock_lock held! */ static bool posix_locks_deadlock(struct file_lock *caller_fl, struct file_lock *block_fl) { struct file_lock_core *caller = &caller_fl->c; struct file_lock_core *blocker = &block_fl->c; int i = 0; lockdep_assert_held(&blocked_lock_lock); /* * This deadlock detector can't reasonably detect deadlocks with * FL_OFDLCK locks, since they aren't owned by a process, per-se. */ if (caller->flc_flags & FL_OFDLCK) return false; while ((blocker = what_owner_is_waiting_for(blocker))) { if (i++ > MAX_DEADLK_ITERATIONS) return false; if (posix_same_owner(caller, blocker)) return true; } return false; } /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks * after any leases, but before any posix locks. * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ static int flock_lock_inode(struct inode *inode, struct file_lock *request) { struct file_lock *new_fl = NULL; struct file_lock *fl; struct file_lock_context *ctx; int error = 0; bool found = false; LIST_HEAD(dispose); ctx = locks_get_lock_context(inode, request->c.flc_type); if (!ctx) { if (request->c.flc_type != F_UNLCK) return -ENOMEM; return (request->c.flc_flags & FL_EXISTS) ? -ENOENT : 0; } if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK)) { new_fl = locks_alloc_lock(); if (!new_fl) return -ENOMEM; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); if (request->c.flc_flags & FL_ACCESS) goto find_conflict; list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) { if (request->c.flc_file != fl->c.flc_file) continue; if (request->c.flc_type == fl->c.flc_type) goto out; found = true; locks_delete_lock_ctx(&fl->c, &dispose); break; } if (lock_is_unlock(request)) { if ((request->c.flc_flags & FL_EXISTS) && !found) error = -ENOENT; goto out; } find_conflict: list_for_each_entry(fl, &ctx->flc_flock, c.flc_list) { if (!flock_locks_conflict(&request->c, &fl->c)) continue; error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; error = FILE_LOCK_DEFERRED; locks_insert_block(&fl->c, &request->c, flock_locks_conflict); goto out; } if (request->c.flc_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); locks_insert_lock_ctx(&new_fl->c, &ctx->flc_flock); new_fl = NULL; error = 0; out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); if (new_fl) locks_free_lock(new_fl); locks_dispose_list(&dispose); trace_flock_lock_inode(inode, request, error); return error; } static int posix_lock_inode(struct inode *inode, struct file_lock *request, struct file_lock *conflock) { struct file_lock *fl, *tmp; struct file_lock *new_fl = NULL; struct file_lock *new_fl2 = NULL; struct file_lock *left = NULL; struct file_lock *right = NULL; struct file_lock_context *ctx; int error; bool added = false; LIST_HEAD(dispose); void *owner; void (*func)(void); ctx = locks_get_lock_context(inode, request->c.flc_type); if (!ctx) return lock_is_unlock(request) ? 0 : -ENOMEM; /* * We may need two file_lock structures for this operation, * so we get them in advance to avoid races. * * In some cases we can be sure, that no new locks will be needed */ if (!(request->c.flc_flags & FL_ACCESS) && (request->c.flc_type != F_UNLCK || request->fl_start != 0 || request->fl_end != OFFSET_MAX)) { new_fl = locks_alloc_lock(); new_fl2 = locks_alloc_lock(); } retry: percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); /* * New lock request. Walk all POSIX locks and look for conflicts. If * there are any, either return error or put the request on the * blocker's list of waiters and the global blocked_hash. */ if (request->c.flc_type != F_UNLCK) { list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (!posix_locks_conflict(&request->c, &fl->c)) continue; if (fl->fl_lmops && fl->fl_lmops->lm_lock_expirable && (*fl->fl_lmops->lm_lock_expirable)(fl)) { owner = fl->fl_lmops->lm_mod_owner; func = fl->fl_lmops->lm_expire_lock; __module_get(owner); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); (*func)(); module_put(owner); goto retry; } if (conflock) locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->c.flc_flags & FL_SLEEP)) goto out; /* * Deadlock detection and insertion into the blocked * locks list must be done while holding the same lock! */ error = -EDEADLK; spin_lock(&blocked_lock_lock); /* * Ensure that we don't find any locks blocked on this * request during deadlock detection. */ __locks_wake_up_blocks(&request->c); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; __locks_insert_block(&fl->c, &request->c, posix_locks_conflict); } spin_unlock(&blocked_lock_lock); goto out; } } /* If we're just looking for a conflict, we're done. */ error = 0; if (request->c.flc_flags & FL_ACCESS) goto out; /* Find the first old lock with the same owner as the new lock */ list_for_each_entry(fl, &ctx->flc_posix, c.flc_list) { if (posix_same_owner(&request->c, &fl->c)) break; } /* Process locks with this owner. */ list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, c.flc_list) { if (!posix_same_owner(&request->c, &fl->c)) break; /* Detect adjacent or overlapping regions (if same lock type) */ if (request->c.flc_type == fl->c.flc_type) { /* In all comparisons of start vs end, use * "start - 1" rather than "end + 1". If end * is OFFSET_MAX, end + 1 will become negative. */ if (fl->fl_end < request->fl_start - 1) continue; /* If the next lock in the list has entirely bigger * addresses than the new one, insert the lock here. */ if (fl->fl_start - 1 > request->fl_end) break; /* If we come here, the new and old lock are of the * same type and adjacent or overlapping. Make one * lock yielding from the lower start address of both * locks to the higher end address. */ if (fl->fl_start > request->fl_start) fl->fl_start = request->fl_start; else request->fl_start = fl->fl_start; if (fl->fl_end < request->fl_end) fl->fl_end = request->fl_end; else request->fl_end = fl->fl_end; if (added) { locks_delete_lock_ctx(&fl->c, &dispose); continue; } request = fl; added = true; } else { /* Processing for different lock types is a bit * more complex. */ if (fl->fl_end < request->fl_start) continue; if (fl->fl_start > request->fl_end) break; if (lock_is_unlock(request)) added = true; if (fl->fl_start < request->fl_start) left = fl; /* If the next lock in the list has a higher end * address than the new one, insert the new one here. */ if (fl->fl_end > request->fl_end) { right = fl; break; } if (fl->fl_start >= request->fl_start) { /* The new lock completely replaces an old * one (This may happen several times). */ if (added) { locks_delete_lock_ctx(&fl->c, &dispose); continue; } /* * Replace the old lock with new_fl, and * remove the old one. It's safe to do the * insert here since we know that we won't be * using new_fl later, and that the lock is * just replacing an existing lock. */ error = -ENOLCK; if (!new_fl) goto out; locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); request = new_fl; new_fl = NULL; locks_insert_lock_ctx(&request->c, &fl->c.flc_list); locks_delete_lock_ctx(&fl->c, &dispose); added = true; } } } /* * The above code only modifies existing locks in case of merging or * replacing. If new lock(s) need to be inserted all modifications are * done below this, so it's safe yet to bail out. */ error = -ENOLCK; /* "no luck" */ if (right && left == right && !new_fl2) goto out; error = 0; if (!added) { if (lock_is_unlock(request)) { if (request->c.flc_flags & FL_EXISTS) error = -ENOENT; goto out; } if (!new_fl) { error = -ENOLCK; goto out; } locks_copy_lock(new_fl, request); locks_move_blocks(new_fl, request); locks_insert_lock_ctx(&new_fl->c, &fl->c.flc_list); fl = new_fl; new_fl = NULL; } if (right) { if (left == right) { /* The new lock breaks the old one in two pieces, * so we have to use the second new lock. */ left = new_fl2; new_fl2 = NULL; locks_copy_lock(left, right); locks_insert_lock_ctx(&left->c, &fl->c.flc_list); } right->fl_start = request->fl_end + 1; locks_wake_up_blocks(&right->c); } if (left) { left->fl_end = request->fl_start - 1; locks_wake_up_blocks(&left->c); } out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); trace_posix_lock_inode(inode, request, error); /* * Free any unused locks. */ if (new_fl) locks_free_lock(new_fl); if (new_fl2) locks_free_lock(new_fl2); locks_dispose_list(&dispose); return error; } /** * posix_lock_file - Apply a POSIX-style lock to a file * @filp: The file to apply the lock to * @fl: The lock to be applied * @conflock: Place to return a copy of the conflicting lock, if found. * * Add a POSIX style lock to a file. * We merge adjacent & overlapping locks whenever possible. * POSIX locks are sorted by owner task, then by starting address * * Note that if called with an FL_EXISTS argument, the caller may determine * whether or not a lock was successfully freed by testing the return * value for -ENOENT. */ int posix_lock_file(struct file *filp, struct file_lock *fl, struct file_lock *conflock) { return posix_lock_inode(file_inode(filp), fl, conflock); } EXPORT_SYMBOL(posix_lock_file); /** * posix_lock_inode_wait - Apply a POSIX-style lock to a file * @inode: inode of file to which lock request should be applied * @fl: The lock to be applied * * Apply a POSIX style lock request to an inode. */ static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep (); for (;;) { error = posix_lock_inode(inode, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } static void lease_clear_pending(struct file_lease *fl, int arg) { switch (arg) { case F_UNLCK: fl->c.flc_flags &= ~FL_UNLOCK_PENDING; fallthrough; case F_RDLCK: fl->c.flc_flags &= ~FL_DOWNGRADE_PENDING; } } /* We already had a lease on this file; just change its type */ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) { int error = assign_type(&fl->c, arg); if (error) return error; lease_clear_pending(fl, arg); locks_wake_up_blocks(&fl->c); if (arg == F_UNLCK) { struct file *filp = fl->c.flc_file; f_delown(filp); filp->f_owner.signum = 0; fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync); if (fl->fl_fasync != NULL) { printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); fl->fl_fasync = NULL; } locks_delete_lock_ctx(&fl->c, dispose); } return 0; } EXPORT_SYMBOL(lease_modify); static bool past_time(unsigned long then) { if (!then) /* 0 is a special value meaning "this never expires": */ return false; return time_after(jiffies, then); } static void time_out_leases(struct inode *inode, struct list_head *dispose) { struct file_lock_context *ctx = inode->i_flctx; struct file_lease *fl, *tmp; lockdep_assert_held(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { trace_time_out_leases(inode, fl); if (past_time(fl->fl_downgrade_time)) lease_modify(fl, F_RDLCK, dispose); if (past_time(fl->fl_break_time)) lease_modify(fl, F_UNLCK, dispose); } } static bool leases_conflict(struct file_lock_core *lc, struct file_lock_core *bc) { bool rc; struct file_lease *lease = file_lease(lc); struct file_lease *breaker = file_lease(bc); if (lease->fl_lmops->lm_breaker_owns_lease && lease->fl_lmops->lm_breaker_owns_lease(lease)) return false; if ((bc->flc_flags & FL_LAYOUT) != (lc->flc_flags & FL_LAYOUT)) { rc = false; goto trace; } if ((bc->flc_flags & FL_DELEG) && (lc->flc_flags & FL_LEASE)) { rc = false; goto trace; } rc = locks_conflict(bc, lc); trace: trace_leases_conflict(rc, lease, breaker); return rc; } static bool any_leases_conflict(struct inode *inode, struct file_lease *breaker) { struct file_lock_context *ctx = inode->i_flctx; struct file_lock_core *flc; lockdep_assert_held(&ctx->flc_lock); list_for_each_entry(flc, &ctx->flc_lease, flc_list) { if (leases_conflict(flc, &breaker->c)) return true; } return false; } /** * __break_lease - revoke all outstanding leases on file * @inode: the inode of the file to return * @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR: * break all leases * @type: FL_LEASE: break leases and delegations; FL_DELEG: break * only delegations * * break_lease (inlined for speed) has checked there already is at least * some kind of lock (maybe a lease) on this file. Leases are broken on * a call to open() or truncate(). This function can sleep unless you * specified %O_NONBLOCK to your open(). */ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { int error = 0; struct file_lock_context *ctx; struct file_lease *new_fl, *fl, *tmp; unsigned long break_time; int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); new_fl->c.flc_flags = type; /* typically we will check that ctx is non-NULL before calling */ ctx = locks_inode_context(inode); if (!ctx) { WARN_ON_ONCE(1); goto free_lock; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); if (!any_leases_conflict(inode, new_fl)) goto out; break_time = 0; if (lease_break_time > 0) { break_time = jiffies + lease_break_time * HZ; if (break_time == 0) break_time++; /* so that 0 means no break time */ } list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { if (!leases_conflict(&fl->c, &new_fl->c)) continue; if (want_write) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) continue; fl->c.flc_flags |= FL_UNLOCK_PENDING; fl->fl_break_time = break_time; } else { if (lease_breaking(fl)) continue; fl->c.flc_flags |= FL_DOWNGRADE_PENDING; fl->fl_downgrade_time = break_time; } if (fl->fl_lmops->lm_break(fl)) locks_delete_lock_ctx(&fl->c, &dispose); } if (list_empty(&ctx->flc_lease)) goto out; if (mode & O_NONBLOCK) { trace_break_lease_noblock(inode, new_fl); error = -EWOULDBLOCK; goto out; } restart: fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list); break_time = fl->fl_break_time; if (break_time != 0) break_time -= jiffies; if (break_time == 0) break_time++; locks_insert_block(&fl->c, &new_fl->c, leases_conflict); trace_break_lease_block(inode, new_fl); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); error = wait_event_interruptible_timeout(new_fl->c.flc_wait, list_empty(&new_fl->c.flc_blocked_member), break_time); percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); trace_break_lease_unblock(inode, new_fl); __locks_delete_block(&new_fl->c); if (error >= 0) { /* * Wait for the next conflicting lease that has not been * broken yet */ if (error == 0) time_out_leases(inode, &dispose); if (any_leases_conflict(inode, new_fl)) goto restart; error = 0; } out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); free_lock: locks_free_lease(new_fl); return error; } EXPORT_SYMBOL(__break_lease); /** * lease_get_mtime - update modified time of an inode with exclusive lease * @inode: the inode * @time: pointer to a timespec which contains the last modified time * * This is to force NFS clients to flush their caches for files with * exclusive leases. The justification is that if someone has an * exclusive lease, then they could be modifying it. */ void lease_get_mtime(struct inode *inode, struct timespec64 *time) { bool has_lease = false; struct file_lock_context *ctx; struct file_lock_core *flc; ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { spin_lock(&ctx->flc_lock); flc = list_first_entry_or_null(&ctx->flc_lease, struct file_lock_core, flc_list); if (flc && flc->flc_type == F_WRLCK) has_lease = true; spin_unlock(&ctx->flc_lock); } if (has_lease) *time = current_time(inode); } EXPORT_SYMBOL(lease_get_mtime); /** * fcntl_getlease - Enquire what lease is currently active * @filp: the file * * The value returned by this function will be one of * (if no lease break is pending): * * %F_RDLCK to indicate a shared lease is held. * * %F_WRLCK to indicate an exclusive lease is held. * * %F_UNLCK to indicate no lease is held. * * (if a lease break is pending): * * %F_RDLCK to indicate an exclusive lease needs to be * changed to a shared lease (or removed). * * %F_UNLCK to indicate the lease needs to be removed. * * XXX: sfr & willy disagree over whether F_INPROGRESS * should be returned to userspace. */ int fcntl_getlease(struct file *filp) { struct file_lease *fl; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int type = F_UNLCK; LIST_HEAD(dispose); ctx = locks_inode_context(inode); if (ctx && !list_empty_careful(&ctx->flc_lease)) { percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file != filp) continue; type = target_leasetype(fl); break; } spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); } return type; } /** * check_conflicting_open - see if the given file points to an inode that has * an existing open that would conflict with the * desired lease. * @filp: file to check * @arg: type of lease that we're trying to acquire * @flags: current lock flags * * Check to see if there's an existing open fd on this file that would * conflict with the lease we're trying to set. */ static int check_conflicting_open(struct file *filp, const int arg, int flags) { struct inode *inode = file_inode(filp); int self_wcount = 0, self_rcount = 0; if (flags & FL_LAYOUT) return 0; if (flags & FL_DELEG) /* We leave these checks to the caller */ return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? -EAGAIN : 0; else if (arg != F_WRLCK) return 0; /* * Make sure that only read/write count is from lease requestor. * Note that this will result in denying write leases when i_writecount * is negative, which is what we want. (We shouldn't grant write leases * on files open for execution.) */ if (filp->f_mode & FMODE_WRITE) self_wcount = 1; else if (filp->f_mode & FMODE_READ) self_rcount = 1; if (atomic_read(&inode->i_writecount) != self_wcount || atomic_read(&inode->i_readcount) != self_rcount) return -EAGAIN; return 0; } static int generic_add_lease(struct file *filp, int arg, struct file_lease **flp, void **priv) { struct file_lease *fl, *my_fl = NULL, *lease; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; bool is_deleg = (*flp)->c.flc_flags & FL_DELEG; int error; LIST_HEAD(dispose); lease = *flp; trace_generic_add_lease(inode, lease); /* Note that arg is never F_UNLCK here */ ctx = locks_get_lock_context(inode, arg); if (!ctx) return -ENOMEM; /* * In the delegation case we need mutual exclusion with * a number of operations that take the i_mutex. We trylock * because delegations are an optional optimization, and if * there's some chance of a conflict--we'd rather not * bother, maybe that's a sign this just isn't a good file to * hand out a delegation on. */ if (is_deleg && !inode_trylock(inode)) return -EAGAIN; percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); time_out_leases(inode, &dispose); error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) goto out; /* * At this point, we know that if there is an exclusive * lease on this file, then we hold it on this filp * (otherwise our open of this file would have blocked). * And if we are trying to acquire an exclusive lease, * then the file is not open by anyone (including us) * except for this filp. */ error = -EAGAIN; list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp && fl->c.flc_owner == lease->c.flc_owner) { my_fl = fl; continue; } /* * No exclusive leases if someone else has a lease on * this file: */ if (arg == F_WRLCK) goto out; /* * Modifying our existing lease is OK, but no getting a * new lease if someone else is opening for write: */ if (fl->c.flc_flags & FL_UNLOCK_PENDING) goto out; } if (my_fl != NULL) { lease = my_fl; error = lease->fl_lmops->lm_change(lease, arg, &dispose); if (error) goto out; goto out_setup; } error = -EINVAL; if (!leases_enable) goto out; locks_insert_lock_ctx(&lease->c, &ctx->flc_lease); /* * The check in break_lease() is lockless. It's possible for another * open to race in after we did the earlier check for a conflicting * open but before the lease was inserted. Check again for a * conflicting open and cancel the lease if there is one. * * We also add a barrier here to ensure that the insertion of the lock * precedes these checks. */ smp_mb(); error = check_conflicting_open(filp, arg, lease->c.flc_flags); if (error) { locks_unlink_lock_ctx(&lease->c); goto out; } out_setup: if (lease->fl_lmops->lm_setup) lease->fl_lmops->lm_setup(lease, priv); out: spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); if (is_deleg) inode_unlock(inode); if (!error && !my_fl) *flp = NULL; return error; } static int generic_delete_lease(struct file *filp, void *owner) { int error = -EAGAIN; struct file_lease *fl, *victim = NULL; struct inode *inode = file_inode(filp); struct file_lock_context *ctx; LIST_HEAD(dispose); ctx = locks_inode_context(inode); if (!ctx) { trace_generic_delete_lease(inode, NULL); return error; } percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { if (fl->c.flc_file == filp && fl->c.flc_owner == owner) { victim = fl; break; } } trace_generic_delete_lease(inode, victim); if (victim) error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); return error; } /** * generic_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted * @priv: private data for lm_setup (may be NULL if lm_setup * doesn't require it) * * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). */ int generic_setlease(struct file *filp, int arg, struct file_lease **flp, void **priv) { switch (arg) { case F_UNLCK: return generic_delete_lease(filp, *priv); case F_RDLCK: case F_WRLCK: if (!(*flp)->fl_lmops->lm_break) { WARN_ON_ONCE(1); return -ENOLCK; } return generic_add_lease(filp, arg, flp, priv); default: return -EINVAL; } } EXPORT_SYMBOL(generic_setlease); /* * Kernel subsystems can register to be notified on any attempt to set * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd * to close files that it may have cached when there is an attempt to set a * conflicting lease. */ static struct srcu_notifier_head lease_notifier_chain; static inline void lease_notifier_chain_init(void) { srcu_init_notifier_head(&lease_notifier_chain); } static inline void setlease_notifier(int arg, struct file_lease *lease) { if (arg != F_UNLCK) srcu_notifier_call_chain(&lease_notifier_chain, arg, lease); } int lease_register_notifier(struct notifier_block *nb) { return srcu_notifier_chain_register(&lease_notifier_chain, nb); } EXPORT_SYMBOL_GPL(lease_register_notifier); void lease_unregister_notifier(struct notifier_block *nb) { srcu_notifier_chain_unregister(&lease_notifier_chain, nb); } EXPORT_SYMBOL_GPL(lease_unregister_notifier); int kernel_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { if (lease) setlease_notifier(arg, *lease); if (filp->f_op->setlease) return filp->f_op->setlease(filp, arg, lease, priv); else return generic_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(kernel_setlease); /** * vfs_setlease - sets a lease on an open file * @filp: file pointer * @arg: type of lease to obtain * @lease: file_lock to use when adding a lease * @priv: private info for lm_setup when adding a lease (may be * NULL if lm_setup doesn't require it) * * Call this to establish a lease on the file. The "lease" argument is not * used for F_UNLCK requests and may be NULL. For commands that set or alter * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be * set; if not, this function will return -ENOLCK (and generate a scary-looking * stack trace). * * The "priv" pointer is passed directly to the lm_setup function as-is. It * may be NULL if the lm_setup operation doesn't require it. */ int vfs_setlease(struct file *filp, int arg, struct file_lease **lease, void **priv) { struct inode *inode = file_inode(filp); vfsuid_t vfsuid = i_uid_into_vfsuid(file_mnt_idmap(filp), inode); int error; if ((!vfsuid_eq_kuid(vfsuid, current_fsuid())) && !capable(CAP_LEASE)) return -EACCES; if (!S_ISREG(inode->i_mode)) return -EINVAL; error = security_file_lock(filp, arg); if (error) return error; return kernel_setlease(filp, arg, lease, priv); } EXPORT_SYMBOL_GPL(vfs_setlease); static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg) { struct file_lease *fl; struct fasync_struct *new; int error; fl = lease_alloc(filp, arg); if (IS_ERR(fl)) return PTR_ERR(fl); new = fasync_alloc(); if (!new) { locks_free_lease(fl); return -ENOMEM; } new->fa_fd = fd; error = vfs_setlease(filp, arg, &fl, (void **)&new); if (fl) locks_free_lease(fl); if (new) fasync_free(new); return error; } /** * fcntl_setlease - sets a lease on an open file * @fd: open file descriptor * @filp: file pointer * @arg: type of lease to obtain * * Call this fcntl to establish a lease on the file. * Note that you also need to call %F_SETSIG to * receive a signal when the lease is broken. */ int fcntl_setlease(unsigned int fd, struct file *filp, int arg) { if (arg == F_UNLCK) return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp); return do_fcntl_add_lease(fd, filp, arg); } /** * flock_lock_inode_wait - Apply a FLOCK-style lock to a file * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a FLOCK style lock request to an inode. */ static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int error; might_sleep(); for (;;) { error = flock_lock_inode(inode, fl); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } /** * locks_lock_inode_wait - Apply a lock to an inode * @inode: inode of the file to apply to * @fl: The lock to be applied * * Apply a POSIX or FLOCK style lock request to an inode. */ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { int res = 0; switch (fl->c.flc_flags & (FL_POSIX|FL_FLOCK)) { case FL_POSIX: res = posix_lock_inode_wait(inode, fl); break; case FL_FLOCK: res = flock_lock_inode_wait(inode, fl); break; default: BUG(); } return res; } EXPORT_SYMBOL(locks_lock_inode_wait); /** * sys_flock: - flock() system call. * @fd: the file descriptor to lock. * @cmd: the type of lock to apply. * * Apply a %FL_FLOCK style lock to an open file descriptor. * The @cmd can be one of: * * - %LOCK_SH -- a shared lock. * - %LOCK_EX -- an exclusive lock. * - %LOCK_UN -- remove an existing lock. * - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED) * * %LOCK_MAND support has been removed from the kernel. */ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { int can_sleep, error, type; struct file_lock fl; struct fd f; /* * LOCK_MAND locks were broken for a long time in that they never * conflicted with one another and didn't prevent any sort of open, * read or write activity. * * Just ignore these requests now, to preserve legacy behavior, but * throw a warning to let people know that they don't actually work. */ if (cmd & LOCK_MAND) { pr_warn_once("%s(%d): Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n", current->comm, current->pid); return 0; } type = flock_translate_cmd(cmd & ~LOCK_NB); if (type < 0) return type; error = -EBADF; f = fdget(fd); if (!f.file) return error; if (type != F_UNLCK && !(f.file->f_mode & (FMODE_READ | FMODE_WRITE))) goto out_putf; flock_make_lock(f.file, &fl, type); error = security_file_lock(f.file, fl.c.flc_type); if (error) goto out_putf; can_sleep = !(cmd & LOCK_NB); if (can_sleep) fl.c.flc_flags |= FL_SLEEP; if (f.file->f_op->flock) error = f.file->f_op->flock(f.file, (can_sleep) ? F_SETLKW : F_SETLK, &fl); else error = locks_lock_file_wait(f.file, &fl); locks_release_private(&fl); out_putf: fdput(f); return error; } /** * vfs_test_lock - test file byte range lock * @filp: The file to test lock for * @fl: The lock to test; also used to hold result * * Returns -ERRNO on failure. Indicates presence of conflicting lock by * setting conf->fl_type to something other than F_UNLCK. */ int vfs_test_lock(struct file *filp, struct file_lock *fl) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_GETLK, fl); posix_test_lock(filp, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_test_lock); /** * locks_translate_pid - translate a file_lock's fl_pid number into a namespace * @fl: The file_lock who's fl_pid should be translated * @ns: The namespace into which the pid should be translated * * Used to translate a fl_pid into a namespace virtual pid number */ static pid_t locks_translate_pid(struct file_lock_core *fl, struct pid_namespace *ns) { pid_t vnr; struct pid *pid; if (fl->flc_flags & FL_OFDLCK) return -1; /* Remote locks report a negative pid value */ if (fl->flc_pid <= 0) return fl->flc_pid; /* * If the flock owner process is dead and its pid has been already * freed, the translation below won't work, but we still want to show * flock owner pid number in init pidns. */ if (ns == &init_pid_ns) return (pid_t) fl->flc_pid; rcu_read_lock(); pid = find_pid_ns(fl->flc_pid, &init_pid_ns); vnr = pid_nr_ns(pid, ns); rcu_read_unlock(); return vnr; } static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current)); #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via * legacy 32bit flock. */ if (fl->fl_start > OFFT_OFFSET_MAX) return -EOVERFLOW; if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX) return -EOVERFLOW; #endif flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; flock->l_type = fl->c.flc_type; return 0; } #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { flock->l_pid = locks_translate_pid(&fl->c, task_active_pid_ns(current)); flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; flock->l_whence = 0; flock->l_type = fl->c.flc_type; } #endif /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *fl; int error; fl = locks_alloc_lock(); if (fl == NULL) return -ENOMEM; error = -EINVAL; if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; error = flock_to_posix_lock(filp, fl, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock->l_pid != 0) goto out; fl->c.flc_flags |= FL_OFDLCK; fl->c.flc_owner = filp; } error = vfs_test_lock(filp, fl); if (error) goto out; flock->l_type = fl->c.flc_type; if (fl->c.flc_type != F_UNLCK) { error = posix_lock_to_flock(flock, fl); if (error) goto out; } out: locks_free_lock(fl); return error; } /** * vfs_lock_file - file byte range lock * @filp: The file to apply the lock to * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.) * @fl: The lock to be applied * @conf: Place to return a copy of the conflicting lock, if found. * * A caller that doesn't care about the conflicting lock may pass NULL * as the final argument. * * If the filesystem defines a private ->lock() method, then @conf will * be left unchanged; so a caller that cares should initialize it to * some acceptable default. * * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX * locks, the ->lock() interface may return asynchronously, before the lock has * been granted or denied by the underlying filesystem, if (and only if) * lm_grant is set. Additionally EXPORT_OP_ASYNC_LOCK in export_operations * flags need to be set. * * Callers expecting ->lock() to return asynchronously will only use F_SETLK, * not F_SETLKW; they will set FL_SLEEP if (and only if) the request is for a * blocking lock. When ->lock() does return asynchronously, it must return * FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock request completes. * If the request is for non-blocking lock the file system should return * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine * with the result. If the request timed out the callback routine will return a * nonzero return code and the file system should release the lock. The file * system is also responsible to keep a corresponding posix lock when it * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, cmd, fl); else return posix_lock_file(filp, fl, conf); } EXPORT_SYMBOL_GPL(vfs_lock_file); static int do_lock_file_wait(struct file *filp, unsigned int cmd, struct file_lock *fl) { int error; error = security_file_lock(filp, fl->c.flc_type); if (error) return error; for (;;) { error = vfs_lock_file(filp, cmd, fl, NULL); if (error != FILE_LOCK_DEFERRED) break; error = wait_event_interruptible(fl->c.flc_wait, list_empty(&fl->c.flc_blocked_member)); if (error) break; } locks_delete_block(fl); return error; } /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */ static int check_fmode_for_setlk(struct file_lock *fl) { switch (fl->c.flc_type) { case F_RDLCK: if (!(fl->c.flc_file->f_mode & FMODE_READ)) return -EBADF; break; case F_WRLCK: if (!(fl->c.flc_file->f_mode & FMODE_WRITE)) return -EBADF; } return 0; } /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd, struct flock *flock) { struct file_lock *file_lock = locks_alloc_lock(); struct inode *inode = file_inode(filp); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; error = flock_to_posix_lock(filp, file_lock, flock); if (error) goto out; error = check_fmode_for_setlk(file_lock); if (error) goto out; /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLK; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; break; case F_OFD_SETLKW: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLKW; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; fallthrough; case F_SETLKW: file_lock->c.flc_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by releasing the * lock that was just acquired. There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && !(file_lock->c.flc_flags & FL_OFDLCK)) { struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ spin_lock(&files->file_lock); f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { file_lock->c.flc_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); WARN_ON_ONCE(error); error = -EBADF; } } out: trace_fcntl_setlk(inode, file_lock, error); locks_free_lock(file_lock); return error; } #if BITS_PER_LONG == 32 /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock *fl; int error; fl = locks_alloc_lock(); if (fl == NULL) return -ENOMEM; error = -EINVAL; if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK && flock->l_type != F_WRLCK) goto out; error = flock64_to_posix_lock(filp, fl, flock); if (error) goto out; if (cmd == F_OFD_GETLK) { error = -EINVAL; if (flock->l_pid != 0) goto out; fl->c.flc_flags |= FL_OFDLCK; fl->c.flc_owner = filp; } error = vfs_test_lock(filp, fl); if (error) goto out; flock->l_type = fl->c.flc_type; if (fl->c.flc_type != F_UNLCK) posix_lock_to_flock64(flock, fl); out: locks_free_lock(fl); return error; } /* Apply the lock described by l to an open file descriptor. * This implements both the F_SETLK and F_SETLKW commands of fcntl(). */ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd, struct flock64 *flock) { struct file_lock *file_lock = locks_alloc_lock(); struct file *f; int error; if (file_lock == NULL) return -ENOLCK; error = flock64_to_posix_lock(filp, file_lock, flock); if (error) goto out; error = check_fmode_for_setlk(file_lock); if (error) goto out; /* * If the cmd is requesting file-private locks, then set the * FL_OFDLCK flag and override the owner. */ switch (cmd) { case F_OFD_SETLK: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLK64; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; break; case F_OFD_SETLKW: error = -EINVAL; if (flock->l_pid != 0) goto out; cmd = F_SETLKW64; file_lock->c.flc_flags |= FL_OFDLCK; file_lock->c.flc_owner = filp; fallthrough; case F_SETLKW64: file_lock->c.flc_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); /* * Attempt to detect a close/fcntl race and recover by releasing the * lock that was just acquired. There is no need to do that when we're * unlocking though, or for OFD locks. */ if (!error && file_lock->c.flc_type != F_UNLCK && !(file_lock->c.flc_flags & FL_OFDLCK)) { struct files_struct *files = current->files; /* * We need that spin_lock here - it prevents reordering between * update of i_flctx->flc_posix and check for it done in * close(). rcu_read_lock() wouldn't do. */ spin_lock(&files->file_lock); f = files_lookup_fd_locked(files, fd); spin_unlock(&files->file_lock); if (f != filp) { file_lock->c.flc_type = F_UNLCK; error = do_lock_file_wait(filp, cmd, file_lock); WARN_ON_ONCE(error); error = -EBADF; } } out: locks_free_lock(file_lock); return error; } #endif /* BITS_PER_LONG == 32 */ /* * This function is called when the file is being removed * from the task's fd array. POSIX locks belonging to this task * are deleted at this time. */ void locks_remove_posix(struct file *filp, fl_owner_t owner) { int error; struct inode *inode = file_inode(filp); struct file_lock lock; struct file_lock_context *ctx; /* * If there are no locks held on this file, we don't need to call * posix_lock_file(). Another process could be setting a lock on this * file at the same time, but we wouldn't remove that lock anyway. */ ctx = locks_inode_context(inode); if (!ctx || list_empty(&ctx->flc_posix)) return; locks_init_lock(&lock); lock.c.flc_type = F_UNLCK; lock.c.flc_flags = FL_POSIX | FL_CLOSE; lock.fl_start = 0; lock.fl_end = OFFSET_MAX; lock.c.flc_owner = owner; lock.c.flc_pid = current->tgid; lock.c.flc_file = filp; lock.fl_ops = NULL; lock.fl_lmops = NULL; error = vfs_lock_file(filp, F_SETLK, &lock, NULL); if (lock.fl_ops && lock.fl_ops->fl_release_private) lock.fl_ops->fl_release_private(&lock); trace_locks_remove_posix(inode, &lock, error); } EXPORT_SYMBOL(locks_remove_posix); /* The i_flctx must be valid when calling into here */ static void locks_remove_flock(struct file *filp, struct file_lock_context *flctx) { struct file_lock fl; struct inode *inode = file_inode(filp); if (list_empty(&flctx->flc_flock)) return; flock_make_lock(filp, &fl, F_UNLCK); fl.c.flc_flags |= FL_CLOSE; if (filp->f_op->flock) filp->f_op->flock(filp, F_SETLKW, &fl); else flock_lock_inode(inode, &fl); if (fl.fl_ops && fl.fl_ops->fl_release_private) fl.fl_ops->fl_release_private(&fl); } /* The i_flctx must be valid when calling into here */ static void locks_remove_lease(struct file *filp, struct file_lock_context *ctx) { struct file_lease *fl, *tmp; LIST_HEAD(dispose); if (list_empty(&ctx->flc_lease)) return; percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) if (filp == fl->c.flc_file) lease_modify(fl, F_UNLCK, &dispose); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); locks_dispose_list(&dispose); } /* * This function is called on the last close of an open file. */ void locks_remove_file(struct file *filp) { struct file_lock_context *ctx; ctx = locks_inode_context(file_inode(filp)); if (!ctx) return; /* remove any OFD locks */ locks_remove_posix(filp, filp); /* remove flock locks */ locks_remove_flock(filp, ctx); /* remove any leases */ locks_remove_lease(filp, ctx); spin_lock(&ctx->flc_lock); locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX"); locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK"); locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE"); spin_unlock(&ctx->flc_lock); } /** * vfs_cancel_lock - file byte range unblock lock * @filp: The file to apply the unblock to * @fl: The lock to be unblocked * * Used by lock managers to cancel blocked requests */ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) { WARN_ON_ONCE(filp != fl->c.flc_file); if (filp->f_op->lock) return filp->f_op->lock(filp, F_CANCELLK, fl); return 0; } EXPORT_SYMBOL_GPL(vfs_cancel_lock); /** * vfs_inode_has_locks - are any file locks held on @inode? * @inode: inode to check for locks * * Return true if there are any FL_POSIX or FL_FLOCK locks currently * set on @inode. */ bool vfs_inode_has_locks(struct inode *inode) { struct file_lock_context *ctx; bool ret; ctx = locks_inode_context(inode); if (!ctx) return false; spin_lock(&ctx->flc_lock); ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); spin_unlock(&ctx->flc_lock); return ret; } EXPORT_SYMBOL_GPL(vfs_inode_has_locks); #ifdef CONFIG_PROC_FS #include <linux/proc_fs.h> #include <linux/seq_file.h> struct locks_iterator { int li_cpu; loff_t li_pos; }; static void lock_get_status(struct seq_file *f, struct file_lock_core *flc, loff_t id, char *pfx, int repeat) { struct inode *inode = NULL; unsigned int pid; struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb); int type = flc->flc_type; struct file_lock *fl = file_lock(flc); pid = locks_translate_pid(flc, proc_pidns); /* * If lock owner is dead (and pid is freed) or not visible in current * pidns, zero is shown as a pid value. Check lock info from * init_pid_ns to get saved lock pid value. */ if (flc->flc_file != NULL) inode = file_inode(flc->flc_file); seq_printf(f, "%lld: ", id); if (repeat) seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx); if (flc->flc_flags & FL_POSIX) { if (flc->flc_flags & FL_ACCESS) seq_puts(f, "ACCESS"); else if (flc->flc_flags & FL_OFDLCK) seq_puts(f, "OFDLCK"); else seq_puts(f, "POSIX "); seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : "ADVISORY "); } else if (flc->flc_flags & FL_FLOCK) { seq_puts(f, "FLOCK ADVISORY "); } else if (flc->flc_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT)) { struct file_lease *lease = file_lease(flc); type = target_leasetype(lease); if (flc->flc_flags & FL_DELEG) seq_puts(f, "DELEG "); else seq_puts(f, "LEASE "); if (lease_breaking(lease)) seq_puts(f, "BREAKING "); else if (flc->flc_file) seq_puts(f, "ACTIVE "); else seq_puts(f, "BREAKER "); } else { seq_puts(f, "UNKNOWN UNKNOWN "); } seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" : (type == F_RDLCK) ? "READ" : "UNLCK"); if (inode) { /* userspace relies on this representation of dev_t */ seq_printf(f, "%d %02x:%02x:%lu ", pid, MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev), inode->i_ino); } else { seq_printf(f, "%d <none>:0 ", pid); } if (flc->flc_flags & FL_POSIX) { if (fl->fl_end == OFFSET_MAX) seq_printf(f, "%Ld EOF\n", fl->fl_start); else seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end); } else { seq_puts(f, "0 EOF\n"); } } static struct file_lock_core *get_next_blocked_member(struct file_lock_core *node) { struct file_lock_core *tmp; /* NULL node or root node */ if (node == NULL || node->flc_blocker == NULL) return NULL; /* Next member in the linked list could be itself */ tmp = list_next_entry(node, flc_blocked_member); if (list_entry_is_head(tmp, &node->flc_blocker->flc_blocked_requests, flc_blocked_member) || tmp == node) { return NULL; } return tmp; } static int locks_show(struct seq_file *f, void *v) { struct locks_iterator *iter = f->private; struct file_lock_core *cur, *tmp; struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb); int level = 0; cur = hlist_entry(v, struct file_lock_core, flc_link); if (locks_translate_pid(cur, proc_pidns) == 0) return 0; /* View this crossed linked list as a binary tree, the first member of flc_blocked_requests * is the left child of current node, the next silibing in flc_blocked_member is the * right child, we can alse get the parent of current node from flc_blocker, so this * question becomes traversal of a binary tree */ while (cur != NULL) { if (level) lock_get_status(f, cur, iter->li_pos, "-> ", level); else lock_get_status(f, cur, iter->li_pos, "", level); if (!list_empty(&cur->flc_blocked_requests)) { /* Turn left */ cur = list_first_entry_or_null(&cur->flc_blocked_requests, struct file_lock_core, flc_blocked_member); level++; } else { /* Turn right */ tmp = get_next_blocked_member(cur); /* Fall back to parent node */ while (tmp == NULL && cur->flc_blocker != NULL) { cur = cur->flc_blocker; level--; tmp = get_next_blocked_member(cur); } cur = tmp; } } return 0; } static void __show_fd_locks(struct seq_file *f, struct list_head *head, int *id, struct file *filp, struct files_struct *files) { struct file_lock_core *fl; list_for_each_entry(fl, head, flc_list) { if (filp != fl->flc_file) continue; if (fl->flc_owner != files && fl->flc_owner != filp) continue; (*id)++; seq_puts(f, "lock:\t"); lock_get_status(f, fl, *id, "", 0); } } void show_fd_locks(struct seq_file *f, struct file *filp, struct files_struct *files) { struct inode *inode = file_inode(filp); struct file_lock_context *ctx; int id = 0; ctx = locks_inode_context(inode); if (!ctx) return; spin_lock(&ctx->flc_lock); __show_fd_locks(f, &ctx->flc_flock, &id, filp, files); __show_fd_locks(f, &ctx->flc_posix, &id, filp, files); __show_fd_locks(f, &ctx->flc_lease, &id, filp, files); spin_unlock(&ctx->flc_lock); } static void *locks_start(struct seq_file *f, loff_t *pos) __acquires(&blocked_lock_lock) { struct locks_iterator *iter = f->private; iter->li_pos = *pos + 1; percpu_down_write(&file_rwsem); spin_lock(&blocked_lock_lock); return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos); } static void *locks_next(struct seq_file *f, void *v, loff_t *pos) { struct locks_iterator *iter = f->private; ++iter->li_pos; return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos); } static void locks_stop(struct seq_file *f, void *v) __releases(&blocked_lock_lock) { spin_unlock(&blocked_lock_lock); percpu_up_write(&file_rwsem); } static const struct seq_operations locks_seq_operations = { .start = locks_start, .next = locks_next, .stop = locks_stop, .show = locks_show, }; static int __init proc_locks_init(void) { proc_create_seq_private("locks", 0, NULL, &locks_seq_operations, sizeof(struct locks_iterator), NULL); return 0; } fs_initcall(proc_locks_init); #endif static int __init filelock_init(void) { int i; flctx_cache = kmem_cache_create("file_lock_ctx", sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL); filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); filelease_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lease), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) { struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i); spin_lock_init(&fll->lock); INIT_HLIST_HEAD(&fll->hlist); } lease_notifier_chain_init(); return 0; } core_initcall(filelock_init);
215 223 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2005-2010 IBM Corporation * * Authors: * Mimi Zohar <zohar@us.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * * File: evm.h */ #ifndef __INTEGRITY_EVM_H #define __INTEGRITY_EVM_H #include <linux/xattr.h> #include <linux/security.h> #include "../integrity.h" #define EVM_INIT_HMAC 0x0001 #define EVM_INIT_X509 0x0002 #define EVM_ALLOW_METADATA_WRITES 0x0004 #define EVM_SETUP_COMPLETE 0x80000000 /* userland has signaled key load */ #define EVM_KEY_MASK (EVM_INIT_HMAC | EVM_INIT_X509) #define EVM_INIT_MASK (EVM_INIT_HMAC | EVM_INIT_X509 | EVM_SETUP_COMPLETE | \ EVM_ALLOW_METADATA_WRITES) struct xattr_list { struct list_head list; char *name; bool enabled; }; #define EVM_NEW_FILE 0x00000001 #define EVM_IMMUTABLE_DIGSIG 0x00000002 /* EVM integrity metadata associated with an inode */ struct evm_iint_cache { unsigned long flags; enum integrity_status evm_status:4; struct integrity_inode_attributes metadata_inode; }; extern struct lsm_blob_sizes evm_blob_sizes; static inline struct evm_iint_cache *evm_iint_inode(const struct inode *inode) { if (unlikely(!inode->i_security)) return NULL; return inode->i_security + evm_blob_sizes.lbs_inode; } extern int evm_initialized; #define EVM_ATTR_FSUUID 0x0001 extern int evm_hmac_attrs; /* List of EVM protected security xattrs */ extern struct list_head evm_config_xattrnames; struct evm_digest { struct ima_digest_data_hdr hdr; char digest[IMA_MAX_DIGEST_SIZE]; } __packed; int evm_protected_xattr(const char *req_xattr_name); int evm_init_key(void); int evm_update_evmxattr(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, size_t req_xattr_value_len); int evm_calc_hmac(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, size_t req_xattr_value_len, struct evm_digest *data, struct evm_iint_cache *iint); int evm_calc_hash(struct dentry *dentry, const char *req_xattr_name, const char *req_xattr_value, size_t req_xattr_value_len, char type, struct evm_digest *data, struct evm_iint_cache *iint); int evm_init_hmac(struct inode *inode, const struct xattr *xattrs, char *hmac_val); int evm_init_secfs(void); #endif
59 59 1 1 59 59 59 59 59 59 59 59 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Implementation of the access vector table type. * * Author : Stephen Smalley, <stephen.smalley.work@gmail.com> */ /* Updated: Frank Mayer <mayerf@tresys.com> and * Karl MacMillan <kmacmillan@tresys.com> * Added conditional policy language extensions * Copyright (C) 2003 Tresys Technology, LLC * * Updated: Yuichi Nakamura <ynakam@hitachisoft.jp> * Tuned number of hash slots for avtab to reduce memory usage */ #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/errno.h> #include "avtab.h" #include "policydb.h" static struct kmem_cache *avtab_node_cachep __ro_after_init; static struct kmem_cache *avtab_xperms_cachep __ro_after_init; /* Based on MurmurHash3, written by Austin Appleby and placed in the * public domain. */ static inline u32 avtab_hash(const struct avtab_key *keyp, u32 mask) { static const u32 c1 = 0xcc9e2d51; static const u32 c2 = 0x1b873593; static const u32 r1 = 15; static const u32 r2 = 13; static const u32 m = 5; static const u32 n = 0xe6546b64; u32 hash = 0; #define mix(input) \ do { \ u32 v = input; \ v *= c1; \ v = (v << r1) | (v >> (32 - r1)); \ v *= c2; \ hash ^= v; \ hash = (hash << r2) | (hash >> (32 - r2)); \ hash = hash * m + n; \ } while (0) mix(keyp->target_class); mix(keyp->target_type); mix(keyp->source_type); #undef mix hash ^= hash >> 16; hash *= 0x85ebca6b; hash ^= hash >> 13; hash *= 0xc2b2ae35; hash ^= hash >> 16; return hash & mask; } static struct avtab_node *avtab_insert_node(struct avtab *h, struct avtab_node **dst, const struct avtab_key *key, const struct avtab_datum *datum) { struct avtab_node *newnode; struct avtab_extended_perms *xperms; newnode = kmem_cache_zalloc(avtab_node_cachep, GFP_KERNEL); if (newnode == NULL) return NULL; newnode->key = *key; if (key->specified & AVTAB_XPERMS) { xperms = kmem_cache_zalloc(avtab_xperms_cachep, GFP_KERNEL); if (xperms == NULL) { kmem_cache_free(avtab_node_cachep, newnode); return NULL; } *xperms = *(datum->u.xperms); newnode->datum.u.xperms = xperms; } else { newnode->datum.u.data = datum->u.data; } newnode->next = *dst; *dst = newnode; h->nel++; return newnode; } static int avtab_node_cmp(const struct avtab_key *key1, const struct avtab_key *key2) { u16 specified = key1->specified & ~(AVTAB_ENABLED | AVTAB_ENABLED_OLD); if (key1->source_type == key2->source_type && key1->target_type == key2->target_type && key1->target_class == key2->target_class && (specified & key2->specified)) return 0; if (key1->source_type < key2->source_type) return -1; if (key1->source_type == key2->source_type && key1->target_type < key2->target_type) return -1; if (key1->source_type == key2->source_type && key1->target_type == key2->target_type && key1->target_class < key2->target_class) return -1; return 1; } static int avtab_insert(struct avtab *h, const struct avtab_key *key, const struct avtab_datum *datum) { u32 hvalue; struct avtab_node *prev, *cur, *newnode; int cmp; if (!h || !h->nslot || h->nel == U32_MAX) return -EINVAL; hvalue = avtab_hash(key, h->mask); for (prev = NULL, cur = h->htable[hvalue]; cur; prev = cur, cur = cur->next) { cmp = avtab_node_cmp(key, &cur->key); /* extended perms may not be unique */ if (cmp == 0 && !(key->specified & AVTAB_XPERMS)) return -EEXIST; if (cmp <= 0) break; } newnode = avtab_insert_node(h, prev ? &prev->next : &h->htable[hvalue], key, datum); if (!newnode) return -ENOMEM; return 0; } /* Unlike avtab_insert(), this function allow multiple insertions of the same * key/specified mask into the table, as needed by the conditional avtab. * It also returns a pointer to the node inserted. */ struct avtab_node *avtab_insert_nonunique(struct avtab *h, const struct avtab_key *key, const struct avtab_datum *datum) { u32 hvalue; struct avtab_node *prev, *cur; int cmp; if (!h || !h->nslot || h->nel == U32_MAX) return NULL; hvalue = avtab_hash(key, h->mask); for (prev = NULL, cur = h->htable[hvalue]; cur; prev = cur, cur = cur->next) { cmp = avtab_node_cmp(key, &cur->key); if (cmp <= 0) break; } return avtab_insert_node(h, prev ? &prev->next : &h->htable[hvalue], key, datum); } /* This search function returns a node pointer, and can be used in * conjunction with avtab_search_next_node() */ struct avtab_node *avtab_search_node(struct avtab *h, const struct avtab_key *key) { u32 hvalue; struct avtab_node *cur; int cmp; if (!h || !h->nslot) return NULL; hvalue = avtab_hash(key, h->mask); for (cur = h->htable[hvalue]; cur; cur = cur->next) { cmp = avtab_node_cmp(key, &cur->key); if (cmp == 0) return cur; if (cmp < 0) break; } return NULL; } struct avtab_node *avtab_search_node_next(struct avtab_node *node, u16 specified) { struct avtab_key tmp_key; struct avtab_node *cur; int cmp; if (!node) return NULL; tmp_key = node->key; tmp_key.specified = specified; for (cur = node->next; cur; cur = cur->next) { cmp = avtab_node_cmp(&tmp_key, &cur->key); if (cmp == 0) return cur; if (cmp < 0) break; } return NULL; } void avtab_destroy(struct avtab *h) { u32 i; struct avtab_node *cur, *temp; if (!h) return; for (i = 0; i < h->nslot; i++) { cur = h->htable[i]; while (cur) { temp = cur; cur = cur->next; if (temp->key.specified & AVTAB_XPERMS) kmem_cache_free(avtab_xperms_cachep, temp->datum.u.xperms); kmem_cache_free(avtab_node_cachep, temp); } } kvfree(h->htable); h->htable = NULL; h->nel = 0; h->nslot = 0; h->mask = 0; } void avtab_init(struct avtab *h) { h->htable = NULL; h->nel = 0; h->nslot = 0; h->mask = 0; } static int avtab_alloc_common(struct avtab *h, u32 nslot) { if (!nslot) return 0; h->htable = kvcalloc(nslot, sizeof(void *), GFP_KERNEL); if (!h->htable) return -ENOMEM; h->nslot = nslot; h->mask = nslot - 1; return 0; } int avtab_alloc(struct avtab *h, u32 nrules) { int rc; u32 nslot = 0; if (nrules != 0) { nslot = nrules > 3 ? rounddown_pow_of_two(nrules / 2) : 2; if (nslot > MAX_AVTAB_HASH_BUCKETS) nslot = MAX_AVTAB_HASH_BUCKETS; rc = avtab_alloc_common(h, nslot); if (rc) return rc; } pr_debug("SELinux: %d avtab hash slots, %d rules.\n", nslot, nrules); return 0; } int avtab_alloc_dup(struct avtab *new, const struct avtab *orig) { return avtab_alloc_common(new, orig->nslot); } #ifdef CONFIG_SECURITY_SELINUX_DEBUG void avtab_hash_eval(struct avtab *h, const char *tag) { u32 i, chain_len, slots_used, max_chain_len; unsigned long long chain2_len_sum; struct avtab_node *cur; slots_used = 0; max_chain_len = 0; chain2_len_sum = 0; for (i = 0; i < h->nslot; i++) { cur = h->htable[i]; if (cur) { slots_used++; chain_len = 0; while (cur) { chain_len++; cur = cur->next; } if (chain_len > max_chain_len) max_chain_len = chain_len; chain2_len_sum += (unsigned long long)chain_len * chain_len; } } pr_debug("SELinux: %s: %d entries and %d/%d buckets used, " "longest chain length %d, sum of chain length^2 %llu\n", tag, h->nel, slots_used, h->nslot, max_chain_len, chain2_len_sum); } #endif /* CONFIG_SECURITY_SELINUX_DEBUG */ /* clang-format off */ static const uint16_t spec_order[] = { AVTAB_ALLOWED, AVTAB_AUDITDENY, AVTAB_AUDITALLOW, AVTAB_TRANSITION, AVTAB_CHANGE, AVTAB_MEMBER, AVTAB_XPERMS_ALLOWED, AVTAB_XPERMS_AUDITALLOW, AVTAB_XPERMS_DONTAUDIT }; /* clang-format on */ int avtab_read_item(struct avtab *a, void *fp, struct policydb *pol, int (*insertf)(struct avtab *a, const struct avtab_key *k, const struct avtab_datum *d, void *p), void *p) { __le16 buf16[4]; u16 enabled; u32 items, items2, val, i; struct avtab_key key; struct avtab_datum datum; struct avtab_extended_perms xperms; __le32 buf32[ARRAY_SIZE(xperms.perms.p)]; int rc; unsigned int set, vers = pol->policyvers; memset(&key, 0, sizeof(struct avtab_key)); memset(&datum, 0, sizeof(struct avtab_datum)); if (vers < POLICYDB_VERSION_AVTAB) { rc = next_entry(buf32, fp, sizeof(u32)); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } items2 = le32_to_cpu(buf32[0]); if (items2 > ARRAY_SIZE(buf32)) { pr_err("SELinux: avtab: entry overflow\n"); return -EINVAL; } rc = next_entry(buf32, fp, sizeof(u32) * items2); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } items = 0; val = le32_to_cpu(buf32[items++]); key.source_type = (u16)val; if (key.source_type != val) { pr_err("SELinux: avtab: truncated source type\n"); return -EINVAL; } val = le32_to_cpu(buf32[items++]); key.target_type = (u16)val; if (key.target_type != val) { pr_err("SELinux: avtab: truncated target type\n"); return -EINVAL; } val = le32_to_cpu(buf32[items++]); key.target_class = (u16)val; if (key.target_class != val) { pr_err("SELinux: avtab: truncated target class\n"); return -EINVAL; } val = le32_to_cpu(buf32[items++]); enabled = (val & AVTAB_ENABLED_OLD) ? AVTAB_ENABLED : 0; if (!(val & (AVTAB_AV | AVTAB_TYPE))) { pr_err("SELinux: avtab: null entry\n"); return -EINVAL; } if ((val & AVTAB_AV) && (val & AVTAB_TYPE)) { pr_err("SELinux: avtab: entry has both access vectors and types\n"); return -EINVAL; } if (val & AVTAB_XPERMS) { pr_err("SELinux: avtab: entry has extended permissions\n"); return -EINVAL; } for (i = 0; i < ARRAY_SIZE(spec_order); i++) { if (val & spec_order[i]) { key.specified = spec_order[i] | enabled; datum.u.data = le32_to_cpu(buf32[items++]); rc = insertf(a, &key, &datum, p); if (rc) return rc; } } if (items != items2) { pr_err("SELinux: avtab: entry only had %d items, expected %d\n", items2, items); return -EINVAL; } return 0; } rc = next_entry(buf16, fp, sizeof(u16) * 4); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } items = 0; key.source_type = le16_to_cpu(buf16[items++]); key.target_type = le16_to_cpu(buf16[items++]); key.target_class = le16_to_cpu(buf16[items++]); key.specified = le16_to_cpu(buf16[items++]); if (!policydb_type_isvalid(pol, key.source_type) || !policydb_type_isvalid(pol, key.target_type) || !policydb_class_isvalid(pol, key.target_class)) { pr_err("SELinux: avtab: invalid type or class\n"); return -EINVAL; } set = hweight16(key.specified & (AVTAB_XPERMS | AVTAB_TYPE | AVTAB_AV)); if (!set || set > 1) { pr_err("SELinux: avtab: more than one specifier\n"); return -EINVAL; } if ((vers < POLICYDB_VERSION_XPERMS_IOCTL) && (key.specified & AVTAB_XPERMS)) { pr_err("SELinux: avtab: policy version %u does not " "support extended permissions rules and one " "was specified\n", vers); return -EINVAL; } else if (key.specified & AVTAB_XPERMS) { memset(&xperms, 0, sizeof(struct avtab_extended_perms)); rc = next_entry(&xperms.specified, fp, sizeof(u8)); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } rc = next_entry(&xperms.driver, fp, sizeof(u8)); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } rc = next_entry(buf32, fp, sizeof(u32) * ARRAY_SIZE(xperms.perms.p)); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } for (i = 0; i < ARRAY_SIZE(xperms.perms.p); i++) xperms.perms.p[i] = le32_to_cpu(buf32[i]); datum.u.xperms = &xperms; } else { rc = next_entry(buf32, fp, sizeof(u32)); if (rc) { pr_err("SELinux: avtab: truncated entry\n"); return rc; } datum.u.data = le32_to_cpu(*buf32); } if ((key.specified & AVTAB_TYPE) && !policydb_type_isvalid(pol, datum.u.data)) { pr_err("SELinux: avtab: invalid type\n"); return -EINVAL; } return insertf(a, &key, &datum, p); } static int avtab_insertf(struct avtab *a, const struct avtab_key *k, const struct avtab_datum *d, void *p) { return avtab_insert(a, k, d); } int avtab_read(struct avtab *a, void *fp, struct policydb *pol) { int rc; __le32 buf[1]; u32 nel, i; rc = next_entry(buf, fp, sizeof(u32)); if (rc < 0) { pr_err("SELinux: avtab: truncated table\n"); goto bad; } nel = le32_to_cpu(buf[0]); if (!nel) { pr_err("SELinux: avtab: table is empty\n"); rc = -EINVAL; goto bad; } rc = avtab_alloc(a, nel); if (rc) goto bad; for (i = 0; i < nel; i++) { rc = avtab_read_item(a, fp, pol, avtab_insertf, NULL); if (rc) { if (rc == -ENOMEM) pr_err("SELinux: avtab: out of memory\n"); else if (rc == -EEXIST) pr_err("SELinux: avtab: duplicate entry\n"); goto bad; } } rc = 0; out: return rc; bad: avtab_destroy(a); goto out; } int avtab_write_item(struct policydb *p, const struct avtab_node *cur, void *fp) { __le16 buf16[4]; __le32 buf32[ARRAY_SIZE(cur->datum.u.xperms->perms.p)]; int rc; unsigned int i; buf16[0] = cpu_to_le16(cur->key.source_type); buf16[1] = cpu_to_le16(cur->key.target_type); buf16[2] = cpu_to_le16(cur->key.target_class); buf16[3] = cpu_to_le16(cur->key.specified); rc = put_entry(buf16, sizeof(u16), 4, fp); if (rc) return rc; if (cur->key.specified & AVTAB_XPERMS) { rc = put_entry(&cur->datum.u.xperms->specified, sizeof(u8), 1, fp); if (rc) return rc; rc = put_entry(&cur->datum.u.xperms->driver, sizeof(u8), 1, fp); if (rc) return rc; for (i = 0; i < ARRAY_SIZE(cur->datum.u.xperms->perms.p); i++) buf32[i] = cpu_to_le32(cur->datum.u.xperms->perms.p[i]); rc = put_entry(buf32, sizeof(u32), ARRAY_SIZE(cur->datum.u.xperms->perms.p), fp); } else { buf32[0] = cpu_to_le32(cur->datum.u.data); rc = put_entry(buf32, sizeof(u32), 1, fp); } if (rc) return rc; return 0; } int avtab_write(struct policydb *p, struct avtab *a, void *fp) { u32 i; int rc = 0; struct avtab_node *cur; __le32 buf[1]; buf[0] = cpu_to_le32(a->nel); rc = put_entry(buf, sizeof(u32), 1, fp); if (rc) return rc; for (i = 0; i < a->nslot; i++) { for (cur = a->htable[i]; cur; cur = cur->next) { rc = avtab_write_item(p, cur, fp); if (rc) return rc; } } return rc; } void __init avtab_cache_init(void) { avtab_node_cachep = kmem_cache_create( "avtab_node", sizeof(struct avtab_node), 0, SLAB_PANIC, NULL); avtab_xperms_cachep = kmem_cache_create( "avtab_extended_perms", sizeof(struct avtab_extended_perms), 0, SLAB_PANIC, NULL); }
300 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 // SPDX-License-Identifier: GPL-2.0-only #include <linux/fault-inject.h> #include <linux/fault-inject-usercopy.h> static struct { struct fault_attr attr; } fail_usercopy = { .attr = FAULT_ATTR_INITIALIZER, }; static int __init setup_fail_usercopy(char *str) { return setup_fault_attr(&fail_usercopy.attr, str); } __setup("fail_usercopy=", setup_fail_usercopy); #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS static int __init fail_usercopy_debugfs(void) { struct dentry *dir; dir = fault_create_debugfs_attr("fail_usercopy", NULL, &fail_usercopy.attr); if (IS_ERR(dir)) return PTR_ERR(dir); return 0; } late_initcall(fail_usercopy_debugfs); #endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ bool should_fail_usercopy(void) { return should_fail(&fail_usercopy.attr, 1); } EXPORT_SYMBOL_GPL(should_fail_usercopy);
345 345 345 345 341 342 342 342 343 343 343 343 55 343 330 330 330 330 330 106 330 342 342 342 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 // SPDX-License-Identifier: GPL-2.0 /* * Lockless hierarchical page accounting & limiting * * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner */ #include <linux/page_counter.h> #include <linux/atomic.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/sched.h> #include <linux/bug.h> #include <asm/page.h> static void propagate_protected_usage(struct page_counter *c, unsigned long usage) { unsigned long protected, old_protected; long delta; if (!c->parent) return; protected = min(usage, READ_ONCE(c->min)); old_protected = atomic_long_read(&c->min_usage); if (protected != old_protected) { old_protected = atomic_long_xchg(&c->min_usage, protected); delta = protected - old_protected; if (delta) atomic_long_add(delta, &c->parent->children_min_usage); } protected = min(usage, READ_ONCE(c->low)); old_protected = atomic_long_read(&c->low_usage); if (protected != old_protected) { old_protected = atomic_long_xchg(&c->low_usage, protected); delta = protected - old_protected; if (delta) atomic_long_add(delta, &c->parent->children_low_usage); } } /** * page_counter_cancel - take pages out of the local counter * @counter: counter * @nr_pages: number of pages to cancel */ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) { long new; new = atomic_long_sub_return(nr_pages, &counter->usage); /* More uncharges than charges? */ if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n", new, nr_pages)) { new = 0; atomic_long_set(&counter->usage, new); } propagate_protected_usage(counter, new); } /** * page_counter_charge - hierarchically charge pages * @counter: counter * @nr_pages: number of pages to charge * * NOTE: This does not consider any configured counter limits. */ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; for (c = counter; c; c = c->parent) { long new; new = atomic_long_add_return(nr_pages, &c->usage); propagate_protected_usage(c, new); /* * This is indeed racy, but we can live with some * inaccuracy in the watermark. */ if (new > READ_ONCE(c->watermark)) WRITE_ONCE(c->watermark, new); } } /** * page_counter_try_charge - try to hierarchically charge pages * @counter: counter * @nr_pages: number of pages to charge * @fail: points first counter to hit its limit, if any * * Returns %true on success, or %false and @fail if the counter or one * of its ancestors has hit its configured limit. */ bool page_counter_try_charge(struct page_counter *counter, unsigned long nr_pages, struct page_counter **fail) { struct page_counter *c; for (c = counter; c; c = c->parent) { long new; /* * Charge speculatively to avoid an expensive CAS. If * a bigger charge fails, it might falsely lock out a * racing smaller charge and send it into reclaim * early, but the error is limited to the difference * between the two sizes, which is less than 2M/4M in * case of a THP locking out a regular page charge. * * The atomic_long_add_return() implies a full memory * barrier between incrementing the count and reading * the limit. When racing with page_counter_set_max(), * we either see the new limit or the setter sees the * counter has changed and retries. */ new = atomic_long_add_return(nr_pages, &c->usage); if (new > c->max) { atomic_long_sub(nr_pages, &c->usage); /* * This is racy, but we can live with some * inaccuracy in the failcnt which is only used * to report stats. */ data_race(c->failcnt++); *fail = c; goto failed; } propagate_protected_usage(c, new); /* * Just like with failcnt, we can live with some * inaccuracy in the watermark. */ if (new > READ_ONCE(c->watermark)) WRITE_ONCE(c->watermark, new); } return true; failed: for (c = counter; c != *fail; c = c->parent) page_counter_cancel(c, nr_pages); return false; } /** * page_counter_uncharge - hierarchically uncharge pages * @counter: counter * @nr_pages: number of pages to uncharge */ void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; for (c = counter; c; c = c->parent) page_counter_cancel(c, nr_pages); } /** * page_counter_set_max - set the maximum number of pages allowed * @counter: counter * @nr_pages: limit to set * * Returns 0 on success, -EBUSY if the current number of pages on the * counter already exceeds the specified limit. * * The caller must serialize invocations on the same counter. */ int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages) { for (;;) { unsigned long old; long usage; /* * Update the limit while making sure that it's not * below the concurrently-changing counter value. * * The xchg implies two full memory barriers before * and after, so the read-swap-read is ordered and * ensures coherency with page_counter_try_charge(): * that function modifies the count before checking * the limit, so if it sees the old limit, we see the * modified counter and retry. */ usage = page_counter_read(counter); if (usage > nr_pages) return -EBUSY; old = xchg(&counter->max, nr_pages); if (page_counter_read(counter) <= usage || nr_pages >= old) return 0; counter->max = old; cond_resched(); } } /** * page_counter_set_min - set the amount of protected memory * @counter: counter * @nr_pages: value to set * * The caller must serialize invocations on the same counter. */ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; WRITE_ONCE(counter->min, nr_pages); for (c = counter; c; c = c->parent) propagate_protected_usage(c, atomic_long_read(&c->usage)); } /** * page_counter_set_low - set the amount of protected memory * @counter: counter * @nr_pages: value to set * * The caller must serialize invocations on the same counter. */ void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages) { struct page_counter *c; WRITE_ONCE(counter->low, nr_pages); for (c = counter; c; c = c->parent) propagate_protected_usage(c, atomic_long_read(&c->usage)); } /** * page_counter_memparse - memparse() for page counter limits * @buf: string to parse * @max: string meaning maximum possible value * @nr_pages: returns the result in number of pages * * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be * limited to %PAGE_COUNTER_MAX. */ int page_counter_memparse(const char *buf, const char *max, unsigned long *nr_pages) { char *end; u64 bytes; if (!strcmp(buf, max)) { *nr_pages = PAGE_COUNTER_MAX; return 0; } bytes = memparse(buf, &end); if (*end != '\0') return -EINVAL; *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX); return 0; }
19 19 1 1 11 11 20 20 32 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RCULIST_H #define _LINUX_RCULIST_H #ifdef __KERNEL__ /* * RCU-protected list version */ #include <linux/list.h> #include <linux/rcupdate.h> /* * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers * @list: list to be initialized * * You should instead use INIT_LIST_HEAD() for normal initialization and * cleanup tasks, when readers have no access to the list being initialized. * However, if the list being initialized is visible to readers, you * need to keep the compiler from being too mischievous. */ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) { WRITE_ONCE(list->next, list); WRITE_ONCE(list->prev, list); } /* * return the ->next pointer of a list_head in an rcu safe * way, we must not access it directly */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) /** * list_tail_rcu - returns the prev pointer of the head of the list * @head: the head of the list * * Note: This should only be used with the list header, and even then * only if list_del() and similar primitives are not also used on the * list header. */ #define list_tail_rcu(head) (*((struct list_head __rcu **)(&(head)->prev))) /* * Check during list traversal that we are within an RCU reader */ #define check_arg_count_one(dummy) #ifdef CONFIG_PROVE_RCU_LIST #define __list_check_rcu(dummy, cond, extra...) \ ({ \ check_arg_count_one(extra); \ RCU_LOCKDEP_WARN(!(cond) && !rcu_read_lock_any_held(), \ "RCU-list traversed in non-reader section!"); \ }) #define __list_check_srcu(cond) \ ({ \ RCU_LOCKDEP_WARN(!(cond), \ "RCU-list traversed without holding the required lock!");\ }) #else #define __list_check_rcu(dummy, cond, extra...) \ ({ check_arg_count_one(extra); }) #define __list_check_srcu(cond) ({ }) #endif /* * Insert a new entry between two known consecutive entries. * * This is only for internal list manipulation where we know * the prev/next entries already! */ static inline void __list_add_rcu(struct list_head *new, struct list_head *prev, struct list_head *next) { if (!__list_add_valid(new, prev, next)) return; new->next = next; new->prev = prev; rcu_assign_pointer(list_next_rcu(prev), new); next->prev = new; } /** * list_add_rcu - add a new entry to rcu-protected list * @new: new entry to be added * @head: list head to add it after * * Insert a new entry after the specified head. * This is good for implementing stacks. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as list_add_rcu() * or list_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * list_for_each_entry_rcu(). */ static inline void list_add_rcu(struct list_head *new, struct list_head *head) { __list_add_rcu(new, head, head->next); } /** * list_add_tail_rcu - add a new entry to rcu-protected list * @new: new entry to be added * @head: list head to add it before * * Insert a new entry before the specified head. * This is useful for implementing queues. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as list_add_tail_rcu() * or list_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * list_for_each_entry_rcu(). */ static inline void list_add_tail_rcu(struct list_head *new, struct list_head *head) { __list_add_rcu(new, head->prev, head); } /** * list_del_rcu - deletes entry from list without re-initialization * @entry: the element to delete from the list. * * Note: list_empty() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as list_del_rcu() * or list_add_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * list_for_each_entry_rcu(). * * Note that the caller is not permitted to immediately free * the newly deleted entry. Instead, either synchronize_rcu() * or call_rcu() must be used to defer freeing until an RCU * grace period has elapsed. */ static inline void list_del_rcu(struct list_head *entry) { __list_del_entry(entry); entry->prev = LIST_POISON2; } /** * hlist_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * * Note: list_unhashed() on the node return true after this. It is * useful for RCU based read lockfree traversal if the writer side * must know if the list entry is still hashed or already unhashed. * * In particular, it means that we can not poison the forward pointers * that may still be used for walking the hash list and we can only * zero the pprev pointer so list_unhashed() will return true after * this. * * The caller must take whatever precautions are necessary (such as * holding appropriate locks) to avoid racing with another * list-mutation primitive, such as hlist_add_head_rcu() or * hlist_del_rcu(), running on this same list. However, it is * perfectly legal to run concurrently with the _rcu list-traversal * primitives, such as hlist_for_each_entry_rcu(). */ static inline void hlist_del_init_rcu(struct hlist_node *n) { if (!hlist_unhashed(n)) { __hlist_del(n); WRITE_ONCE(n->pprev, NULL); } } /** * list_replace_rcu - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert * * The @old entry will be replaced with the @new entry atomically. * Note: @old should not be empty. */ static inline void list_replace_rcu(struct list_head *old, struct list_head *new) { new->next = old->next; new->prev = old->prev; rcu_assign_pointer(list_next_rcu(new->prev), new); new->next->prev = new; old->prev = LIST_POISON2; } /** * __list_splice_init_rcu - join an RCU-protected list into an existing list. * @list: the RCU-protected list to splice * @prev: points to the last element of the existing list * @next: points to the first element of the existing list * @sync: synchronize_rcu, synchronize_rcu_expedited, ... * * The list pointed to by @prev and @next can be RCU-read traversed * concurrently with this function. * * Note that this function blocks. * * Important note: the caller must take whatever action is necessary to prevent * any other updates to the existing list. In principle, it is possible to * modify the list as soon as sync() begins execution. If this sort of thing * becomes necessary, an alternative version based on call_rcu() could be * created. But only if -really- needed -- there is no shortage of RCU API * members. */ static inline void __list_splice_init_rcu(struct list_head *list, struct list_head *prev, struct list_head *next, void (*sync)(void)) { struct list_head *first = list->next; struct list_head *last = list->prev; /* * "first" and "last" tracking list, so initialize it. RCU readers * have access to this list, so we must use INIT_LIST_HEAD_RCU() * instead of INIT_LIST_HEAD(). */ INIT_LIST_HEAD_RCU(list); /* * At this point, the list body still points to the source list. * Wait for any readers to finish using the list before splicing * the list body into the new list. Any new readers will see * an empty list. */ sync(); ASSERT_EXCLUSIVE_ACCESS(*first); ASSERT_EXCLUSIVE_ACCESS(*last); /* * Readers are finished with the source list, so perform splice. * The order is important if the new list is global and accessible * to concurrent RCU readers. Note that RCU readers are not * permitted to traverse the prev pointers without excluding * this function. */ last->next = next; rcu_assign_pointer(list_next_rcu(prev), first); first->prev = prev; next->prev = last; } /** * list_splice_init_rcu - splice an RCU-protected list into an existing list, * designed for stacks. * @list: the RCU-protected list to splice * @head: the place in the existing list to splice the first list into * @sync: synchronize_rcu, synchronize_rcu_expedited, ... */ static inline void list_splice_init_rcu(struct list_head *list, struct list_head *head, void (*sync)(void)) { if (!list_empty(list)) __list_splice_init_rcu(list, head, head->next, sync); } /** * list_splice_tail_init_rcu - splice an RCU-protected list into an existing * list, designed for queues. * @list: the RCU-protected list to splice * @head: the place in the existing list to splice the first list into * @sync: synchronize_rcu, synchronize_rcu_expedited, ... */ static inline void list_splice_tail_init_rcu(struct list_head *list, struct list_head *head, void (*sync)(void)) { if (!list_empty(list)) __list_splice_init_rcu(list, head->prev, head, sync); } /** * list_entry_rcu - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ container_of(READ_ONCE(ptr), type, member) /* * Where are list_empty_rcu() and list_first_entry_rcu()? * * They do not exist because they would lead to subtle race conditions: * * if (!list_empty_rcu(mylist)) { * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); * do_something(bar); * } * * The list might be non-empty when list_empty_rcu() checks it, but it * might have become empty by the time that list_first_entry_rcu() rereads * the ->next pointer, which would result in a SEGV. * * When not using RCU, it is OK for list_first_entry() to re-read that * pointer because both functions should be protected by some lock that * blocks writers. * * When using RCU, list_empty() uses READ_ONCE() to fetch the * RCU-protected ->next pointer and then compares it to the address of the * list head. However, it neither dereferences this pointer nor provides * this pointer to its caller. Thus, READ_ONCE() suffices (that is, * rcu_dereference() is not needed), which means that list_empty() can be * used anywhere you would want to use list_empty_rcu(). Just don't * expect anything useful to happen if you do a subsequent lockless * call to list_first_entry_rcu()!!! * * See list_first_or_null_rcu for an alternative. */ /** * list_first_or_null_rcu - get the first element from a list * @ptr: the list head to take the element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note that if the list is empty, it returns NULL. * * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_first_or_null_rcu(ptr, type, member) \ ({ \ struct list_head *__ptr = (ptr); \ struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ }) /** * list_next_or_null_rcu - get the next element from a list * @head: the head for the list. * @ptr: the list head to take the next element from. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * Note that if the ptr is at the end of the list, NULL is returned. * * This primitive may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_next_or_null_rcu(head, ptr, type, member) \ ({ \ struct list_head *__head = (head); \ struct list_head *__ptr = (ptr); \ struct list_head *__next = READ_ONCE(__ptr->next); \ likely(__next != __head) ? list_entry_rcu(__next, type, \ member) : NULL; \ }) /** * list_for_each_entry_rcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ #define list_for_each_entry_rcu(pos, head, member, cond...) \ for (__list_check_rcu(dummy, ## cond, 0), \ pos = list_entry_rcu((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_srcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * @cond: lockdep expression for the lock required to traverse the list. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as list_add_rcu() * as long as the traversal is guarded by srcu_read_lock(). * The lockdep expression srcu_read_lock_held() can be passed as the * cond argument from read side. */ #define list_for_each_entry_srcu(pos, head, member, cond) \ for (__list_check_srcu(cond), \ pos = list_entry_rcu((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** * list_entry_lockless - get the struct for this entry * @ptr: the &struct list_head pointer. * @type: the type of the struct this is embedded in. * @member: the name of the list_head within the struct. * * This primitive may safely run concurrently with the _rcu * list-mutation primitives such as list_add_rcu(), but requires some * implicit RCU read-side guarding. One example is running within a special * exception-time environment where preemption is disabled and where lockdep * cannot be invoked. Another example is when items are added to the list, * but never deleted. */ #define list_entry_lockless(ptr, type, member) \ container_of((typeof(ptr))READ_ONCE(ptr), type, member) /** * list_for_each_entry_lockless - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_struct within the struct. * * This primitive may safely run concurrently with the _rcu * list-mutation primitives such as list_add_rcu(), but requires some * implicit RCU read-side guarding. One example is running within a special * exception-time environment where preemption is disabled and where lockdep * cannot be invoked. Another example is when items are added to the list, * but never deleted. */ #define list_for_each_entry_lockless(pos, head, member) \ for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_lockless(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_continue_rcu - continue iteration over list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_head within the struct. * * Continue to iterate over list of given type, continuing after * the current position which must have been in the list when the RCU read * lock was taken. * This would typically require either that you obtained the node from a * previous walk of the list in the same RCU read-side critical section, or * that you held some sort of non-RCU reference (such as a reference count) * to keep the node alive *and* in the list. * * This iterator is similar to list_for_each_entry_from_rcu() except * this starts after the given position and that one starts at the given * position. */ #define list_for_each_entry_continue_rcu(pos, head, member) \ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) /** * list_for_each_entry_from_rcu - iterate over a list from current point * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the list_node within the struct. * * Iterate over the tail of a list starting from a given position, * which must have been in the list when the RCU read lock was taken. * This would typically require either that you obtained the node from a * previous walk of the list in the same RCU read-side critical section, or * that you held some sort of non-RCU reference (such as a reference count) * to keep the node alive *and* in the list. * * This iterator is similar to list_for_each_entry_continue_rcu() except * this starts from the given position and that one starts from the position * after the given position. */ #define list_for_each_entry_from_rcu(pos, head, member) \ for (; &(pos)->member != (head); \ pos = list_entry_rcu(pos->member.next, typeof(*(pos)), member)) /** * hlist_del_rcu - deletes entry from hash list without re-initialization * @n: the element to delete from the hash list. * * Note: list_unhashed() on entry does not return true after this, * the entry is in an undefined state. It is useful for RCU based * lockfree traversal. * * In particular, it means that we can not poison the forward * pointers that may still be used for walking the hash list. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry(). */ static inline void hlist_del_rcu(struct hlist_node *n) { __hlist_del(n); WRITE_ONCE(n->pprev, LIST_POISON2); } /** * hlist_replace_rcu - replace old entry by new one * @old : the element to be replaced * @new : the new element to insert * * The @old entry will be replaced with the @new entry atomically. */ static inline void hlist_replace_rcu(struct hlist_node *old, struct hlist_node *new) { struct hlist_node *next = old->next; new->next = next; WRITE_ONCE(new->pprev, old->pprev); rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); if (next) WRITE_ONCE(new->next->pprev, &new->next); WRITE_ONCE(old->pprev, LIST_POISON2); } /** * hlists_swap_heads_rcu - swap the lists the hlist heads point to * @left: The hlist head on the left * @right: The hlist head on the right * * The lists start out as [@left ][node1 ... ] and * [@right ][node2 ... ] * The lists end up as [@left ][node2 ... ] * [@right ][node1 ... ] */ static inline void hlists_swap_heads_rcu(struct hlist_head *left, struct hlist_head *right) { struct hlist_node *node1 = left->first; struct hlist_node *node2 = right->first; rcu_assign_pointer(left->first, node2); rcu_assign_pointer(right->first, node1); WRITE_ONCE(node2->pprev, &left->first); WRITE_ONCE(node1->pprev, &right->first); } /* * return the first or the next element in an RCU protected hlist */ #define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) #define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) #define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) /** * hlist_add_head_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_add_head_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *first = h->first; n->next = first; WRITE_ONCE(n->pprev, &h->first); rcu_assign_pointer(hlist_first_rcu(h), n); if (first) WRITE_ONCE(first->pprev, &n->next); } /** * hlist_add_tail_rcu * @n: the element to add to the hash list. * @h: the list to add to. * * Description: * Adds the specified element to the specified hlist, * while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. Regardless of the type of CPU, the * list-traversal primitive must be guarded by rcu_read_lock(). */ static inline void hlist_add_tail_rcu(struct hlist_node *n, struct hlist_head *h) { struct hlist_node *i, *last = NULL; /* Note: write side code, so rcu accessors are not needed. */ for (i = h->first; i; i = i->next) last = i; if (last) { n->next = last->next; WRITE_ONCE(n->pprev, &last->next); rcu_assign_pointer(hlist_next_rcu(last), n); } else { hlist_add_head_rcu(n, h); } } /** * hlist_add_before_rcu * @n: the new element to add to the hash list. * @next: the existing element to add the new element before. * * Description: * Adds the specified element to the specified hlist * before the specified node while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. */ static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { WRITE_ONCE(n->pprev, next->pprev); n->next = next; rcu_assign_pointer(hlist_pprev_rcu(n), n); WRITE_ONCE(next->pprev, &n->next); } /** * hlist_add_behind_rcu * @n: the new element to add to the hash list. * @prev: the existing element to add the new element after. * * Description: * Adds the specified element to the specified hlist * after the specified node while permitting racing traversals. * * The caller must take whatever precautions are necessary * (such as holding appropriate locks) to avoid racing * with another list-mutation primitive, such as hlist_add_head_rcu() * or hlist_del_rcu(), running on this same list. * However, it is perfectly legal to run concurrently with * the _rcu list-traversal primitives, such as * hlist_for_each_entry_rcu(), used to prevent memory-consistency * problems on Alpha CPUs. */ static inline void hlist_add_behind_rcu(struct hlist_node *n, struct hlist_node *prev) { n->next = prev->next; WRITE_ONCE(n->pprev, &prev->next); rcu_assign_pointer(hlist_next_rcu(prev), n); if (n->next) WRITE_ONCE(n->next->pprev, &n->next); } #define __hlist_for_each_rcu(pos, head) \ for (pos = rcu_dereference(hlist_first_rcu(head)); \ pos; \ pos = rcu_dereference(hlist_next_rcu(pos))) /** * hlist_for_each_entry_rcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @cond: optional lockdep expression if called from non-RCU protection. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu(pos, head, member, cond...) \ for (__list_check_rcu(dummy, ## cond, 0), \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_srcu - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * @cond: lockdep expression for the lock required to traverse the list. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by srcu_read_lock(). * The lockdep expression srcu_read_lock_held() can be passed as the * cond argument from read side. */ #define hlist_for_each_entry_srcu(pos, head, member, cond) \ for (__list_check_srcu(cond), \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing) * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). * * This is the same as hlist_for_each_entry_rcu() except that it does * not do any RCU debugging or tracing. */ #define hlist_for_each_entry_rcu_notrace(pos, head, member) \ for (pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw_check(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type * @pos: the type * to use as a loop cursor. * @head: the head for your list. * @member: the name of the hlist_node within the struct. * * This list-traversal primitive may safely run concurrently with * the _rcu list-mutation primitives such as hlist_add_head_rcu() * as long as the traversal is guarded by rcu_read_lock(). */ #define hlist_for_each_entry_rcu_bh(pos, head, member) \ for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu(pos, member) \ for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_continue_rcu_bh(pos, member) \ for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member); \ pos; \ pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member)) /** * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point * @pos: the type * to use as a loop cursor. * @member: the name of the hlist_node within the struct. */ #define hlist_for_each_entry_from_rcu(pos, member) \ for (; pos; \ pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ &(pos)->member)), typeof(*(pos)), member)) #endif /* __KERNEL__ */ #endif
179 179 179 179 179 179 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 // SPDX-License-Identifier: GPL-2.0-only /* * Contains CPU feature definitions * * Copyright (C) 2015 ARM Ltd. * * A note for the weary kernel hacker: the code here is confusing and hard to * follow! That's partly because it's solving a nasty problem, but also because * there's a little bit of over-abstraction that tends to obscure what's going * on behind a maze of helper functions and macros. * * The basic problem is that hardware folks have started gluing together CPUs * with distinct architectural features; in some cases even creating SoCs where * user-visible instructions are available only on a subset of the available * cores. We try to address this by snapshotting the feature registers of the * boot CPU and comparing these with the feature registers of each secondary * CPU when bringing them up. If there is a mismatch, then we update the * snapshot state to indicate the lowest-common denominator of the feature, * known as the "safe" value. This snapshot state can be queried to view the * "sanitised" value of a feature register. * * The sanitised register values are used to decide which capabilities we * have in the system. These may be in the form of traditional "hwcaps" * advertised to userspace or internal "cpucaps" which are used to configure * things like alternative patching and static keys. While a feature mismatch * may result in a TAINT_CPU_OUT_OF_SPEC kernel taint, a capability mismatch * may prevent a CPU from being onlined at all. * * Some implementation details worth remembering: * * - Mismatched features are *always* sanitised to a "safe" value, which * usually indicates that the feature is not supported. * * - A mismatched feature marked with FTR_STRICT will cause a "SANITY CHECK" * warning when onlining an offending CPU and the kernel will be tainted * with TAINT_CPU_OUT_OF_SPEC. * * - Features marked as FTR_VISIBLE have their sanitised value visible to * userspace. FTR_VISIBLE features in registers that are only visible * to EL0 by trapping *must* have a corresponding HWCAP so that late * onlining of CPUs cannot lead to features disappearing at runtime. * * - A "feature" is typically a 4-bit register field. A "capability" is the * high-level description derived from the sanitised field value. * * - Read the Arm ARM (DDI 0487F.a) section D13.1.3 ("Principles of the ID * scheme for fields in ID registers") to understand when feature fields * may be signed or unsigned (FTR_SIGNED and FTR_UNSIGNED accordingly). * * - KVM exposes its own view of the feature registers to guest operating * systems regardless of FTR_VISIBLE. This is typically driven from the * sanitised register values to allow virtual CPUs to be migrated between * arbitrary physical CPUs, but some features not present on the host are * also advertised and emulated. Look at sys_reg_descs[] for the gory * details. * * - If the arm64_ftr_bits[] for a register has a missing field, then this * field is treated as STRICT RES0, including for read_sanitised_ftr_reg(). * This is stronger than FTR_HIDDEN and can be used to hide features from * KVM guests. */ #define pr_fmt(fmt) "CPU features: " fmt #include <linux/bsearch.h> #include <linux/cpumask.h> #include <linux/crash_dump.h> #include <linux/kstrtox.h> #include <linux/sort.h> #include <linux/stop_machine.h> #include <linux/sysfs.h> #include <linux/types.h> #include <linux/minmax.h> #include <linux/mm.h> #include <linux/cpu.h> #include <linux/kasan.h> #include <linux/percpu.h> #include <asm/cpu.h> #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/fpsimd.h> #include <asm/hwcap.h> #include <asm/insn.h> #include <asm/kvm_host.h> #include <asm/mmu_context.h> #include <asm/mte.h> #include <asm/processor.h> #include <asm/smp.h> #include <asm/sysreg.h> #include <asm/traps.h> #include <asm/vectors.h> #include <asm/virt.h> /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly; #ifdef CONFIG_COMPAT #define COMPAT_ELF_HWCAP_DEFAULT \ (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ COMPAT_HWCAP_TLS|COMPAT_HWCAP_IDIV|\ COMPAT_HWCAP_LPAE) unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; unsigned int compat_elf_hwcap2 __read_mostly; #endif DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS); EXPORT_SYMBOL(system_cpucaps); static struct arm64_cpu_capabilities const __ro_after_init *cpucap_ptrs[ARM64_NCAPS]; DECLARE_BITMAP(boot_cpucaps, ARM64_NCAPS); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; /* * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs * support it? */ static bool __read_mostly allow_mismatched_32bit_el0; /* * Static branch enabled only if allow_mismatched_32bit_el0 is set and we have * seen at least one CPU capable of 32-bit EL0. */ DEFINE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); /* * Mask of CPUs supporting 32-bit EL0. * Only valid if arm64_mismatched_32bit_el0 is enabled. */ static cpumask_var_t cpu_32bit_el0_mask __cpumask_var_read_mostly; void dump_cpu_features(void) { /* file-wide pr_fmt adds "CPU features: " prefix */ pr_emerg("0x%*pb\n", ARM64_NCAPS, &system_cpucaps); } #define __ARM64_MAX_POSITIVE(reg, field) \ ((reg##_##field##_SIGNED ? \ BIT(reg##_##field##_WIDTH - 1) : \ BIT(reg##_##field##_WIDTH)) - 1) #define __ARM64_MIN_NEGATIVE(reg, field) BIT(reg##_##field##_WIDTH - 1) #define __ARM64_CPUID_FIELDS(reg, field, min_value, max_value) \ .sys_reg = SYS_##reg, \ .field_pos = reg##_##field##_SHIFT, \ .field_width = reg##_##field##_WIDTH, \ .sign = reg##_##field##_SIGNED, \ .min_field_value = min_value, \ .max_field_value = max_value, /* * ARM64_CPUID_FIELDS() encodes a field with a range from min_value to * an implicit maximum that depends on the sign-ess of the field. * * An unsigned field will be capped at all ones, while a signed field * will be limited to the positive half only. */ #define ARM64_CPUID_FIELDS(reg, field, min_value) \ __ARM64_CPUID_FIELDS(reg, field, \ SYS_FIELD_VALUE(reg, field, min_value), \ __ARM64_MAX_POSITIVE(reg, field)) /* * ARM64_CPUID_FIELDS_NEG() encodes a field with a range from an * implicit minimal value to max_value. This should be used when * matching a non-implemented property. */ #define ARM64_CPUID_FIELDS_NEG(reg, field, max_value) \ __ARM64_CPUID_FIELDS(reg, field, \ __ARM64_MIN_NEGATIVE(reg, field), \ SYS_FIELD_VALUE(reg, field, max_value)) #define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ { \ .sign = SIGNED, \ .visible = VISIBLE, \ .strict = STRICT, \ .type = TYPE, \ .shift = SHIFT, \ .width = WIDTH, \ .safe_val = SAFE_VAL, \ } /* Define a feature with unsigned values */ #define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) /* Define a feature with a signed value */ #define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) #define ARM64_FTR_END \ { \ .width = 0, \ } static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); static bool __system_matches_cap(unsigned int n); /* * NOTE: Any changes to the visibility of features should be kept in * sync with the documentation of the CPU feature register ABI. */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RNDR_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TLB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SM3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_ATOMIC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_SHA1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SPECRES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_SB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FRINTTS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_GPA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LRCPC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_FCMA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_JSCVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_API_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR1_EL1_APA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DPB_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_LUT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CLRBHB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_WFxT_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64isar3[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR3_EL1_FAMINMAX_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AMU_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_MPAM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SEL2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_RAS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_AdvSIMD_SHIFT, 4, ID_AA64PFR0_EL1_AdvSIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_FP_SHIFT, 4, ID_AA64PFR0_EL1_FP_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL1_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_EL0_SHIFT, 4, ID_AA64PFR0_EL1_ELx_64BIT_ONLY), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SME_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MPAM_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_RAS_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_MTE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_MTE_SHIFT, 4, ID_AA64PFR1_EL1_MTE_NI), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_SSBS_SHIFT, 4, ID_AA64PFR1_EL1_SSBS_NI), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_EL1_BT_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR2_EL1_FPMR_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F64MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_F32MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SM4_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SHA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_B16B16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BF16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_BitPerm_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_EL1_SVEver_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64smfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_FA64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_LUTv2_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SMEver_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I64_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F64F64_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I16I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16B16_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F16_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F16_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F8F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_I8I32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_B16F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_BI32I32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_F32F32_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8FMA_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP4_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SME), FTR_STRICT, FTR_EXACT, ID_AA64SMFR0_EL1_SF8DP2_SHIFT, 1, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64fpfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8CVT_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8FMA_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP4_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8DP2_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E4M3_SHIFT, 1, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, ID_AA64FPFR0_EL1_F8E5M2_SHIFT, 1, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_FGT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_EXS_SHIFT, 4, 0), /* * Page size not being supported at Stage-2 is not fatal. You * just give up KVM if PAGE_SIZE isn't supported there. Go fix * your favourite nesting hypervisor. * * There is a small corner case where the hypervisor explicitly * advertises a given granule size at Stage-2 (value 2) on some * vCPUs, and uses the fallback to Stage-1 (value 0) for other * vCPUs. Although this is not forbidden by the architecture, it * indicates that the hypervisor is being silly (or buggy). * * We make no effort to cope with this and pretend that if these * fields are inconsistent across vCPUs, then it isn't worth * trying to bring KVM up. */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN4_2_SHIFT, 4, 1), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN64_2_SHIFT, 4, 1), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_EL1_TGRAN16_2_SHIFT, 4, 1), /* * We already refuse to boot CPUs that don't support our configured * page size, so we can only detect mismatches for a page size other * than the one we're currently using. Unfortunately, SoCs like this * exist in the wild so, even though we don't like it, we'll have to go * along with it and treat them as non-strict. */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN4_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN4_NI), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN64_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN64_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_TGRAN16_SHIFT, 4, ID_AA64MMFR0_EL1_TGRAN16_NI), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGENDEL0_SHIFT, 4, 0), /* Linux shouldn't care about secure memory */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_SNSMEM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_BIGEND_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_ASIDBITS_SHIFT, 4, 0), /* * Differing PARange is fine as long as all peripherals and memory are mapped * within the minimum PARange of all CPUs */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EL1_PARANGE_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TIDCP1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HCX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_TWED_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_XNX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1_SpecSEI_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_PAN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_LO_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HPDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VH_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_VMIDBits_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_EL1_HAFDBS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_E0PD_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_EVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_BBM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_TTL_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_ST_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_NV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CCIDX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_VARange_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_UAO_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_EL1_CnP_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_S1PIE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR3_EL1_TCRX_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_ctr[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, 31, 1, 1), /* RES1 */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DIC_SHIFT, 1, 1), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IDC_SHIFT, 1, 1), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_CWG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_OR_ZERO_SAFE, CTR_EL0_ERG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_DminLine_SHIFT, 4, 1), /* * Linux can handle differing I-cache policies. Userspace JITs will * make use of *minLine. * If we have differing I-cache policies, report it as the weakest - VIPT. */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_EXACT, CTR_EL0_L1Ip_SHIFT, 2, CTR_EL0_L1Ip_VIPT), /* L1Ip */ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, CTR_EL0_IminLine_SHIFT, 4, 0), ARM64_FTR_END, }; static struct arm64_ftr_override __ro_after_init no_override = { }; struct arm64_ftr_reg arm64_ftr_reg_ctrel0 = { .name = "SYS_CTR_EL0", .ftr_bits = ftr_ctr, .override = &no_override, }; static const struct arm64_ftr_bits ftr_id_mmfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_InnerShr_SHIFT, 4, 0xf), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_FCSE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_AuxReg_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_TCM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_ShareLvl_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_OuterShr_SHIFT, 4, 0xf), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_PMSA_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR0_EL1_VMSA_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_DoubleLock_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_PMSVer_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_CTX_CMPs_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_WRPs_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_EL1_BRPs_SHIFT, 4, 0), /* * We can instantiate multiple PMU instances with different levels * of support. */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_EL1_PMUVer_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_EL1_DebugVer_SHIFT, 4, 0x6), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_mvfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPRound_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPShVec_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSqrt_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDivide_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPTrap_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPDP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_FPSP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR0_EL1_SIMDReg_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_mvfr1[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDFMAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPHP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDHP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDSP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDInt_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_SIMDLS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPDNaN_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR1_EL1_FPFtZ_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_mvfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_FPMisc_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, MVFR2_EL1_SIMDMisc_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_EXACT, DCZID_EL0_DZP_SHIFT, 1, 1), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, DCZID_EL0_BS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_gmid[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, GMID_EL1_BS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_isar0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Divide_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Debug_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Coproc_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_CmpBranch_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitField_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_BitCount_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_EL1_Swap_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_RDM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_CRC32_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SHA1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_AES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_EL1_SEVL_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_EVT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CCIDX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_HPDS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_CnP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_XNX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EL1_AC2_SHIFT, 4, 0), /* * SpecSEI = 1 indicates that the PE might generate an SError on an * external abort on speculative read. It is safe to assume that an * SError might be generated than it will not be. Hence it has been * classified as FTR_HIGHER_SAFE. */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_EL1_SpecSEI_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_isar4[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SWP_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_PSR_M_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SynchPrim_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Barrier_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_SMC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Writeback_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_WithShifts_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_EL1_Unpriv_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_mmfr5[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_EL1_ETS_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_isar6[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_I8MM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_BF16_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SPECRES_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_SB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_DP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR6_EL1_JSCVT_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State1_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_EL1_State0_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr1[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GIC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virt_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Sec_frac_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_GenTimer_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Virtualization_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_MProgMod_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_Security_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_EL1_ProgMod_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_pfr2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_SSBS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_EL1_CSV3_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_EL1_PerfMon_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MProfDbg_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapTrc_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopTrc_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_MMapDbg_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopSDbg_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_EL1_CopDbg_SHIFT, 4, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_dfr1[] = { S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_EL1_MTPMU_SHIFT, 4, 0), ARM64_FTR_END, }; /* * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of * 0. Covers the following 32bit registers: * id_isar[1-3], id_mmfr[1-3] */ static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 0, 4, 0), ARM64_FTR_END, }; /* Table for a single 32bit feature value */ static const struct arm64_ftr_bits ftr_single32[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 0, 32, 0), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; #define __ARM64_FTR_REG_OVERRIDE(id_str, id, table, ovr) { \ .sys_id = id, \ .reg = &(struct arm64_ftr_reg){ \ .name = id_str, \ .override = (ovr), \ .ftr_bits = &((table)[0]), \ }} #define ARM64_FTR_REG_OVERRIDE(id, table, ovr) \ __ARM64_FTR_REG_OVERRIDE(#id, id, table, ovr) #define ARM64_FTR_REG(id, table) \ __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override) struct arm64_ftr_override id_aa64mmfr0_override; struct arm64_ftr_override id_aa64mmfr1_override; struct arm64_ftr_override id_aa64mmfr2_override; struct arm64_ftr_override id_aa64pfr0_override; struct arm64_ftr_override id_aa64pfr1_override; struct arm64_ftr_override id_aa64zfr0_override; struct arm64_ftr_override id_aa64smfr0_override; struct arm64_ftr_override id_aa64isar1_override; struct arm64_ftr_override id_aa64isar2_override; struct arm64_ftr_override arm64_sw_feature_override; static const struct __ftr_reg_entry { u32 sys_id; struct arm64_ftr_reg *reg; } arm64_ftr_regs[] = { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_id_pfr1), ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0), ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_MMFR2_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), /* Op1 = 0, CRn = 0, CRm = 2 */ ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_id_isar0), ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_id_isar4), ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), /* Op1 = 0, CRn = 0, CRm = 3 */ ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_mvfr0), ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_mvfr1), ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1), ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5), /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0, &id_aa64pfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1, &id_aa64pfr1_override), ARM64_FTR_REG(SYS_ID_AA64PFR2_EL1, ftr_id_aa64pfr2), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0, &id_aa64zfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0, &id_aa64smfr0_override), ARM64_FTR_REG(SYS_ID_AA64FPFR0_EL1, ftr_id_aa64fpfr0), /* Op1 = 0, CRn = 0, CRm = 5 */ ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0), ARM64_FTR_REG(SYS_ID_AA64DFR1_EL1, ftr_raz), /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, &id_aa64isar2_override), ARM64_FTR_REG(SYS_ID_AA64ISAR3_EL1, ftr_id_aa64isar3), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0, &id_aa64mmfr0_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1, &id_aa64mmfr1_override), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2, &id_aa64mmfr2_override), ARM64_FTR_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3), ARM64_FTR_REG(SYS_ID_AA64MMFR4_EL1, ftr_id_aa64mmfr4), /* Op1 = 1, CRn = 0, CRm = 0 */ ARM64_FTR_REG(SYS_GMID_EL1, ftr_gmid), /* Op1 = 3, CRn = 0, CRm = 0 */ { SYS_CTR_EL0, &arm64_ftr_reg_ctrel0 }, ARM64_FTR_REG(SYS_DCZID_EL0, ftr_dczid), /* Op1 = 3, CRn = 14, CRm = 0 */ ARM64_FTR_REG(SYS_CNTFRQ_EL0, ftr_single32), }; static int search_cmp_ftr_reg(const void *id, const void *regp) { return (int)(unsigned long)id - (int)((const struct __ftr_reg_entry *)regp)->sys_id; } /* * get_arm64_ftr_reg_nowarn - Looks up a feature register entry using * its sys_reg() encoding. With the array arm64_ftr_regs sorted in the * ascending order of sys_id, we use binary search to find a matching * entry. * * returns - Upon success, matching ftr_reg entry for id. * - NULL on failure. It is upto the caller to decide * the impact of a failure. */ static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id) { const struct __ftr_reg_entry *ret; ret = bsearch((const void *)(unsigned long)sys_id, arm64_ftr_regs, ARRAY_SIZE(arm64_ftr_regs), sizeof(arm64_ftr_regs[0]), search_cmp_ftr_reg); if (ret) return ret->reg; return NULL; } /* * get_arm64_ftr_reg - Looks up a feature register entry using * its sys_reg() encoding. This calls get_arm64_ftr_reg_nowarn(). * * returns - Upon success, matching ftr_reg entry for id. * - NULL on failure but with an WARN_ON(). */ struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) { struct arm64_ftr_reg *reg; reg = get_arm64_ftr_reg_nowarn(sys_id); /* * Requesting a non-existent register search is an error. Warn * and let the caller handle it. */ WARN_ON(!reg); return reg; } static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) { u64 mask = arm64_ftr_mask(ftrp); reg &= ~mask; reg |= (ftr_val << ftrp->shift) & mask; return reg; } s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { s64 ret = 0; switch (ftrp->type) { case FTR_EXACT: ret = ftrp->safe_val; break; case FTR_LOWER_SAFE: ret = min(new, cur); break; case FTR_HIGHER_OR_ZERO_SAFE: if (!cur || !new) break; fallthrough; case FTR_HIGHER_SAFE: ret = max(new, cur); break; default: BUG(); } return ret; } static void __init sort_ftr_regs(void) { unsigned int i; for (i = 0; i < ARRAY_SIZE(arm64_ftr_regs); i++) { const struct arm64_ftr_reg *ftr_reg = arm64_ftr_regs[i].reg; const struct arm64_ftr_bits *ftr_bits = ftr_reg->ftr_bits; unsigned int j = 0; /* * Features here must be sorted in descending order with respect * to their shift values and should not overlap with each other. */ for (; ftr_bits->width != 0; ftr_bits++, j++) { unsigned int width = ftr_reg->ftr_bits[j].width; unsigned int shift = ftr_reg->ftr_bits[j].shift; unsigned int prev_shift; WARN((shift + width) > 64, "%s has invalid feature at shift %d\n", ftr_reg->name, shift); /* * Skip the first feature. There is nothing to * compare against for now. */ if (j == 0) continue; prev_shift = ftr_reg->ftr_bits[j - 1].shift; WARN((shift + width) > prev_shift, "%s has feature overlap at shift %d\n", ftr_reg->name, shift); } /* * Skip the first register. There is nothing to * compare against for now. */ if (i == 0) continue; /* * Registers here must be sorted in ascending order with respect * to sys_id for subsequent binary search in get_arm64_ftr_reg() * to work correctly. */ BUG_ON(arm64_ftr_regs[i].sys_id <= arm64_ftr_regs[i - 1].sys_id); } } /* * Initialise the CPU feature register from Boot CPU values. * Also initiliases the strict_mask for the register. * Any bits that are not covered by an arm64_ftr_bits entry are considered * RES0 for the system-wide value, and must strictly match. */ static void init_cpu_ftr_reg(u32 sys_reg, u64 new) { u64 val = 0; u64 strict_mask = ~0x0ULL; u64 user_mask = 0; u64 valid_mask = 0; const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); if (!reg) return; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); s64 ftr_new = arm64_ftr_value(ftrp, new); s64 ftr_ovr = arm64_ftr_value(ftrp, reg->override->val); if ((ftr_mask & reg->override->mask) == ftr_mask) { s64 tmp = arm64_ftr_safe_value(ftrp, ftr_ovr, ftr_new); char *str = NULL; if (ftr_ovr != tmp) { /* Unsafe, remove the override */ reg->override->mask &= ~ftr_mask; reg->override->val &= ~ftr_mask; tmp = ftr_ovr; str = "ignoring override"; } else if (ftr_new != tmp) { /* Override was valid */ ftr_new = tmp; str = "forced"; } else if (ftr_ovr == tmp) { /* Override was the safe value */ str = "already set"; } if (str) pr_warn("%s[%d:%d]: %s to %llx\n", reg->name, ftrp->shift + ftrp->width - 1, ftrp->shift, str, tmp & (BIT(ftrp->width) - 1)); } else if ((ftr_mask & reg->override->val) == ftr_mask) { reg->override->val &= ~ftr_mask; pr_warn("%s[%d:%d]: impossible override, ignored\n", reg->name, ftrp->shift + ftrp->width - 1, ftrp->shift); } val = arm64_ftr_set_value(ftrp, val, ftr_new); valid_mask |= ftr_mask; if (!ftrp->strict) strict_mask &= ~ftr_mask; if (ftrp->visible) user_mask |= ftr_mask; else reg->user_val = arm64_ftr_set_value(ftrp, reg->user_val, ftrp->safe_val); } val &= valid_mask; reg->sys_val = val; reg->strict_mask = strict_mask; reg->user_mask = user_mask; } extern const struct arm64_cpu_capabilities arm64_errata[]; static const struct arm64_cpu_capabilities arm64_features[]; static void __init init_cpucap_indirect_list_from_array(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) { if (WARN(caps->capability >= ARM64_NCAPS, "Invalid capability %d\n", caps->capability)) continue; if (WARN(cpucap_ptrs[caps->capability], "Duplicate entry for capability %d\n", caps->capability)) continue; cpucap_ptrs[caps->capability] = caps; } } static void __init init_cpucap_indirect_list(void) { init_cpucap_indirect_list_from_array(arm64_features); init_cpucap_indirect_list_from_array(arm64_errata); } static void __init setup_boot_cpu_capabilities(void); static void init_32bit_cpu_features(struct cpuinfo_32bit *info) { init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); init_cpu_ftr_reg(SYS_ID_ISAR3_EL1, info->reg_id_isar3); init_cpu_ftr_reg(SYS_ID_ISAR4_EL1, info->reg_id_isar4); init_cpu_ftr_reg(SYS_ID_ISAR5_EL1, info->reg_id_isar5); init_cpu_ftr_reg(SYS_ID_ISAR6_EL1, info->reg_id_isar6); init_cpu_ftr_reg(SYS_ID_MMFR0_EL1, info->reg_id_mmfr0); init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); } #ifdef CONFIG_ARM64_PSEUDO_NMI static bool enable_pseudo_nmi; static int __init early_enable_pseudo_nmi(char *p) { return kstrtobool(p, &enable_pseudo_nmi); } early_param("irqchip.gicv3_pseudo_nmi", early_enable_pseudo_nmi); static __init void detect_system_supports_pseudo_nmi(void) { struct device_node *np; if (!enable_pseudo_nmi) return; /* * Detect broken MediaTek firmware that doesn't properly save and * restore GIC priorities. */ np = of_find_compatible_node(NULL, NULL, "arm,gic-v3"); if (np && of_property_read_bool(np, "mediatek,broken-save-restore-fw")) { pr_info("Pseudo-NMI disabled due to MediaTek Chromebook GICR save problem\n"); enable_pseudo_nmi = false; } of_node_put(np); } #else /* CONFIG_ARM64_PSEUDO_NMI */ static inline void detect_system_supports_pseudo_nmi(void) { } #endif void __init init_cpu_features(struct cpuinfo_arm64 *info) { /* Before we start using the tables, make sure it is sorted */ sort_ftr_regs(); init_cpu_ftr_reg(SYS_CTR_EL0, info->reg_ctr); init_cpu_ftr_reg(SYS_DCZID_EL0, info->reg_dczid); init_cpu_ftr_reg(SYS_CNTFRQ_EL0, info->reg_cntfrq); init_cpu_ftr_reg(SYS_ID_AA64DFR0_EL1, info->reg_id_aa64dfr0); init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2); init_cpu_ftr_reg(SYS_ID_AA64ISAR3_EL1, info->reg_id_aa64isar3); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); init_cpu_ftr_reg(SYS_ID_AA64MMFR3_EL1, info->reg_id_aa64mmfr3); init_cpu_ftr_reg(SYS_ID_AA64MMFR4_EL1, info->reg_id_aa64mmfr4); init_cpu_ftr_reg(SYS_ID_AA64PFR0_EL1, info->reg_id_aa64pfr0); init_cpu_ftr_reg(SYS_ID_AA64PFR1_EL1, info->reg_id_aa64pfr1); init_cpu_ftr_reg(SYS_ID_AA64PFR2_EL1, info->reg_id_aa64pfr2); init_cpu_ftr_reg(SYS_ID_AA64ZFR0_EL1, info->reg_id_aa64zfr0); init_cpu_ftr_reg(SYS_ID_AA64SMFR0_EL1, info->reg_id_aa64smfr0); init_cpu_ftr_reg(SYS_ID_AA64FPFR0_EL1, info->reg_id_aa64fpfr0); if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) init_32bit_cpu_features(&info->aarch32); if (IS_ENABLED(CONFIG_ARM64_SVE) && id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { unsigned long cpacr = cpacr_save_enable_kernel_sve(); vec_init_vq_map(ARM64_VEC_SVE); cpacr_restore(cpacr); } if (IS_ENABLED(CONFIG_ARM64_SME) && id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { unsigned long cpacr = cpacr_save_enable_kernel_sme(); /* * We mask out SMPS since even if the hardware * supports priorities the kernel does not at present * and we block access to them. */ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; vec_init_vq_map(ARM64_VEC_SME); cpacr_restore(cpacr); } if (id_aa64pfr1_mte(info->reg_id_aa64pfr1)) init_cpu_ftr_reg(SYS_GMID_EL1, info->reg_gmid); } static void update_cpu_ftr_reg(struct arm64_ftr_reg *reg, u64 new) { const struct arm64_ftr_bits *ftrp; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { s64 ftr_cur = arm64_ftr_value(ftrp, reg->sys_val); s64 ftr_new = arm64_ftr_value(ftrp, new); if (ftr_cur == ftr_new) continue; /* Find a safe value */ ftr_new = arm64_ftr_safe_value(ftrp, ftr_new, ftr_cur); reg->sys_val = arm64_ftr_set_value(ftrp, reg->sys_val, ftr_new); } } static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); if (!regp) return 0; update_cpu_ftr_reg(regp, val); if ((boot & regp->strict_mask) == (val & regp->strict_mask)) return 0; pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016llx, CPU%d: %#016llx\n", regp->name, boot, cpu, val); return 1; } static void relax_cpu_ftr_reg(u32 sys_id, int field) { const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); if (!regp) return; for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) { if (ftrp->shift == field) { regp->strict_mask &= ~arm64_ftr_mask(ftrp); break; } } /* Bogus field? */ WARN_ON(!ftrp->width); } static void lazy_init_32bit_cpu_features(struct cpuinfo_arm64 *info, struct cpuinfo_arm64 *boot) { static bool boot_cpu_32bit_regs_overridden = false; if (!allow_mismatched_32bit_el0 || boot_cpu_32bit_regs_overridden) return; if (id_aa64pfr0_32bit_el0(boot->reg_id_aa64pfr0)) return; boot->aarch32 = info->aarch32; init_32bit_cpu_features(&boot->aarch32); boot_cpu_32bit_regs_overridden = true; } static int update_32bit_cpu_features(int cpu, struct cpuinfo_32bit *info, struct cpuinfo_32bit *boot) { int taint = 0; u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); /* * If we don't have AArch32 at EL1, then relax the strictness of * EL1-dependent register fields to avoid spurious sanity check fails. */ if (!id_aa64pfr0_32bit_el1(pfr0)) { relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_EL1_SMC_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virt_frac_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Sec_frac_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Virtualization_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_Security_SHIFT); relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_EL1_ProgMod_SHIFT); } taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, info->reg_id_dfr0, boot->reg_id_dfr0); taint |= check_update_ftr_reg(SYS_ID_DFR1_EL1, cpu, info->reg_id_dfr1, boot->reg_id_dfr1); taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, info->reg_id_isar0, boot->reg_id_isar0); taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, info->reg_id_isar1, boot->reg_id_isar1); taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, info->reg_id_isar2, boot->reg_id_isar2); taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, info->reg_id_isar3, boot->reg_id_isar3); taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, info->reg_id_isar4, boot->reg_id_isar4); taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, info->reg_id_isar5, boot->reg_id_isar5); taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, info->reg_id_isar6, boot->reg_id_isar6); /* * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and * ACTLR formats could differ across CPUs and therefore would have to * be trapped for virtualization anyway. */ taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, info->reg_id_mmfr0, boot->reg_id_mmfr0); taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, info->reg_id_mmfr1, boot->reg_id_mmfr1); taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, info->reg_id_mmfr2, boot->reg_id_mmfr2); taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, info->reg_id_mmfr3, boot->reg_id_mmfr3); taint |= check_update_ftr_reg(SYS_ID_MMFR4_EL1, cpu, info->reg_id_mmfr4, boot->reg_id_mmfr4); taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu, info->reg_id_mmfr5, boot->reg_id_mmfr5); taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, info->reg_id_pfr0, boot->reg_id_pfr0); taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, info->reg_id_pfr1, boot->reg_id_pfr1); taint |= check_update_ftr_reg(SYS_ID_PFR2_EL1, cpu, info->reg_id_pfr2, boot->reg_id_pfr2); taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, info->reg_mvfr0, boot->reg_mvfr0); taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, info->reg_mvfr1, boot->reg_mvfr1); taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, info->reg_mvfr2, boot->reg_mvfr2); return taint; } /* * Update system wide CPU feature registers with the values from a * non-boot CPU. Also performs SANITY checks to make sure that there * aren't any insane variations from that of the boot CPU. */ void update_cpu_features(int cpu, struct cpuinfo_arm64 *info, struct cpuinfo_arm64 *boot) { int taint = 0; /* * The kernel can handle differing I-cache policies, but otherwise * caches should look identical. Userspace JITs will make use of * *minLine. */ taint |= check_update_ftr_reg(SYS_CTR_EL0, cpu, info->reg_ctr, boot->reg_ctr); /* * Userspace may perform DC ZVA instructions. Mismatched block sizes * could result in too much or too little memory being zeroed if a * process is preempted and migrated between CPUs. */ taint |= check_update_ftr_reg(SYS_DCZID_EL0, cpu, info->reg_dczid, boot->reg_dczid); /* If different, timekeeping will be broken (especially with KVM) */ taint |= check_update_ftr_reg(SYS_CNTFRQ_EL0, cpu, info->reg_cntfrq, boot->reg_cntfrq); /* * The kernel uses self-hosted debug features and expects CPUs to * support identical debug features. We presently need CTX_CMPs, WRPs, * and BRPs to be identical. * ID_AA64DFR1 is currently RES0. */ taint |= check_update_ftr_reg(SYS_ID_AA64DFR0_EL1, cpu, info->reg_id_aa64dfr0, boot->reg_id_aa64dfr0); taint |= check_update_ftr_reg(SYS_ID_AA64DFR1_EL1, cpu, info->reg_id_aa64dfr1, boot->reg_id_aa64dfr1); /* * Even in big.LITTLE, processors should be identical instruction-set * wise. */ taint |= check_update_ftr_reg(SYS_ID_AA64ISAR0_EL1, cpu, info->reg_id_aa64isar0, boot->reg_id_aa64isar0); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, info->reg_id_aa64isar1, boot->reg_id_aa64isar1); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu, info->reg_id_aa64isar2, boot->reg_id_aa64isar2); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR3_EL1, cpu, info->reg_id_aa64isar3, boot->reg_id_aa64isar3); /* * Differing PARange support is fine as long as all peripherals and * memory are mapped within the minimum PARange of all CPUs. * Linux should not care about secure memory. */ taint |= check_update_ftr_reg(SYS_ID_AA64MMFR0_EL1, cpu, info->reg_id_aa64mmfr0, boot->reg_id_aa64mmfr0); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR1_EL1, cpu, info->reg_id_aa64mmfr1, boot->reg_id_aa64mmfr1); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); taint |= check_update_ftr_reg(SYS_ID_AA64MMFR3_EL1, cpu, info->reg_id_aa64mmfr3, boot->reg_id_aa64mmfr3); taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, info->reg_id_aa64pfr1, boot->reg_id_aa64pfr1); taint |= check_update_ftr_reg(SYS_ID_AA64PFR2_EL1, cpu, info->reg_id_aa64pfr2, boot->reg_id_aa64pfr2); taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); taint |= check_update_ftr_reg(SYS_ID_AA64SMFR0_EL1, cpu, info->reg_id_aa64smfr0, boot->reg_id_aa64smfr0); taint |= check_update_ftr_reg(SYS_ID_AA64FPFR0_EL1, cpu, info->reg_id_aa64fpfr0, boot->reg_id_aa64fpfr0); /* Probe vector lengths */ if (IS_ENABLED(CONFIG_ARM64_SVE) && id_aa64pfr0_sve(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1))) { if (!system_capabilities_finalized()) { unsigned long cpacr = cpacr_save_enable_kernel_sve(); vec_update_vq_map(ARM64_VEC_SVE); cpacr_restore(cpacr); } } if (IS_ENABLED(CONFIG_ARM64_SME) && id_aa64pfr1_sme(read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1))) { unsigned long cpacr = cpacr_save_enable_kernel_sme(); /* * We mask out SMPS since even if the hardware * supports priorities the kernel does not at present * and we block access to them. */ info->reg_smidr = read_cpuid(SMIDR_EL1) & ~SMIDR_EL1_SMPS; /* Probe vector lengths */ if (!system_capabilities_finalized()) vec_update_vq_map(ARM64_VEC_SME); cpacr_restore(cpacr); } /* * The kernel uses the LDGM/STGM instructions and the number of tags * they read/write depends on the GMID_EL1.BS field. Check that the * value is the same on all CPUs. */ if (IS_ENABLED(CONFIG_ARM64_MTE) && id_aa64pfr1_mte(info->reg_id_aa64pfr1)) { taint |= check_update_ftr_reg(SYS_GMID_EL1, cpu, info->reg_gmid, boot->reg_gmid); } /* * If we don't have AArch32 at all then skip the checks entirely * as the register values may be UNKNOWN and we're not going to be * using them for anything. * * This relies on a sanitised view of the AArch64 ID registers * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last. */ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { lazy_init_32bit_cpu_features(info, boot); taint |= update_32bit_cpu_features(cpu, &info->aarch32, &boot->aarch32); } /* * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ if (taint) { pr_warn_once("Unsupported CPU feature variation detected.\n"); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); } } u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); if (!regp) return 0; return regp->sys_val; } EXPORT_SYMBOL_GPL(read_sanitised_ftr_reg); #define read_sysreg_case(r) \ case r: val = read_sysreg_s(r); break; /* * __read_sysreg_by_encoding() - Used by a STARTING cpu before cpuinfo is populated. * Read the system register on the current CPU */ u64 __read_sysreg_by_encoding(u32 sys_id) { struct arm64_ftr_reg *regp; u64 val; switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); read_sysreg_case(SYS_ID_PFR1_EL1); read_sysreg_case(SYS_ID_PFR2_EL1); read_sysreg_case(SYS_ID_DFR0_EL1); read_sysreg_case(SYS_ID_DFR1_EL1); read_sysreg_case(SYS_ID_MMFR0_EL1); read_sysreg_case(SYS_ID_MMFR1_EL1); read_sysreg_case(SYS_ID_MMFR2_EL1); read_sysreg_case(SYS_ID_MMFR3_EL1); read_sysreg_case(SYS_ID_MMFR4_EL1); read_sysreg_case(SYS_ID_MMFR5_EL1); read_sysreg_case(SYS_ID_ISAR0_EL1); read_sysreg_case(SYS_ID_ISAR1_EL1); read_sysreg_case(SYS_ID_ISAR2_EL1); read_sysreg_case(SYS_ID_ISAR3_EL1); read_sysreg_case(SYS_ID_ISAR4_EL1); read_sysreg_case(SYS_ID_ISAR5_EL1); read_sysreg_case(SYS_ID_ISAR6_EL1); read_sysreg_case(SYS_MVFR0_EL1); read_sysreg_case(SYS_MVFR1_EL1); read_sysreg_case(SYS_MVFR2_EL1); read_sysreg_case(SYS_ID_AA64PFR0_EL1); read_sysreg_case(SYS_ID_AA64PFR1_EL1); read_sysreg_case(SYS_ID_AA64PFR2_EL1); read_sysreg_case(SYS_ID_AA64ZFR0_EL1); read_sysreg_case(SYS_ID_AA64SMFR0_EL1); read_sysreg_case(SYS_ID_AA64FPFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR0_EL1); read_sysreg_case(SYS_ID_AA64DFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR0_EL1); read_sysreg_case(SYS_ID_AA64MMFR1_EL1); read_sysreg_case(SYS_ID_AA64MMFR2_EL1); read_sysreg_case(SYS_ID_AA64MMFR3_EL1); read_sysreg_case(SYS_ID_AA64MMFR4_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); read_sysreg_case(SYS_ID_AA64ISAR2_EL1); read_sysreg_case(SYS_ID_AA64ISAR3_EL1); read_sysreg_case(SYS_CNTFRQ_EL0); read_sysreg_case(SYS_CTR_EL0); read_sysreg_case(SYS_DCZID_EL0); default: BUG(); return 0; } regp = get_arm64_ftr_reg(sys_id); if (regp) { val &= ~regp->override->mask; val |= (regp->override->val & regp->override->mask); } return val; } #include <linux/irqchip/arm-gic-v3.h> static bool has_always(const struct arm64_cpu_capabilities *entry, int scope) { return true; } static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { int val, min, max; u64 tmp; val = cpuid_feature_extract_field_width(reg, entry->field_pos, entry->field_width, entry->sign); tmp = entry->min_field_value; tmp <<= entry->field_pos; min = cpuid_feature_extract_field_width(tmp, entry->field_pos, entry->field_width, entry->sign); tmp = entry->max_field_value; tmp <<= entry->field_pos; max = cpuid_feature_extract_field_width(tmp, entry->field_pos, entry->field_width, entry->sign); return val >= min && val <= max; } static u64 read_scoped_sysreg(const struct arm64_cpu_capabilities *entry, int scope) { WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); if (scope == SCOPE_SYSTEM) return read_sanitised_ftr_reg(entry->sys_reg); else return __read_sysreg_by_encoding(entry->sys_reg); } static bool has_user_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) { int mask; struct arm64_ftr_reg *regp; u64 val = read_scoped_sysreg(entry, scope); regp = get_arm64_ftr_reg(entry->sys_reg); if (!regp) return false; mask = cpuid_feature_extract_unsigned_field_width(regp->user_mask, entry->field_pos, entry->field_width); if (!mask) return false; return feature_matches(val, entry); } static bool has_cpuid_feature(const struct arm64_cpu_capabilities *entry, int scope) { u64 val = read_scoped_sysreg(entry, scope); return feature_matches(val, entry); } const struct cpumask *system_32bit_el0_cpumask(void) { if (!system_supports_32bit_el0()) return cpu_none_mask; if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) return cpu_32bit_el0_mask; return cpu_possible_mask; } static int __init parse_32bit_el0_param(char *str) { allow_mismatched_32bit_el0 = true; return 0; } early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param); static ssize_t aarch32_el0_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct cpumask *mask = system_32bit_el0_cpumask(); return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask)); } static const DEVICE_ATTR_RO(aarch32_el0); static int __init aarch32_el0_sysfs_init(void) { struct device *dev_root; int ret = 0; if (!allow_mismatched_32bit_el0) return 0; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = device_create_file(dev_root, &dev_attr_aarch32_el0); put_device(dev_root); } return ret; } device_initcall(aarch32_el0_sysfs_init); static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope) { if (!has_cpuid_feature(entry, scope)) return allow_mismatched_32bit_el0; if (scope == SCOPE_SYSTEM) pr_info("detected: 32-bit EL0 Support\n"); return true; } static bool has_useable_gicv3_cpuif(const struct arm64_cpu_capabilities *entry, int scope) { bool has_sre; if (!has_cpuid_feature(entry, scope)) return false; has_sre = gic_enable_sre(); if (!has_sre) pr_warn_once("%s present but disabled by higher exception level\n", entry->desc); return has_sre; } static bool has_cache_idc(const struct arm64_cpu_capabilities *entry, int scope) { u64 ctr; if (scope == SCOPE_SYSTEM) ctr = arm64_ftr_reg_ctrel0.sys_val; else ctr = read_cpuid_effective_cachetype(); return ctr & BIT(CTR_EL0_IDC_SHIFT); } static void cpu_emulate_effective_ctr(const struct arm64_cpu_capabilities *__unused) { /* * If the CPU exposes raw CTR_EL0.IDC = 0, while effectively * CTR_EL0.IDC = 1 (from CLIDR values), we need to trap accesses * to the CTR_EL0 on this CPU and emulate it with the real/safe * value. */ if (!(read_cpuid_cachetype() & BIT(CTR_EL0_IDC_SHIFT))) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } static bool has_cache_dic(const struct arm64_cpu_capabilities *entry, int scope) { u64 ctr; if (scope == SCOPE_SYSTEM) ctr = arm64_ftr_reg_ctrel0.sys_val; else ctr = read_cpuid_cachetype(); return ctr & BIT(CTR_EL0_DIC_SHIFT); } static bool __maybe_unused has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) { /* * Kdump isn't guaranteed to power-off all secondary CPUs, CNP * may share TLB entries with a CPU stuck in the crashed * kernel. */ if (is_kdump_kernel()) return false; if (cpus_have_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP)) return false; return has_cpuid_feature(entry, scope); } static bool __meltdown_safe = true; static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, int scope) { /* List of CPUs that are not vulnerable and don't need KPTI */ static const struct midr_range kpti_safe_list[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A35), MIDR_ALL_VERSIONS(MIDR_CORTEX_A53), MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_GOLD), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_2XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_3XX_SILVER), MIDR_ALL_VERSIONS(MIDR_QCOM_KRYO_4XX_SILVER), { /* sentinel */ } }; char const *str = "kpti command line option"; bool meltdown_safe; meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list); /* Defer to CPU feature registers */ if (has_cpuid_feature(entry, scope)) meltdown_safe = true; if (!meltdown_safe) __meltdown_safe = false; /* * For reasons that aren't entirely clear, enabling KPTI on Cavium * ThunderX leads to apparent I-cache corruption of kernel text, which * ends as well as you might imagine. Don't even try. We cannot rely * on the cpus_have_*cap() helpers here to detect the CPU erratum * because cpucap detection order may change. However, since we know * affected CPUs are always in a homogeneous configuration, it is * safe to rely on this_cpu_has_cap() here. */ if (this_cpu_has_cap(ARM64_WORKAROUND_CAVIUM_27456)) { str = "ARM64_WORKAROUND_CAVIUM_27456"; __kpti_forced = -1; } /* Useful for KASLR robustness */ if (kaslr_enabled() && kaslr_requires_kpti()) { if (!__kpti_forced) { str = "KASLR"; __kpti_forced = 1; } } if (cpu_mitigations_off() && !__kpti_forced) { str = "mitigations=off"; __kpti_forced = -1; } if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) { pr_info_once("kernel page table isolation disabled by kernel configuration\n"); return false; } /* Forced? */ if (__kpti_forced) { pr_info_once("kernel page table isolation forced %s by %s\n", __kpti_forced > 0 ? "ON" : "OFF", str); return __kpti_forced > 0; } return !meltdown_safe; } static bool has_nv1(const struct arm64_cpu_capabilities *entry, int scope) { /* * Although the Apple M2 family appears to support NV1, the * PTW barfs on the nVHE EL2 S1 page table format. Pretend * that it doesn't support NV1 at all. */ static const struct midr_range nv1_ni_list[] = { MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_PRO), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_PRO), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD_MAX), MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE_MAX), {} }; return (__system_matches_cap(ARM64_HAS_NESTED_VIRT) && !(has_cpuid_feature(entry, scope) || is_midr_in_range_list(read_cpuid_id(), nv1_ni_list))); } #if defined(ID_AA64MMFR0_EL1_TGRAN_LPA2) && defined(ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2) static bool has_lpa2_at_stage1(u64 mmfr0) { unsigned int tgran; tgran = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_SHIFT); return tgran == ID_AA64MMFR0_EL1_TGRAN_LPA2; } static bool has_lpa2_at_stage2(u64 mmfr0) { unsigned int tgran; tgran = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_2_SHIFT); return tgran == ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_LPA2; } static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope) { u64 mmfr0; mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); return has_lpa2_at_stage1(mmfr0) && has_lpa2_at_stage2(mmfr0); } #else static bool has_lpa2(const struct arm64_cpu_capabilities *entry, int scope) { return false; } #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define KPTI_NG_TEMP_VA (-(1UL << PMD_SHIFT)) extern void create_kpti_ng_temp_pgd(pgd_t *pgdir, phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), int flags); static phys_addr_t __initdata kpti_ng_temp_alloc; static phys_addr_t __init kpti_ng_pgd_alloc(int shift) { kpti_ng_temp_alloc -= PAGE_SIZE; return kpti_ng_temp_alloc; } static int __init __kpti_install_ng_mappings(void *__unused) { typedef void (kpti_remap_fn)(int, int, phys_addr_t, unsigned long); extern kpti_remap_fn idmap_kpti_install_ng_mappings; kpti_remap_fn *remap_fn; int cpu = smp_processor_id(); int levels = CONFIG_PGTABLE_LEVELS; int order = order_base_2(levels); u64 kpti_ng_temp_pgd_pa = 0; pgd_t *kpti_ng_temp_pgd; u64 alloc = 0; if (levels == 5 && !pgtable_l5_enabled()) levels = 4; else if (levels == 4 && !pgtable_l4_enabled()) levels = 3; remap_fn = (void *)__pa_symbol(idmap_kpti_install_ng_mappings); if (!cpu) { alloc = __get_free_pages(GFP_ATOMIC | __GFP_ZERO, order); kpti_ng_temp_pgd = (pgd_t *)(alloc + (levels - 1) * PAGE_SIZE); kpti_ng_temp_alloc = kpti_ng_temp_pgd_pa = __pa(kpti_ng_temp_pgd); // // Create a minimal page table hierarchy that permits us to map // the swapper page tables temporarily as we traverse them. // // The physical pages are laid out as follows: // // +--------+-/-------+-/------ +-/------ +-\\\--------+ // : PTE[] : | PMD[] : | PUD[] : | P4D[] : ||| PGD[] : // +--------+-\-------+-\------ +-\------ +-///--------+ // ^ // The first page is mapped into this hierarchy at a PMD_SHIFT // aligned virtual address, so that we can manipulate the PTE // level entries while the mapping is active. The first entry // covers the PTE[] page itself, the remaining entries are free // to be used as a ad-hoc fixmap. // create_kpti_ng_temp_pgd(kpti_ng_temp_pgd, __pa(alloc), KPTI_NG_TEMP_VA, PAGE_SIZE, PAGE_KERNEL, kpti_ng_pgd_alloc, 0); } cpu_install_idmap(); remap_fn(cpu, num_online_cpus(), kpti_ng_temp_pgd_pa, KPTI_NG_TEMP_VA); cpu_uninstall_idmap(); if (!cpu) { free_pages(alloc, order); arm64_use_ng_mappings = true; } return 0; } static void __init kpti_install_ng_mappings(void) { /* Check whether KPTI is going to be used */ if (!arm64_kernel_unmapped_at_el0()) return; /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not * created any global mappings at all. */ if (arm64_use_ng_mappings) return; stop_machine(__kpti_install_ng_mappings, NULL, cpu_online_mask); } #else static inline void kpti_install_ng_mappings(void) { } #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ static void cpu_enable_kpti(struct arm64_cpu_capabilities const *cap) { if (__this_cpu_read(this_cpu_vector) == vectors) { const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); __this_cpu_write(this_cpu_vector, v); } } static int __init parse_kpti(char *str) { bool enabled; int ret = kstrtobool(str, &enabled); if (ret) return ret; __kpti_forced = enabled ? 1 : -1; return 0; } early_param("kpti", parse_kpti); #ifdef CONFIG_ARM64_HW_AFDBM static struct cpumask dbm_cpus __read_mostly; static inline void __cpu_enable_hw_dbm(void) { u64 tcr = read_sysreg(tcr_el1) | TCR_HD; write_sysreg(tcr, tcr_el1); isb(); local_flush_tlb_all(); } static bool cpu_has_broken_dbm(void) { /* List of CPUs which have broken DBM support. */ static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif #ifdef CONFIG_ARM64_ERRATUM_2051678 MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 2), #endif {}, }; return is_midr_in_range_list(read_cpuid_id(), cpus); } static bool cpu_can_use_dbm(const struct arm64_cpu_capabilities *cap) { return has_cpuid_feature(cap, SCOPE_LOCAL_CPU) && !cpu_has_broken_dbm(); } static void cpu_enable_hw_dbm(struct arm64_cpu_capabilities const *cap) { if (cpu_can_use_dbm(cap)) { __cpu_enable_hw_dbm(); cpumask_set_cpu(smp_processor_id(), &dbm_cpus); } } static bool has_hw_dbm(const struct arm64_cpu_capabilities *cap, int __unused) { /* * DBM is a non-conflicting feature. i.e, the kernel can safely * run a mix of CPUs with and without the feature. So, we * unconditionally enable the capability to allow any late CPU * to use the feature. We only enable the control bits on the * CPU, if it is supported. */ return true; } #endif #ifdef CONFIG_ARM64_AMU_EXTN /* * The "amu_cpus" cpumask only signals that the CPU implementation for the * flagged CPUs supports the Activity Monitors Unit (AMU) but does not provide * information regarding all the events that it supports. When a CPU bit is * set in the cpumask, the user of this feature can only rely on the presence * of the 4 fixed counters for that CPU. But this does not guarantee that the * counters are enabled or access to these counters is enabled by code * executed at higher exception levels (firmware). */ static struct cpumask amu_cpus __read_mostly; bool cpu_has_amu_feat(int cpu) { return cpumask_test_cpu(cpu, &amu_cpus); } int get_cpu_with_amu_feat(void) { return cpumask_any(&amu_cpus); } static void cpu_amu_enable(struct arm64_cpu_capabilities const *cap) { if (has_cpuid_feature(cap, SCOPE_LOCAL_CPU)) { cpumask_set_cpu(smp_processor_id(), &amu_cpus); /* 0 reference values signal broken/disabled counters */ if (!this_cpu_has_cap(ARM64_WORKAROUND_2457168)) update_freq_counters_refs(); } } static bool has_amu(const struct arm64_cpu_capabilities *cap, int __unused) { /* * The AMU extension is a non-conflicting feature: the kernel can * safely run a mix of CPUs with and without support for the * activity monitors extension. Therefore, unconditionally enable * the capability to allow any late CPU to use the feature. * * With this feature unconditionally enabled, the cpu_enable * function will be called for all CPUs that match the criteria, * including secondary and hotplugged, marking this feature as * present on that respective CPU. The enable function will also * print a detection message. */ return true; } #else int get_cpu_with_amu_feat(void) { return nr_cpu_ids; } #endif static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused) { return is_kernel_in_hyp_mode(); } static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) { /* * Copy register values that aren't redirected by hardware. * * Before code patching, we only set tpidr_el1, all CPUs need to copy * this value to tpidr_el2 before we patch the code. Once we've done * that, freshly-onlined CPUs will set tpidr_el2, so we don't need to * do anything here. */ if (!alternative_is_applied(ARM64_HAS_VIRT_HOST_EXTN)) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap, int scope) { if (kvm_get_mode() != KVM_MODE_NV) return false; if (!has_cpuid_feature(cap, scope)) { pr_warn("unavailable: %s\n", cap->desc); return false; } return true; } static bool hvhe_possible(const struct arm64_cpu_capabilities *entry, int __unused) { return arm64_test_sw_feature_override(ARM64_SW_FEATURE_OVERRIDE_HVHE); } #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { /* * We modify PSTATE. This won't work from irq context as the PSTATE * is discarded once we return from the exception. */ WARN_ON_ONCE(in_interrupt()); sysreg_clear_set(sctlr_el1, SCTLR_EL1_SPAN, 0); set_pstate_pan(1); } #endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_RAS_EXTN static void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused) { /* Firmware may have left a deferred SError in this register. */ write_sysreg_s(0, SYS_DISR_EL1); } #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_PTR_AUTH static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, int scope) { int boot_val, sec_val; /* We don't expect to be called with SCOPE_SYSTEM */ WARN_ON(scope == SCOPE_SYSTEM); /* * The ptr-auth feature levels are not intercompatible with lower * levels. Hence we must match ptr-auth feature level of the secondary * CPUs with that of the boot CPU. The level of boot cpu is fetched * from the sanitised register whereas direct register read is done for * the secondary CPUs. * The sanitised feature state is guaranteed to match that of the * boot CPU as a mismatched secondary CPU is parked before it gets * a chance to update the state, with the capability. */ boot_val = cpuid_feature_extract_field(read_sanitised_ftr_reg(entry->sys_reg), entry->field_pos, entry->sign); if (scope & SCOPE_BOOT_CPU) return boot_val >= entry->min_field_value; /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), entry->field_pos, entry->sign); return (sec_val >= entry->min_field_value) && (sec_val == boot_val); } static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { bool api = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); bool apa = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); bool apa3 = has_address_auth_cpucap(cpucap_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); return apa || apa3 || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, int __unused) { bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3); return gpa || gpa3 || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ #ifdef CONFIG_ARM64_E0PD static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap) { if (this_cpu_has_cap(ARM64_HAS_E0PD)) sysreg_clear_set(tcr_el1, 0, TCR_E0PD1); } #endif /* CONFIG_ARM64_E0PD */ #ifdef CONFIG_ARM64_PSEUDO_NMI static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, int scope) { /* * ARM64_HAS_GIC_CPUIF_SYSREGS has a lower index, and is a boot CPU * feature, so will be detected earlier. */ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_MASKING <= ARM64_HAS_GIC_CPUIF_SYSREGS); if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS)) return false; return enable_pseudo_nmi; } static bool has_gic_prio_relaxed_sync(const struct arm64_cpu_capabilities *entry, int scope) { /* * If we're not using priority masking then we won't be poking PMR_EL1, * and there's no need to relax synchronization of writes to it, and * ICC_CTLR_EL1 might not be accessible and we must avoid reads from * that. * * ARM64_HAS_GIC_PRIO_MASKING has a lower index, and is a boot CPU * feature, so will be detected earlier. */ BUILD_BUG_ON(ARM64_HAS_GIC_PRIO_RELAXED_SYNC <= ARM64_HAS_GIC_PRIO_MASKING); if (!cpus_have_cap(ARM64_HAS_GIC_PRIO_MASKING)) return false; /* * When Priority Mask Hint Enable (PMHE) == 0b0, PMR is not used as a * hint for interrupt distribution, a DSB is not necessary when * unmasking IRQs via PMR, and we can relax the barrier to a NOP. * * Linux itself doesn't use 1:N distribution, so has no need to * set PMHE. The only reason to have it set is if EL3 requires it * (and we can't change it). */ return (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) == 0; } #endif #ifdef CONFIG_ARM64_BTI static void bti_enable(const struct arm64_cpu_capabilities *__unused) { /* * Use of X16/X17 for tail-calls and trampolines that jump to * function entry points using BR is a requirement for * marking binaries with GNU_PROPERTY_AARCH64_FEATURE_1_BTI. * So, be strict and forbid other BRs using other registers to * jump onto a PACIxSP instruction: */ sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); isb(); } #endif /* CONFIG_ARM64_BTI */ #ifdef CONFIG_ARM64_MTE static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) { sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0); mte_cpu_setup(); /* * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. */ if (try_page_mte_tagging(ZERO_PAGE(0))) { mte_clear_page_tags(lm_alias(empty_zero_page)); set_page_mte_tagged(ZERO_PAGE(0)); } kasan_init_hw_tags_cpu(); } #endif /* CONFIG_ARM64_MTE */ static void user_feature_fixup(void) { if (cpus_have_cap(ARM64_WORKAROUND_2658417)) { struct arm64_ftr_reg *regp; regp = get_arm64_ftr_reg(SYS_ID_AA64ISAR1_EL1); if (regp) regp->user_mask &= ~ID_AA64ISAR1_EL1_BF16_MASK; } if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_SSBS)) { struct arm64_ftr_reg *regp; regp = get_arm64_ftr_reg(SYS_ID_AA64PFR1_EL1); if (regp) regp->user_mask &= ~ID_AA64PFR1_EL1_SSBS_MASK; } } static void elf_hwcap_fixup(void) { #ifdef CONFIG_COMPAT if (cpus_have_cap(ARM64_WORKAROUND_1742098)) compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES; #endif /* CONFIG_COMPAT */ } #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { return kvm_get_mode() == KVM_MODE_PROTECTED; } #endif /* CONFIG_KVM */ static void cpu_trap_el0_impdef(const struct arm64_cpu_capabilities *__unused) { sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_TIDCP); } static void cpu_enable_dit(const struct arm64_cpu_capabilities *__unused) { set_pstate_dit(1); } static void cpu_enable_mops(const struct arm64_cpu_capabilities *__unused) { sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_MSCEn); } /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) { return !!(cap->type & ARM64_CPUCAP_OPTIONAL_FOR_LATE_CPU); } static bool cpucap_late_cpu_permitted(const struct arm64_cpu_capabilities *cap) { return !!(cap->type & ARM64_CPUCAP_PERMITTED_FOR_LATE_CPU); } static bool cpucap_panic_on_conflict(const struct arm64_cpu_capabilities *cap) { return !!(cap->type & ARM64_CPUCAP_PANIC_ON_CONFLICT); } static const struct arm64_cpu_capabilities arm64_features[] = { { .capability = ARM64_ALWAYS_BOOT, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_always, }, { .capability = ARM64_ALWAYS_SYSTEM, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_always, }, { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_GIC_CPUIF_SYSREGS, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_useable_gicv3_cpuif, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, GIC, IMP) }, { .desc = "Enhanced Counter Virtualization", .capability = ARM64_HAS_ECV, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, IMP) }, { .desc = "Enhanced Counter Virtualization (CNTPOFF)", .capability = ARM64_HAS_ECV_CNTPOFF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, ECV, CNTPOFF) }, #ifdef CONFIG_ARM64_PAN { .desc = "Privileged Access Never", .capability = ARM64_HAS_PAN, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_pan, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, IMP) }, #endif /* CONFIG_ARM64_PAN */ #ifdef CONFIG_ARM64_EPAN { .desc = "Enhanced Privileged Access Never", .capability = ARM64_HAS_EPAN, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, PAN, PAN3) }, #endif /* CONFIG_ARM64_EPAN */ #ifdef CONFIG_ARM64_LSE_ATOMICS { .desc = "LSE atomic instructions", .capability = ARM64_HAS_LSE_ATOMICS, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, ATOMIC, IMP) }, #endif /* CONFIG_ARM64_LSE_ATOMICS */ { .desc = "Virtualization Host Extensions", .capability = ARM64_HAS_VIRT_HOST_EXTN, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = runs_at_el2, .cpu_enable = cpu_copy_el2regs, }, { .desc = "Nested Virtualization Support", .capability = ARM64_HAS_NESTED_VIRT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_nested_virt_support, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2) }, { .capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_32bit_el0, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, EL0, AARCH32) }, #ifdef CONFIG_KVM { .desc = "32-bit EL1 Support", .capability = ARM64_HAS_32BIT_EL1, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, EL1, AARCH32) }, { .desc = "Protected KVM", .capability = ARM64_KVM_PROTECTED_MODE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = is_kvm_protected_mode, }, { .desc = "HCRX_EL2 register", .capability = ARM64_HAS_HCX, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HCX, IMP) }, #endif { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, .type = ARM64_CPUCAP_BOOT_RESTRICTED_CPU_LOCAL_FEATURE, .cpu_enable = cpu_enable_kpti, .matches = unmap_kernel_at_el0, /* * The ID feature fields below are used to indicate that * the CPU doesn't need KPTI. See unmap_kernel_at_el0 for * more details. */ ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, CSV3, IMP) }, { .capability = ARM64_HAS_FPSIMD, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_fpsimd, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, FP, IMP) }, #ifdef CONFIG_ARM64_PMEM { .desc = "Data cache clean to Point of Persistence", .capability = ARM64_HAS_DCPOP, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, DPB, IMP) }, { .desc = "Data cache clean to Point of Deep Persistence", .capability = ARM64_HAS_DCPODP, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, DPB, DPB2) }, #endif #ifdef CONFIG_ARM64_SVE { .desc = "Scalable Vector Extension", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SVE, .cpu_enable = cpu_enable_sve, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, SVE, IMP) }, #endif /* CONFIG_ARM64_SVE */ #ifdef CONFIG_ARM64_RAS_EXTN { .desc = "RAS Extension Support", .capability = ARM64_HAS_RAS_EXTN, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_clear_disr, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, RAS, IMP) }, #endif /* CONFIG_ARM64_RAS_EXTN */ #ifdef CONFIG_ARM64_AMU_EXTN { .desc = "Activity Monitors Unit (AMU)", .capability = ARM64_HAS_AMU_EXTN, .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .matches = has_amu, .cpu_enable = cpu_amu_enable, .cpus = &amu_cpus, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, AMU, IMP) }, #endif /* CONFIG_ARM64_AMU_EXTN */ { .desc = "Data cache clean to the PoU not required for I/D coherence", .capability = ARM64_HAS_CACHE_IDC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_idc, .cpu_enable = cpu_emulate_effective_ctr, }, { .desc = "Instruction cache invalidation not required for I/D coherence", .capability = ARM64_HAS_CACHE_DIC, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cache_dic, }, { .desc = "Stage-2 Force Write-Back", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_HAS_STAGE2_FWB, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, FWB, IMP) }, { .desc = "ARMv8.4 Translation Table Level", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_HAS_ARMv8_4_TTL, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, TTL, IMP) }, { .desc = "TLB range maintenance instructions", .capability = ARM64_HAS_TLB_RANGE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, TLB, RANGE) }, #ifdef CONFIG_ARM64_HW_AFDBM { .desc = "Hardware dirty bit management", .type = ARM64_CPUCAP_WEAK_LOCAL_CPU_FEATURE, .capability = ARM64_HW_DBM, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, .cpus = &dbm_cpus, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, HAFDBS, DBM) }, #endif { .desc = "CRC32 instructions", .capability = ARM64_HAS_CRC32, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, CRC32, IMP) }, { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SSBS, IMP) }, #ifdef CONFIG_ARM64_CNP { .desc = "Common not Private translations", .capability = ARM64_HAS_CNP, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_useable_cnp, .cpu_enable = cpu_enable_cnp, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, CnP, IMP) }, #endif { .desc = "Speculation barrier (SB)", .capability = ARM64_HAS_SB, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, SB, IMP) }, #ifdef CONFIG_ARM64_PTR_AUTH { .desc = "Address authentication (architected QARMA5 algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_address_auth_cpucap, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, APA, PAuth) }, { .desc = "Address authentication (architected QARMA3 algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_address_auth_cpucap, ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, APA3, PAuth) }, { .desc = "Address authentication (IMP DEF algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_address_auth_cpucap, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, API, PAuth) }, { .capability = ARM64_HAS_ADDRESS_AUTH, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_address_auth_metacap, }, { .desc = "Generic authentication (architected QARMA5 algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPA, IMP) }, { .desc = "Generic authentication (architected QARMA3 algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, GPA3, IMP) }, { .desc = "Generic authentication (IMP DEF algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, GPI, IMP) }, { .capability = ARM64_HAS_GENERIC_AUTH, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_generic_auth, }, #endif /* CONFIG_ARM64_PTR_AUTH */ #ifdef CONFIG_ARM64_PSEUDO_NMI { /* * Depends on having GICv3 */ .desc = "IRQ priority masking", .capability = ARM64_HAS_GIC_PRIO_MASKING, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = can_use_gic_priorities, }, { /* * Depends on ARM64_HAS_GIC_PRIO_MASKING */ .capability = ARM64_HAS_GIC_PRIO_RELAXED_SYNC, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_gic_prio_relaxed_sync, }, #endif #ifdef CONFIG_ARM64_E0PD { .desc = "E0PD", .capability = ARM64_HAS_E0PD, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .cpu_enable = cpu_enable_e0pd, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, E0PD, IMP) }, #endif { .desc = "Random Number Generator", .capability = ARM64_HAS_RNG, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR0_EL1, RNDR, IMP) }, #ifdef CONFIG_ARM64_BTI { .desc = "Branch Target Identification", .capability = ARM64_BTI, #ifdef CONFIG_ARM64_BTI_KERNEL .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, #else .type = ARM64_CPUCAP_SYSTEM_FEATURE, #endif .matches = has_cpuid_feature, .cpu_enable = bti_enable, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, BT, IMP) }, #endif #ifdef CONFIG_ARM64_MTE { .desc = "Memory Tagging Extension", .capability = ARM64_MTE, .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_mte, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE2) }, { .desc = "Asymmetric MTE Tag Check Fault", .capability = ARM64_MTE_ASYMM, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, MTE, MTE3) }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", .capability = ARM64_HAS_LDAPR, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP) }, { .desc = "Fine Grained Traps", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_HAS_FGT, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP) }, #ifdef CONFIG_ARM64_SME { .desc = "Scalable Matrix Extension", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SME, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_sme, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, IMP) }, /* FA64 should be sorted after the base SME capability */ { .desc = "FA64", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SME_FA64, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_fa64, ARM64_CPUID_FIELDS(ID_AA64SMFR0_EL1, FA64, IMP) }, { .desc = "SME2", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_SME2, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_sme2, ARM64_CPUID_FIELDS(ID_AA64PFR1_EL1, SME, SME2) }, #endif /* CONFIG_ARM64_SME */ { .desc = "WFx with timeout", .capability = ARM64_HAS_WFXT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, WFxT, IMP) }, { .desc = "Trap EL0 IMPLEMENTATION DEFINED functionality", .capability = ARM64_HAS_TIDCP1, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_trap_el0_impdef, ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, TIDCP1, IMP) }, { .desc = "Data independent timing control (DIT)", .capability = ARM64_HAS_DIT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_dit, ARM64_CPUID_FIELDS(ID_AA64PFR0_EL1, DIT, IMP) }, { .desc = "Memory Copy and Memory Set instructions", .capability = ARM64_HAS_MOPS, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_mops, ARM64_CPUID_FIELDS(ID_AA64ISAR2_EL1, MOPS, IMP) }, { .capability = ARM64_HAS_TCR2, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, TCRX, IMP) }, { .desc = "Stage-1 Permission Indirection Extension (S1PIE)", .capability = ARM64_HAS_S1PIE, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR3_EL1, S1PIE, IMP) }, { .desc = "VHE for hypervisor only", .capability = ARM64_KVM_HVHE, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = hvhe_possible, }, { .desc = "Enhanced Virtualization Traps", .capability = ARM64_HAS_EVT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_cpuid_feature, ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, { .desc = "52-bit Virtual Addressing for KVM (LPA2)", .capability = ARM64_HAS_LPA2, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_lpa2, }, { .desc = "FPMR", .type = ARM64_CPUCAP_SYSTEM_FEATURE, .capability = ARM64_HAS_FPMR, .matches = has_cpuid_feature, .cpu_enable = cpu_enable_fpmr, ARM64_CPUID_FIELDS(ID_AA64PFR2_EL1, FPMR, IMP) }, #ifdef CONFIG_ARM64_VA_BITS_52 { .capability = ARM64_HAS_VA52, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .matches = has_cpuid_feature, #ifdef CONFIG_ARM64_64K_PAGES .desc = "52-bit Virtual Addressing (LVA)", ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, VARange, 52) #else .desc = "52-bit Virtual Addressing (LPA2)", #ifdef CONFIG_ARM64_4K_PAGES ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN4, 52_BIT) #else ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, TGRAN16, 52_BIT) #endif #endif }, #endif { .desc = "NV1", .capability = ARM64_HAS_HCR_NV1, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .matches = has_nv1, ARM64_CPUID_FIELDS_NEG(ID_AA64MMFR4_EL1, E2H0, NI_NV1) }, {}, }; #define HWCAP_CPUID_MATCH(reg, field, min_value) \ .matches = has_user_cpuid_feature, \ ARM64_CPUID_FIELDS(reg, field, min_value) #define __HWCAP_CAP(name, cap_type, cap) \ .desc = name, \ .type = ARM64_CPUCAP_SYSTEM_FEATURE, \ .hwcap_type = cap_type, \ .hwcap = cap, \ #define HWCAP_CAP(reg, field, min_value, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ HWCAP_CPUID_MATCH(reg, field, min_value) \ } #define HWCAP_MULTI_CAP(list, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ .matches = cpucap_multi_entry_cap_matches, \ .match_list = list, \ } #define HWCAP_CAP_MATCH(match, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ .matches = match, \ } #ifdef CONFIG_ARM64_PTR_AUTH static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, APA, PAuth) }, { HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, APA3, PAuth) }, { HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, API, PAuth) }, {}, }; static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPA, IMP) }, { HWCAP_CPUID_MATCH(ID_AA64ISAR2_EL1, GPA3, IMP) }, { HWCAP_CPUID_MATCH(ID_AA64ISAR1_EL1, GPI, IMP) }, {}, }; #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR0_EL1, AES, PMULL, CAP_HWCAP, KERNEL_HWCAP_PMULL), HWCAP_CAP(ID_AA64ISAR0_EL1, AES, AES, CAP_HWCAP, KERNEL_HWCAP_AES), HWCAP_CAP(ID_AA64ISAR0_EL1, SHA1, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA1), HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA256, CAP_HWCAP, KERNEL_HWCAP_SHA2), HWCAP_CAP(ID_AA64ISAR0_EL1, SHA2, SHA512, CAP_HWCAP, KERNEL_HWCAP_SHA512), HWCAP_CAP(ID_AA64ISAR0_EL1, CRC32, IMP, CAP_HWCAP, KERNEL_HWCAP_CRC32), HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, IMP, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), HWCAP_CAP(ID_AA64ISAR0_EL1, ATOMIC, FEAT_LSE128, CAP_HWCAP, KERNEL_HWCAP_LSE128), HWCAP_CAP(ID_AA64ISAR0_EL1, RDM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), HWCAP_CAP(ID_AA64ISAR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SHA3), HWCAP_CAP(ID_AA64ISAR0_EL1, SM3, IMP, CAP_HWCAP, KERNEL_HWCAP_SM3), HWCAP_CAP(ID_AA64ISAR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SM4), HWCAP_CAP(ID_AA64ISAR0_EL1, DP, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), HWCAP_CAP(ID_AA64ISAR0_EL1, FHM, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM, CAP_HWCAP, KERNEL_HWCAP_FLAGM), HWCAP_CAP(ID_AA64ISAR0_EL1, TS, FLAGM2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), HWCAP_CAP(ID_AA64ISAR0_EL1, RNDR, IMP, CAP_HWCAP, KERNEL_HWCAP_RNG), HWCAP_CAP(ID_AA64PFR0_EL1, FP, IMP, CAP_HWCAP, KERNEL_HWCAP_FP), HWCAP_CAP(ID_AA64PFR0_EL1, FP, FP16, CAP_HWCAP, KERNEL_HWCAP_FPHP), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, IMP, CAP_HWCAP, KERNEL_HWCAP_ASIMD), HWCAP_CAP(ID_AA64PFR0_EL1, AdvSIMD, FP16, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), HWCAP_CAP(ID_AA64PFR0_EL1, DIT, IMP, CAP_HWCAP, KERNEL_HWCAP_DIT), HWCAP_CAP(ID_AA64PFR2_EL1, FPMR, IMP, CAP_HWCAP, KERNEL_HWCAP_FPMR), HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, IMP, CAP_HWCAP, KERNEL_HWCAP_DCPOP), HWCAP_CAP(ID_AA64ISAR1_EL1, DPB, DPB2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), HWCAP_CAP(ID_AA64ISAR1_EL1, JSCVT, IMP, CAP_HWCAP, KERNEL_HWCAP_JSCVT), HWCAP_CAP(ID_AA64ISAR1_EL1, FCMA, IMP, CAP_HWCAP, KERNEL_HWCAP_FCMA), HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, IMP, CAP_HWCAP, KERNEL_HWCAP_LRCPC), HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), HWCAP_CAP(ID_AA64ISAR1_EL1, LRCPC, LRCPC3, CAP_HWCAP, KERNEL_HWCAP_LRCPC3), HWCAP_CAP(ID_AA64ISAR1_EL1, FRINTTS, IMP, CAP_HWCAP, KERNEL_HWCAP_FRINT), HWCAP_CAP(ID_AA64ISAR1_EL1, SB, IMP, CAP_HWCAP, KERNEL_HWCAP_SB), HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_BF16), HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16), HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH), HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM), HWCAP_CAP(ID_AA64ISAR2_EL1, LUT, IMP, CAP_HWCAP, KERNEL_HWCAP_LUT), HWCAP_CAP(ID_AA64ISAR3_EL1, FAMINMAX, IMP, CAP_HWCAP, KERNEL_HWCAP_FAMINMAX), HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE), HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2p1, CAP_HWCAP, KERNEL_HWCAP_SVE2P1), HWCAP_CAP(ID_AA64ZFR0_EL1, SVEver, SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), HWCAP_CAP(ID_AA64ZFR0_EL1, AES, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEAES), HWCAP_CAP(ID_AA64ZFR0_EL1, AES, PMULL128, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), HWCAP_CAP(ID_AA64ZFR0_EL1, BitPerm, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), HWCAP_CAP(ID_AA64ZFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE_B16B16), HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), HWCAP_CAP(ID_AA64ZFR0_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_SVE_EBF16), HWCAP_CAP(ID_AA64ZFR0_EL1, SHA3, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), HWCAP_CAP(ID_AA64ZFR0_EL1, SM4, IMP, CAP_HWCAP, KERNEL_HWCAP_SVESM4), HWCAP_CAP(ID_AA64ZFR0_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), HWCAP_CAP(ID_AA64ZFR0_EL1, F32MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), HWCAP_CAP(ID_AA64ZFR0_EL1, F64MM, IMP, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(ID_AA64PFR1_EL1, SSBS, SSBS2, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI HWCAP_CAP(ID_AA64PFR1_EL1, BT, IMP, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE2, CAP_HWCAP, KERNEL_HWCAP_MTE), HWCAP_CAP(ID_AA64PFR1_EL1, MTE, MTE3, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ HWCAP_CAP(ID_AA64MMFR0_EL1, ECV, IMP, CAP_HWCAP, KERNEL_HWCAP_ECV), HWCAP_CAP(ID_AA64MMFR1_EL1, AFP, IMP, CAP_HWCAP, KERNEL_HWCAP_AFP), HWCAP_CAP(ID_AA64ISAR2_EL1, CSSC, IMP, CAP_HWCAP, KERNEL_HWCAP_CSSC), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRFM, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRFM), HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS), HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), HWCAP_CAP(ID_AA64SMFR0_EL1, LUTv2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_LUTV2), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2p1, CAP_HWCAP, KERNEL_HWCAP_SME2P1), HWCAP_CAP(ID_AA64SMFR0_EL1, SMEver, SME2, CAP_HWCAP, KERNEL_HWCAP_SME2), HWCAP_CAP(ID_AA64SMFR0_EL1, I16I64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I64), HWCAP_CAP(ID_AA64SMFR0_EL1, F64F64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F64F64), HWCAP_CAP(ID_AA64SMFR0_EL1, I16I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I16I32), HWCAP_CAP(ID_AA64SMFR0_EL1, B16B16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16B16), HWCAP_CAP(ID_AA64SMFR0_EL1, F16F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F16), HWCAP_CAP(ID_AA64SMFR0_EL1, F8F16, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F16), HWCAP_CAP(ID_AA64SMFR0_EL1, F8F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F8F32), HWCAP_CAP(ID_AA64SMFR0_EL1, I8I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_I8I32), HWCAP_CAP(ID_AA64SMFR0_EL1, F16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F16F32), HWCAP_CAP(ID_AA64SMFR0_EL1, B16F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_B16F32), HWCAP_CAP(ID_AA64SMFR0_EL1, BI32I32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_BI32I32), HWCAP_CAP(ID_AA64SMFR0_EL1, F32F32, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_F32F32), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8FMA), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP4), HWCAP_CAP(ID_AA64SMFR0_EL1, SF8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_SF8DP2), #endif /* CONFIG_ARM64_SME */ HWCAP_CAP(ID_AA64FPFR0_EL1, F8CVT, IMP, CAP_HWCAP, KERNEL_HWCAP_F8CVT), HWCAP_CAP(ID_AA64FPFR0_EL1, F8FMA, IMP, CAP_HWCAP, KERNEL_HWCAP_F8FMA), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP4, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP4), HWCAP_CAP(ID_AA64FPFR0_EL1, F8DP2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8DP2), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E4M3, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E4M3), HWCAP_CAP(ID_AA64FPFR0_EL1, F8E5M2, IMP, CAP_HWCAP, KERNEL_HWCAP_F8E5M2), {}, }; #ifdef CONFIG_COMPAT static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) { /* * Check that all of MVFR1_EL1.{SIMDSP, SIMDInt, SIMDLS} are available, * in line with that of arm32 as in vfp_init(). We make sure that the * check is future proof, by making sure value is non-zero. */ u32 mvfr1; WARN_ON(scope == SCOPE_LOCAL_CPU && preemptible()); if (scope == SCOPE_SYSTEM) mvfr1 = read_sanitised_ftr_reg(SYS_MVFR1_EL1); else mvfr1 = read_sysreg_s(SYS_MVFR1_EL1); return cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDSP_SHIFT) && cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDInt_SHIFT) && cpuid_feature_extract_unsigned_field(mvfr1, MVFR1_EL1_SIMDLS_SHIFT); } #endif static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), HWCAP_CAP(MVFR1_EL1, SIMDFMAC, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ HWCAP_CAP(MVFR0_EL1, FPDP, VFPv3, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), HWCAP_CAP(MVFR0_EL1, FPDP, VFPv3, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), HWCAP_CAP(MVFR1_EL1, FPHP, FP16, CAP_COMPAT_HWCAP, COMPAT_HWCAP_FPHP), HWCAP_CAP(MVFR1_EL1, SIMDHP, SIMDHP_FLOAT, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDHP), HWCAP_CAP(ID_ISAR5_EL1, AES, VMULL, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), HWCAP_CAP(ID_ISAR5_EL1, AES, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), HWCAP_CAP(ID_ISAR5_EL1, SHA1, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), HWCAP_CAP(ID_ISAR5_EL1, SHA2, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), HWCAP_CAP(ID_ISAR5_EL1, CRC32, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), HWCAP_CAP(ID_ISAR6_EL1, DP, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDDP), HWCAP_CAP(ID_ISAR6_EL1, FHM, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDFHM), HWCAP_CAP(ID_ISAR6_EL1, SB, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SB), HWCAP_CAP(ID_ISAR6_EL1, BF16, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_ASIMDBF16), HWCAP_CAP(ID_ISAR6_EL1, I8MM, IMP, CAP_COMPAT_HWCAP, COMPAT_HWCAP_I8MM), HWCAP_CAP(ID_PFR2_EL1, SSBS, IMP, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SSBS), #endif {}, }; static void cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap) { switch (cap->hwcap_type) { case CAP_HWCAP: cpu_set_feature(cap->hwcap); break; #ifdef CONFIG_COMPAT case CAP_COMPAT_HWCAP: compat_elf_hwcap |= (u32)cap->hwcap; break; case CAP_COMPAT_HWCAP2: compat_elf_hwcap2 |= (u32)cap->hwcap; break; #endif default: WARN_ON(1); break; } } /* Check if we have a particular HWCAP enabled */ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap) { bool rc; switch (cap->hwcap_type) { case CAP_HWCAP: rc = cpu_have_feature(cap->hwcap); break; #ifdef CONFIG_COMPAT case CAP_COMPAT_HWCAP: rc = (compat_elf_hwcap & (u32)cap->hwcap) != 0; break; case CAP_COMPAT_HWCAP2: rc = (compat_elf_hwcap2 & (u32)cap->hwcap) != 0; break; #endif default: WARN_ON(1); rc = false; } return rc; } static void setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps) { /* We support emulation of accesses to CPU ID feature registers */ cpu_set_named_feature(CPUID); for (; hwcaps->matches; hwcaps++) if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps))) cap_set_elf_hwcap(hwcaps); } static void update_cpu_capabilities(u16 scope_mask) { int i; const struct arm64_cpu_capabilities *caps; scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask) || cpus_have_cap(caps->capability) || !caps->matches(caps, cpucap_default_scope(caps))) continue; if (caps->desc && !caps->cpus) pr_info("detected: %s\n", caps->desc); __set_bit(caps->capability, system_cpucaps); if ((scope_mask & SCOPE_BOOT_CPU) && (caps->type & SCOPE_BOOT_CPU)) set_bit(caps->capability, boot_cpucaps); } } /* * Enable all the available capabilities on this CPU. The capabilities * with BOOT_CPU scope are handled separately and hence skipped here. */ static int cpu_enable_non_boot_scope_capabilities(void *__unused) { int i; u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU; for_each_available_cap(i) { const struct arm64_cpu_capabilities *cap = cpucap_ptrs[i]; if (WARN_ON(!cap)) continue; if (!(cap->type & non_boot_scope)) continue; if (cap->cpu_enable) cap->cpu_enable(cap); } return 0; } /* * Run through the enabled capabilities and enable() it on all active * CPUs */ static void __init enable_cpu_capabilities(u16 scope_mask) { int i; const struct arm64_cpu_capabilities *caps; bool boot_scope; scope_mask &= ARM64_CPUCAP_SCOPE_MASK; boot_scope = !!(scope_mask & SCOPE_BOOT_CPU); for (i = 0; i < ARM64_NCAPS; i++) { caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask) || !cpus_have_cap(caps->capability)) continue; if (boot_scope && caps->cpu_enable) /* * Capabilities with SCOPE_BOOT_CPU scope are finalised * before any secondary CPU boots. Thus, each secondary * will enable the capability as appropriate via * check_local_cpu_capabilities(). The only exception is * the boot CPU, for which the capability must be * enabled here. This approach avoids costly * stop_machine() calls for this case. */ caps->cpu_enable(caps); } /* * For all non-boot scope capabilities, use stop_machine() * as it schedules the work allowing us to modify PSTATE, * instead of on_each_cpu() which uses an IPI, giving us a * PSTATE that disappears when we return. */ if (!boot_scope) stop_machine(cpu_enable_non_boot_scope_capabilities, NULL, cpu_online_mask); } /* * Run through the list of capabilities to check for conflicts. * If the system has already detected a capability, take necessary * action on this CPU. */ static void verify_local_cpu_caps(u16 scope_mask) { int i; bool cpu_has_cap, system_has_cap; const struct arm64_cpu_capabilities *caps; scope_mask &= ARM64_CPUCAP_SCOPE_MASK; for (i = 0; i < ARM64_NCAPS; i++) { caps = cpucap_ptrs[i]; if (!caps || !(caps->type & scope_mask)) continue; cpu_has_cap = caps->matches(caps, SCOPE_LOCAL_CPU); system_has_cap = cpus_have_cap(caps->capability); if (system_has_cap) { /* * Check if the new CPU misses an advertised feature, * which is not safe to miss. */ if (!cpu_has_cap && !cpucap_late_cpu_optional(caps)) break; /* * We have to issue cpu_enable() irrespective of * whether the CPU has it or not, as it is enabeld * system wide. It is upto the call back to take * appropriate action on this CPU. */ if (caps->cpu_enable) caps->cpu_enable(caps); } else { /* * Check if the CPU has this capability if it isn't * safe to have when the system doesn't. */ if (cpu_has_cap && !cpucap_late_cpu_permitted(caps)) break; } } if (i < ARM64_NCAPS) { pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n", smp_processor_id(), caps->capability, caps->desc, system_has_cap, cpu_has_cap); if (cpucap_panic_on_conflict(caps)) cpu_panic_kernel(); else cpu_die_early(); } } /* * Check for CPU features that are used in early boot * based on the Boot CPU value. */ static void check_early_cpu_features(void) { verify_cpu_asid_bits(); verify_local_cpu_caps(SCOPE_BOOT_CPU); } static void __verify_local_elf_hwcaps(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (cpus_have_elf_hwcap(caps) && !caps->matches(caps, SCOPE_LOCAL_CPU)) { pr_crit("CPU%d: missing HWCAP: %s\n", smp_processor_id(), caps->desc); cpu_die_early(); } } static void verify_local_elf_hwcaps(void) { __verify_local_elf_hwcaps(arm64_elf_hwcaps); if (id_aa64pfr0_32bit_el0(read_cpuid(ID_AA64PFR0_EL1))) __verify_local_elf_hwcaps(compat_elf_hwcaps); } static void verify_sve_features(void) { unsigned long cpacr = cpacr_save_enable_kernel_sve(); if (vec_verify_vq_map(ARM64_VEC_SVE)) { pr_crit("CPU%d: SVE: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); } cpacr_restore(cpacr); } static void verify_sme_features(void) { unsigned long cpacr = cpacr_save_enable_kernel_sme(); if (vec_verify_vq_map(ARM64_VEC_SME)) { pr_crit("CPU%d: SME: vector length support mismatch\n", smp_processor_id()); cpu_die_early(); } cpacr_restore(cpacr); } static void verify_hyp_capabilities(void) { u64 safe_mmfr1, mmfr0, mmfr1; int parange, ipa_max; unsigned int safe_vmid_bits, vmid_bits; if (!IS_ENABLED(CONFIG_KVM)) return; safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); /* Verify VMID bits */ safe_vmid_bits = get_vmid_bits(safe_mmfr1); vmid_bits = get_vmid_bits(mmfr1); if (vmid_bits < safe_vmid_bits) { pr_crit("CPU%d: VMID width mismatch\n", smp_processor_id()); cpu_die_early(); } /* Verify IPA range */ parange = cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_PARANGE_SHIFT); ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange); if (ipa_max < get_kvm_ipa_limit()) { pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id()); cpu_die_early(); } } /* * Run through the enabled system capabilities and enable() it on this CPU. * The capabilities were decided based on the available CPUs at the boot time. * Any new CPU should match the system wide status of the capability. If the * new CPU doesn't have a capability which the system now has enabled, we * cannot do anything to fix it up and could cause unexpected failures. So * we park the CPU. */ static void verify_local_cpu_capabilities(void) { /* * The capabilities with SCOPE_BOOT_CPU are checked from * check_early_cpu_features(), as they need to be verified * on all secondary CPUs. */ verify_local_cpu_caps(SCOPE_ALL & ~SCOPE_BOOT_CPU); verify_local_elf_hwcaps(); if (system_supports_sve()) verify_sve_features(); if (system_supports_sme()) verify_sme_features(); if (is_hyp_mode_available()) verify_hyp_capabilities(); } void check_local_cpu_capabilities(void) { /* * All secondary CPUs should conform to the early CPU features * in use by the kernel based on boot CPU. */ check_early_cpu_features(); /* * If we haven't finalised the system capabilities, this CPU gets * a chance to update the errata work arounds and local features. * Otherwise, this CPU should verify that it has all the system * advertised capabilities. */ if (!system_capabilities_finalized()) update_cpu_capabilities(SCOPE_LOCAL_CPU); else verify_local_cpu_capabilities(); } bool this_cpu_has_cap(unsigned int n) { if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) { const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_LOCAL_CPU); } return false; } EXPORT_SYMBOL_GPL(this_cpu_has_cap); /* * This helper function is used in a narrow window when, * - The system wide safe registers are set with all the SMP CPUs and, * - The SYSTEM_FEATURE system_cpucaps may not have been set. */ static bool __maybe_unused __system_matches_cap(unsigned int n) { if (n < ARM64_NCAPS) { const struct arm64_cpu_capabilities *cap = cpucap_ptrs[n]; if (cap) return cap->matches(cap, SCOPE_SYSTEM); } return false; } void cpu_set_feature(unsigned int num) { set_bit(num, elf_hwcap); } bool cpu_have_feature(unsigned int num) { return test_bit(num, elf_hwcap); } EXPORT_SYMBOL_GPL(cpu_have_feature); unsigned long cpu_get_elf_hwcap(void) { /* * We currently only populate the first 32 bits of AT_HWCAP. Please * note that for userspace compatibility we guarantee that bits 62 * and 63 will always be returned as 0. */ return elf_hwcap[0]; } unsigned long cpu_get_elf_hwcap2(void) { return elf_hwcap[1]; } static void __init setup_boot_cpu_capabilities(void) { /* * The boot CPU's feature register values have been recorded. Detect * boot cpucaps and local cpucaps for the boot CPU, then enable and * patch alternatives for the available boot cpucaps. */ update_cpu_capabilities(SCOPE_BOOT_CPU | SCOPE_LOCAL_CPU); enable_cpu_capabilities(SCOPE_BOOT_CPU); apply_boot_alternatives(); } void __init setup_boot_cpu_features(void) { /* * Initialize the indirect array of CPU capabilities pointers before we * handle the boot CPU. */ init_cpucap_indirect_list(); /* * Detect broken pseudo-NMI. Must be called _before_ the call to * setup_boot_cpu_capabilities() since it interacts with * can_use_gic_priorities(). */ detect_system_supports_pseudo_nmi(); setup_boot_cpu_capabilities(); } static void __init setup_system_capabilities(void) { /* * The system-wide safe feature register values have been finalized. * Detect, enable, and patch alternatives for the available system * cpucaps. */ update_cpu_capabilities(SCOPE_SYSTEM); enable_cpu_capabilities(SCOPE_ALL & ~SCOPE_BOOT_CPU); apply_alternatives_all(); /* * Log any cpucaps with a cpumask as these aren't logged by * update_cpu_capabilities(). */ for (int i = 0; i < ARM64_NCAPS; i++) { const struct arm64_cpu_capabilities *caps = cpucap_ptrs[i]; if (caps && caps->cpus && caps->desc && cpumask_any(caps->cpus) < nr_cpu_ids) pr_info("detected: %s on CPU%*pbl\n", caps->desc, cpumask_pr_args(caps->cpus)); } /* * TTBR0 PAN doesn't have its own cpucap, so log it manually. */ if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); } void __init setup_system_features(void) { setup_system_capabilities(); kpti_install_ng_mappings(); sve_setup(); sme_setup(); /* * Check for sane CTR_EL0.CWG value. */ if (!cache_type_cwg()) pr_warn("No Cache Writeback Granule information, assuming %d\n", ARCH_DMA_MINALIGN); } void __init setup_user_features(void) { user_feature_fixup(); setup_elf_hwcaps(arm64_elf_hwcaps); if (system_supports_32bit_el0()) { setup_elf_hwcaps(compat_elf_hwcaps); elf_hwcap_fixup(); } minsigstksz_setup(); } static int enable_mismatched_32bit_el0(unsigned int cpu) { /* * The first 32-bit-capable CPU we detected and so can no longer * be offlined by userspace. -1 indicates we haven't yet onlined * a 32-bit-capable CPU. */ static int lucky_winner = -1; struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu); bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0); if (cpu_32bit) { cpumask_set_cpu(cpu, cpu_32bit_el0_mask); static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0); } if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit) return 0; if (lucky_winner >= 0) return 0; /* * We've detected a mismatch. We need to keep one of our CPUs with * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting * every CPU in the system for a 32-bit task. */ lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask, cpu_active_mask); get_cpu_device(lucky_winner)->offline_disabled = true; setup_elf_hwcaps(compat_elf_hwcaps); elf_hwcap_fixup(); pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", cpu, lucky_winner); return 0; } static int __init init_32bit_el0_mask(void) { if (!allow_mismatched_32bit_el0) return 0; if (!zalloc_cpumask_var(&cpu_32bit_el0_mask, GFP_KERNEL)) return -ENOMEM; return cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/mismatched_32bit_el0:online", enable_mismatched_32bit_el0, NULL); } subsys_initcall_sync(init_32bit_el0_mask); static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap) { cpu_enable_swapper_cnp(); } /* * We emulate only the following system register space. * Op0 = 0x3, CRn = 0x0, Op1 = 0x0, CRm = [0, 2 - 7] * See Table C5-6 System instruction encodings for System register accesses, * ARMv8 ARM(ARM DDI 0487A.f) for more details. */ static inline bool __attribute_const__ is_emulated(u32 id) { return (sys_reg_Op0(id) == 0x3 && sys_reg_CRn(id) == 0x0 && sys_reg_Op1(id) == 0x0 && (sys_reg_CRm(id) == 0 || ((sys_reg_CRm(id) >= 2) && (sys_reg_CRm(id) <= 7)))); } /* * With CRm == 0, reg should be one of : * MIDR_EL1, MPIDR_EL1 or REVIDR_EL1. */ static inline int emulate_id_reg(u32 id, u64 *valp) { switch (id) { case SYS_MIDR_EL1: *valp = read_cpuid_id(); break; case SYS_MPIDR_EL1: *valp = SYS_MPIDR_SAFE_VAL; break; case SYS_REVIDR_EL1: /* IMPLEMENTATION DEFINED values are emulated with 0 */ *valp = 0; break; default: return -EINVAL; } return 0; } static int emulate_sys_reg(u32 id, u64 *valp) { struct arm64_ftr_reg *regp; if (!is_emulated(id)) return -EINVAL; if (sys_reg_CRm(id) == 0) return emulate_id_reg(id, valp); regp = get_arm64_ftr_reg_nowarn(id); if (regp) *valp = arm64_ftr_reg_user_value(regp); else /* * The untracked registers are either IMPLEMENTATION DEFINED * (e.g, ID_AFR0_EL1) or reserved RAZ. */ *valp = 0; return 0; } int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt) { int rc; u64 val; rc = emulate_sys_reg(sys_reg, &val); if (!rc) { pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); } return rc; } bool try_emulate_mrs(struct pt_regs *regs, u32 insn) { u32 sys_reg, rt; if (compat_user_mode(regs) || !aarch64_insn_is_mrs(insn)) return false; /* * sys_reg values are defined as used in mrs/msr instruction. * shift the imm value to get the encoding. */ sys_reg = (u32)aarch64_insn_decode_immediate(AARCH64_INSN_IMM_16, insn) << 5; rt = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RT, insn); return do_emulate_mrs(regs, sys_reg, rt) == 0; } enum mitigation_state arm64_get_meltdown_state(void) { if (__meltdown_safe) return SPECTRE_UNAFFECTED; if (arm64_kernel_unmapped_at_el0()) return SPECTRE_MITIGATED; return SPECTRE_VULNERABLE; } ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) { switch (arm64_get_meltdown_state()) { case SPECTRE_UNAFFECTED: return sprintf(buf, "Not affected\n"); case SPECTRE_MITIGATED: return sprintf(buf, "Mitigation: PTI\n"); default: return sprintf(buf, "Vulnerable\n"); } }
9 55 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* fs/ internal definitions * * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ struct super_block; struct file_system_type; struct iomap; struct iomap_ops; struct linux_binprm; struct path; struct mount; struct shrink_control; struct fs_context; struct pipe_inode_info; struct iov_iter; struct mnt_idmap; /* * block/bdev.c */ #ifdef CONFIG_BLOCK extern void __init bdev_cache_init(void); #else static inline void bdev_cache_init(void) { } #endif /* CONFIG_BLOCK */ /* * buffer.c */ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, get_block_t *get_block, const struct iomap *iomap); /* * char_dev.c */ extern void __init chrdev_init(void); /* * fs_context.c */ extern const struct fs_context_operations legacy_fs_context_ops; extern int parse_monolithic_mount_data(struct fs_context *, void *); extern void vfs_clean_context(struct fs_context *fc); extern int finish_clean_context(struct fs_context *fc); /* * namei.c */ extern int filename_lookup(int dfd, struct filename *name, unsigned flags, struct path *path, struct path *root); int do_rmdir(int dfd, struct filename *name); int do_unlinkat(int dfd, struct filename *name); int may_linkat(struct mnt_idmap *idmap, const struct path *link); int do_renameat2(int olddfd, struct filename *oldname, int newdfd, struct filename *newname, unsigned int flags); int do_mkdirat(int dfd, struct filename *name, umode_t mode); int do_symlinkat(struct filename *from, int newdfd, struct filename *to); int do_linkat(int olddfd, struct filename *old, int newdfd, struct filename *new, int flags); int vfs_tmpfile(struct mnt_idmap *idmap, const struct path *parentpath, struct file *file, umode_t mode); /* * namespace.c */ extern struct vfsmount *lookup_mnt(const struct path *); extern int finish_automount(struct vfsmount *, const struct path *); extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); int mnt_get_write_access_file(struct file *file); void mnt_put_write_access_file(struct file *file); extern void dissolve_on_fput(struct vfsmount *); extern bool may_mount(void); int path_mount(const char *dev_name, struct path *path, const char *type_page, unsigned long flags, void *data_page); int path_umount(struct path *path, int flags); int show_path(struct seq_file *m, struct dentry *root); /* * fs_struct.c */ extern void chroot_fs_refs(const struct path *, const struct path *); /* * file_table.c */ struct file *alloc_empty_file(int flags, const struct cred *cred); struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred); struct file *alloc_empty_backing_file(int flags, const struct cred *cred); static inline void file_put_write_access(struct file *file) { put_write_access(file->f_inode); mnt_put_write_access(file->f_path.mnt); if (unlikely(file->f_mode & FMODE_BACKING)) mnt_put_write_access(backing_file_user_path(file)->mnt); } static inline void put_file_access(struct file *file) { if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) { i_readcount_dec(file->f_inode); } else if (file->f_mode & FMODE_WRITER) { file_put_write_access(file); } } /* * super.c */ extern int reconfigure_super(struct fs_context *); extern bool super_trylock_shared(struct super_block *sb); struct super_block *user_get_super(dev_t, bool excl); void put_super(struct super_block *sb); extern bool mount_capable(struct fs_context *); int sb_init_dio_done_wq(struct super_block *sb); /* * Prepare superblock for changing its read-only state (i.e., either remount * read-write superblock read-only or vice versa). After this function returns * mnt_is_readonly() will return true for any mount of the superblock if its * caller is able to observe any changes done by the remount. This holds until * sb_end_ro_state_change() is called. */ static inline void sb_start_ro_state_change(struct super_block *sb) { WRITE_ONCE(sb->s_readonly_remount, 1); /* * For RO->RW transition, the barrier pairs with the barrier in * mnt_is_readonly() making sure if mnt_is_readonly() sees SB_RDONLY * cleared, it will see s_readonly_remount set. * For RW->RO transition, the barrier pairs with the barrier in * mnt_get_write_access() before the mnt_is_readonly() check. * The barrier makes sure if mnt_get_write_access() sees MNT_WRITE_HOLD * already cleared, it will see s_readonly_remount set. */ smp_wmb(); } /* * Ends section changing read-only state of the superblock. After this function * returns if mnt_is_readonly() returns false, the caller will be able to * observe all the changes remount did to the superblock. */ static inline void sb_end_ro_state_change(struct super_block *sb) { /* * This barrier provides release semantics that pairs with * the smp_rmb() acquire semantics in mnt_is_readonly(). * This barrier pair ensure that when mnt_is_readonly() sees * 0 for sb->s_readonly_remount, it will also see all the * preceding flag changes that were made during the RO state * change. */ smp_wmb(); WRITE_ONCE(sb->s_readonly_remount, 0); } /* * open.c */ struct open_flags { int open_flag; umode_t mode; int acc_mode; int intent; int lookup_flags; }; extern struct file *do_filp_open(int dfd, struct filename *pathname, const struct open_flags *op); extern struct file *do_file_open_root(const struct path *, const char *, const struct open_flags *); extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); struct file *file_close_fd_locked(struct files_struct *files, unsigned fd); long do_ftruncate(struct file *file, loff_t length, int small); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); int chmod_common(const struct path *path, umode_t mode); int do_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag); int chown_common(const struct path *path, uid_t user, gid_t group); extern int vfs_open(const struct path *, struct file *); /* * inode.c */ extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc); int dentry_needs_remove_privs(struct mnt_idmap *, struct dentry *dentry); bool in_group_or_capable(struct mnt_idmap *idmap, const struct inode *inode, vfsgid_t vfsgid); /* * fs-writeback.c */ extern long get_nr_dirty_inodes(void); void invalidate_inodes(struct super_block *sb); /* * dcache.c */ extern int d_set_mounted(struct dentry *dentry); extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc); extern struct dentry *d_alloc_cursor(struct dentry *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); extern char *simple_dname(struct dentry *, char *, int); extern void dput_to_list(struct dentry *, struct list_head *); extern void shrink_dentry_list(struct list_head *); extern void shrink_dcache_for_umount(struct super_block *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq); extern void d_genocide(struct dentry *); /* * pipe.c */ extern const struct file_operations pipefifo_fops; /* * fs_pin.c */ extern void group_pin_kill(struct hlist_head *p); extern void mnt_pin_kill(struct mount *m); /* * fs/nsfs.c */ extern const struct dentry_operations ns_dentry_operations; /* * fs/stat.c: */ int getname_statx_lookup_flags(int flags); int do_statx(int dfd, struct filename *filename, unsigned int flags, unsigned int mask, struct statx __user *buffer); /* * fs/splice.c: */ ssize_t splice_file_to_pipe(struct file *in, struct pipe_inode_info *opipe, loff_t *offset, size_t len, unsigned int flags); /* * fs/xattr.c: */ struct xattr_name { char name[XATTR_NAME_MAX + 1]; }; struct xattr_ctx { /* Value of attribute */ union { const void __user *cvalue; void __user *value; }; void *kvalue; size_t size; /* Attribute name */ struct xattr_name *kname; unsigned int flags; }; ssize_t do_getxattr(struct mnt_idmap *idmap, struct dentry *d, struct xattr_ctx *ctx); int setxattr_copy(const char __user *name, struct xattr_ctx *ctx); int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, struct xattr_ctx *ctx); int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode); #ifdef CONFIG_FS_POSIX_ACL int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, const void *kvalue, size_t size); ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, void *kvalue, size_t size); #else static inline int do_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, const void *kvalue, size_t size) { return -EOPNOTSUPP; } static inline ssize_t do_get_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, void *kvalue, size_t size) { return -EOPNOTSUPP; } #endif ssize_t __kernel_write_iter(struct file *file, struct iov_iter *from, loff_t *pos); /* * fs/attr.c */ struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns); struct mnt_idmap *mnt_idmap_get(struct mnt_idmap *idmap); void mnt_idmap_put(struct mnt_idmap *idmap); struct stashed_operations { void (*put_data)(void *data); int (*init_inode)(struct inode *inode, void *data); }; int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data, struct path *path); void stashed_dentry_prune(struct dentry *dentry);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef BLK_INTERNAL_H #define BLK_INTERNAL_H #include <linux/blk-crypto.h> #include <linux/memblock.h> /* for max_pfn/max_low_pfn */ #include <linux/sched/sysctl.h> #include <linux/timekeeping.h> #include <xen/xen.h> #include "blk-crypto-internal.h" struct elevator_type; /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ) extern struct dentry *blk_debugfs_root; struct blk_flush_queue { spinlock_t mq_flush_lock; unsigned int flush_pending_idx:1; unsigned int flush_running_idx:1; blk_status_t rq_status; unsigned long flush_pending_since; struct list_head flush_queue[2]; unsigned long flush_data_in_flight; struct request *flush_rq; }; bool is_flush_rq(struct request *req); struct blk_flush_queue *blk_alloc_flush_queue(int node, int cmd_size, gfp_t flags); void blk_free_flush_queue(struct blk_flush_queue *q); void blk_freeze_queue(struct request_queue *q); void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic); void blk_queue_start_drain(struct request_queue *q); int __bio_queue_enter(struct request_queue *q, struct bio *bio); void submit_bio_noacct_nocheck(struct bio *bio); void bio_await_chain(struct bio *bio); static inline bool blk_try_enter_queue(struct request_queue *q, bool pm) { rcu_read_lock(); if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter)) goto fail; /* * The code that increments the pm_only counter must ensure that the * counter is globally visible before the queue is unfrozen. */ if (blk_queue_pm_only(q) && (!pm || queue_rpm_status(q) == RPM_SUSPENDED)) goto fail_put; rcu_read_unlock(); return true; fail_put: blk_queue_exit(q); fail: rcu_read_unlock(); return false; } static inline int bio_queue_enter(struct bio *bio) { struct request_queue *q = bdev_get_queue(bio->bi_bdev); if (blk_try_enter_queue(q, false)) return 0; return __bio_queue_enter(q, bio); } static inline void blk_wait_io(struct completion *done) { /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2; if (timeout) while (!wait_for_completion_io_timeout(done, timeout)) ; else wait_for_completion_io(done); } #define BIO_INLINE_VECS 4 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs, gfp_t gfp_mask); void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs); bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv, struct page *page, unsigned len, unsigned offset, bool *same_page); static inline bool biovec_phys_mergeable(struct request_queue *q, struct bio_vec *vec1, struct bio_vec *vec2) { unsigned long mask = queue_segment_boundary(q); phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset; phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset; /* * Merging adjacent physical pages may not work correctly under KMSAN * if their metadata pages aren't adjacent. Just disable merging. */ if (IS_ENABLED(CONFIG_KMSAN)) return false; if (addr1 + vec1->bv_len != addr2) return false; if (xen_domain() && !xen_biovec_phys_mergeable(vec1, vec2->bv_page)) return false; if ((addr1 | mask) != ((addr2 + vec2->bv_len - 1) | mask)) return false; return true; } static inline bool __bvec_gap_to_prev(const struct queue_limits *lim, struct bio_vec *bprv, unsigned int offset) { return (offset & lim->virt_boundary_mask) || ((bprv->bv_offset + bprv->bv_len) & lim->virt_boundary_mask); } /* * Check if adding a bio_vec after bprv with offset would create a gap in * the SG list. Most drivers don't care about this, but some do. */ static inline bool bvec_gap_to_prev(const struct queue_limits *lim, struct bio_vec *bprv, unsigned int offset) { if (!lim->virt_boundary_mask) return false; return __bvec_gap_to_prev(lim, bprv, offset); } static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) return false; if (req_op(rq) == REQ_OP_FLUSH) return false; if (req_op(rq) == REQ_OP_WRITE_ZEROES) return false; if (req_op(rq) == REQ_OP_ZONE_APPEND) return false; if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; if (rq->rq_flags & RQF_NOMERGE_FLAGS) return false; return true; } /* * There are two different ways to handle DISCARD merges: * 1) If max_discard_segments > 1, the driver treats every bio as a range and * send the bios to controller together. The ranges don't need to be * contiguous. * 2) Otherwise, the request will be normal read/write requests. The ranges * need to be contiguous. */ static inline bool blk_discard_mergable(struct request *req) { if (req_op(req) == REQ_OP_DISCARD && queue_max_discard_segments(req->q) > 1) return true; return false; } static inline unsigned int blk_rq_get_max_segments(struct request *rq) { if (req_op(rq) == REQ_OP_DISCARD) return queue_max_discard_segments(rq->q); return queue_max_segments(rq->q); } static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, enum req_op op) { if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) return min(q->limits.max_discard_sectors, UINT_MAX >> SECTOR_SHIFT); if (unlikely(op == REQ_OP_WRITE_ZEROES)) return q->limits.max_write_zeroes_sectors; return q->limits.max_sectors; } #ifdef CONFIG_BLK_DEV_INTEGRITY void blk_flush_integrity(void); bool __bio_integrity_endio(struct bio *); void bio_integrity_free(struct bio *bio); static inline bool bio_integrity_endio(struct bio *bio) { if (bio_integrity(bio)) return __bio_integrity_endio(bio); return true; } bool blk_integrity_merge_rq(struct request_queue *, struct request *, struct request *); bool blk_integrity_merge_bio(struct request_queue *, struct request *, struct bio *); static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { struct bio_integrity_payload *bip = bio_integrity(req->bio); struct bio_integrity_payload *bip_next = bio_integrity(next); return bvec_gap_to_prev(&req->q->limits, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } static inline bool integrity_req_gap_front_merge(struct request *req, struct bio *bio) { struct bio_integrity_payload *bip = bio_integrity(bio); struct bio_integrity_payload *bip_next = bio_integrity(req->bio); return bvec_gap_to_prev(&req->q->limits, &bip->bip_vec[bip->bip_vcnt - 1], bip_next->bip_vec[0].bv_offset); } extern const struct attribute_group blk_integrity_attr_group; #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline bool blk_integrity_merge_rq(struct request_queue *rq, struct request *r1, struct request *r2) { return true; } static inline bool blk_integrity_merge_bio(struct request_queue *rq, struct request *r, struct bio *b) { return true; } static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { return false; } static inline bool integrity_req_gap_front_merge(struct request *req, struct bio *bio) { return false; } static inline void blk_flush_integrity(void) { } static inline bool bio_integrity_endio(struct bio *bio) { return true; } static inline void bio_integrity_free(struct bio *bio) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ unsigned long blk_rq_timeout(unsigned long timeout); void blk_add_timer(struct request *req); enum bio_merge_status { BIO_MERGE_OK, BIO_MERGE_NONE, BIO_MERGE_FAILED, }; enum bio_merge_status bio_attempt_back_merge(struct request *req, struct bio *bio, unsigned int nr_segs); bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int nr_segs); bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio, unsigned int nr_segs); /* * Plug flush limits */ #define BLK_MAX_REQUEST_COUNT 32 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) /* * Internal elevator interface */ #define ELV_ON_HASH(rq) ((rq)->rq_flags & RQF_HASHED) bool blk_insert_flush(struct request *rq); int elevator_switch(struct request_queue *q, struct elevator_type *new_e); void elevator_disable(struct request_queue *q); void elevator_exit(struct request_queue *q); int elv_register_queue(struct request_queue *q, bool uevent); void elv_unregister_queue(struct request_queue *q); ssize_t part_size_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_stat_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_show(struct device *dev, struct device_attribute *attr, char *buf); ssize_t part_fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count); ssize_t part_timeout_show(struct device *, struct device_attribute *, char *); ssize_t part_timeout_store(struct device *, struct device_attribute *, const char *, size_t); static inline bool bio_may_exceed_limits(struct bio *bio, const struct queue_limits *lim) { switch (bio_op(bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: case REQ_OP_WRITE_ZEROES: return true; /* non-trivial splitting decisions */ default: break; } /* * All drivers must accept single-segments bios that are <= PAGE_SIZE. * This is a quick and dirty check that relies on the fact that * bi_io_vec[0] is always valid if a bio has data. The check might * lead to occasional false negatives when bios are cloned, but compared * to the performance impact of cloned bios themselves the loop below * doesn't matter anyway. */ return lim->chunk_sectors || bio->bi_vcnt != 1 || bio->bi_io_vec->bv_len + bio->bi_io_vec->bv_offset > PAGE_SIZE; } struct bio *__bio_split_to_limits(struct bio *bio, const struct queue_limits *lim, unsigned int *nr_segs); int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs); bool blk_attempt_req_merge(struct request_queue *q, struct request *rq, struct request *next); unsigned int blk_recalc_rq_segments(struct request *rq); bool blk_rq_merge_ok(struct request *rq, struct bio *bio); enum elv_merge blk_try_merge(struct request *rq, struct bio *bio); int blk_set_default_limits(struct queue_limits *lim); int blk_dev_init(void); /* * Contribute to IO statistics IFF: * * a) it's attached to a gendisk, and * b) the queue had IO stats enabled when this request was started */ static inline bool blk_do_io_stat(struct request *rq) { return (rq->rq_flags & RQF_IO_STAT) && !blk_rq_is_passthrough(rq); } void update_io_ticks(struct block_device *part, unsigned long now, bool end); unsigned int part_in_flight(struct block_device *part); static inline void req_set_nomerge(struct request_queue *q, struct request *req) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; } /* * Internal io_context interface */ struct io_cq *ioc_find_get_icq(struct request_queue *q); struct io_cq *ioc_lookup_icq(struct request_queue *q); #ifdef CONFIG_BLK_ICQ void ioc_clear_queue(struct request_queue *q); #else static inline void ioc_clear_queue(struct request_queue *q) { } #endif /* CONFIG_BLK_ICQ */ struct bio *__blk_queue_bounce(struct bio *bio, struct request_queue *q); static inline bool blk_queue_may_bounce(struct request_queue *q) { return IS_ENABLED(CONFIG_BOUNCE) && q->limits.bounce == BLK_BOUNCE_HIGH && max_low_pfn >= max_pfn; } static inline struct bio *blk_queue_bounce(struct bio *bio, struct request_queue *q) { if (unlikely(blk_queue_may_bounce(q) && bio_has_data(bio))) return __blk_queue_bounce(bio, q); return bio; } #ifdef CONFIG_BLK_DEV_ZONED void disk_init_zone_resources(struct gendisk *disk); void disk_free_zone_resources(struct gendisk *disk); static inline bool bio_zone_write_plugging(struct bio *bio) { return bio_flagged(bio, BIO_ZONE_WRITE_PLUGGING); } static inline bool bio_is_zone_append(struct bio *bio) { return bio_op(bio) == REQ_OP_ZONE_APPEND || bio_flagged(bio, BIO_EMULATES_ZONE_APPEND); } void blk_zone_write_plug_bio_merged(struct bio *bio); void blk_zone_write_plug_init_request(struct request *rq); static inline void blk_zone_update_request_bio(struct request *rq, struct bio *bio) { /* * For zone append requests, the request sector indicates the location * at which the BIO data was written. Return this value to the BIO * issuer through the BIO iter sector. * For plugged zone writes, which include emulated zone append, we need * the original BIO sector so that blk_zone_write_plug_bio_endio() can * lookup the zone write plug. */ if (req_op(rq) == REQ_OP_ZONE_APPEND || bio_zone_write_plugging(bio)) bio->bi_iter.bi_sector = rq->__sector; } void blk_zone_write_plug_bio_endio(struct bio *bio); static inline void blk_zone_bio_endio(struct bio *bio) { /* * For write BIOs to zoned devices, signal the completion of the BIO so * that the next write BIO can be submitted by zone write plugging. */ if (bio_zone_write_plugging(bio)) blk_zone_write_plug_bio_endio(bio); } void blk_zone_write_plug_finish_request(struct request *rq); static inline void blk_zone_finish_request(struct request *rq) { if (rq->rq_flags & RQF_ZONE_WRITE_PLUGGING) blk_zone_write_plug_finish_request(rq); } int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg); int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ static inline void disk_init_zone_resources(struct gendisk *disk) { } static inline void disk_free_zone_resources(struct gendisk *disk) { } static inline bool bio_zone_write_plugging(struct bio *bio) { return false; } static inline bool bio_is_zone_append(struct bio *bio) { return false; } static inline void blk_zone_write_plug_bio_merged(struct bio *bio) { } static inline void blk_zone_write_plug_init_request(struct request *rq) { } static inline void blk_zone_update_request_bio(struct request *rq, struct bio *bio) { } static inline void blk_zone_bio_endio(struct bio *bio) { } static inline void blk_zone_finish_request(struct request *rq) { } static inline int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg) { return -ENOTTY; } static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode, unsigned int cmd, unsigned long arg) { return -ENOTTY; } #endif /* CONFIG_BLK_DEV_ZONED */ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); void bdev_unhash(struct block_device *bdev); void bdev_drop(struct block_device *bdev); int blk_alloc_ext_minor(void); void blk_free_ext_minor(unsigned int minor); #define ADDPART_FLAG_NONE 0 #define ADDPART_FLAG_RAID 1 #define ADDPART_FLAG_WHOLEDISK 2 int bdev_add_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); int bdev_del_partition(struct gendisk *disk, int partno); int bdev_resize_partition(struct gendisk *disk, int partno, sector_t start, sector_t length); void drop_partition(struct block_device *part); void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors); struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, struct lock_class_key *lkclass); int bio_add_hw_page(struct request_queue *q, struct bio *bio, struct page *page, unsigned int len, unsigned int offset, unsigned int max_sectors, bool *same_page); /* * Clean up a page appropriately, where the page may be pinned, may have a * ref taken on it or neither. */ static inline void bio_release_page(struct bio *bio, struct page *page) { if (bio_flagged(bio, BIO_PAGE_PINNED)) unpin_user_page(page); } struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id); int disk_scan_partitions(struct gendisk *disk, blk_mode_t mode); int disk_alloc_events(struct gendisk *disk); void disk_add_events(struct gendisk *disk); void disk_del_events(struct gendisk *disk); void disk_release_events(struct gendisk *disk); void disk_block_events(struct gendisk *disk); void disk_unblock_events(struct gendisk *disk); void disk_flush_events(struct gendisk *disk, unsigned int mask); extern struct device_attribute dev_attr_events; extern struct device_attribute dev_attr_events_async; extern struct device_attribute dev_attr_events_poll_msecs; extern struct attribute_group blk_trace_attr_group; blk_mode_t file_to_blk_mode(struct file *file); int truncate_bdev_range(struct block_device *bdev, blk_mode_t mode, loff_t lstart, loff_t lend); long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); extern const struct address_space_operations def_blk_aops; int disk_register_independent_access_ranges(struct gendisk *disk); void disk_unregister_independent_access_ranges(struct gendisk *disk); #ifdef CONFIG_FAIL_MAKE_REQUEST bool should_fail_request(struct block_device *part, unsigned int bytes); #else /* CONFIG_FAIL_MAKE_REQUEST */ static inline bool should_fail_request(struct block_device *part, unsigned int bytes) { return false; } #endif /* CONFIG_FAIL_MAKE_REQUEST */ /* * Optimized request reference counting. Ideally we'd make timeouts be more * clever, as that's the only reason we need references at all... But until * this happens, this is faster than using refcount_t. Also see: * * abc54d634334 ("io_uring: switch to atomic_t for io_kiocb reference count") */ #define req_ref_zero_or_close_to_overflow(req) \ ((unsigned int) atomic_read(&(req->ref)) + 127u <= 127u) static inline bool req_ref_inc_not_zero(struct request *req) { return atomic_inc_not_zero(&req->ref); } static inline bool req_ref_put_and_test(struct request *req) { WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); return atomic_dec_and_test(&req->ref); } static inline void req_ref_set(struct request *req, int value) { atomic_set(&req->ref, value); } static inline int req_ref_read(struct request *req) { return atomic_read(&req->ref); } static inline u64 blk_time_get_ns(void) { struct blk_plug *plug = current->plug; if (!plug || !in_task()) return ktime_get_ns(); /* * 0 could very well be a valid time, but rather than flag "this is * a valid timestamp" separately, just accept that we'll do an extra * ktime_get_ns() if we just happen to get 0 as the current time. */ if (!plug->cur_ktime) { plug->cur_ktime = ktime_get_ns(); current->flags |= PF_BLOCK_TS; } return plug->cur_ktime; } static inline ktime_t blk_time_get(void) { return ns_to_ktime(blk_time_get_ns()); } /* * From most significant bit: * 1 bit: reserved for other usage, see below * 12 bits: original size of bio * 51 bits: issue time of bio */ #define BIO_ISSUE_RES_BITS 1 #define BIO_ISSUE_SIZE_BITS 12 #define BIO_ISSUE_RES_SHIFT (64 - BIO_ISSUE_RES_BITS) #define BIO_ISSUE_SIZE_SHIFT (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS) #define BIO_ISSUE_TIME_MASK ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1) #define BIO_ISSUE_SIZE_MASK \ (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT) #define BIO_ISSUE_RES_MASK (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1)) /* Reserved bit for blk-throtl */ #define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63) static inline u64 __bio_issue_time(u64 time) { return time & BIO_ISSUE_TIME_MASK; } static inline u64 bio_issue_time(struct bio_issue *issue) { return __bio_issue_time(issue->value); } static inline sector_t bio_issue_size(struct bio_issue *issue) { return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT); } static inline void bio_issue_init(struct bio_issue *issue, sector_t size) { size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1; issue->value = ((issue->value & BIO_ISSUE_RES_MASK) | (blk_time_get_ns() & BIO_ISSUE_TIME_MASK) | ((u64)size << BIO_ISSUE_SIZE_SHIFT)); } void bdev_release(struct file *bdev_file); int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder, const struct blk_holder_ops *hops, struct file *bdev_file); int bdev_permission(dev_t dev, blk_mode_t mode, void *holder); #endif /* BLK_INTERNAL_H */
9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010-2011 Canonical Ltd <jeremy.kerr@canonical.com> * Copyright (C) 2011-2012 Linaro Ltd <mturquette@linaro.org> * * Standard functionality for the common clock API. See Documentation/driver-api/clk.rst */ #include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/clk/clk-conf.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/err.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/of.h> #include <linux/device.h> #include <linux/init.h> #include <linux/pm_runtime.h> #include <linux/sched.h> #include <linux/clkdev.h> #include "clk.h" static DEFINE_SPINLOCK(enable_lock); static DEFINE_MUTEX(prepare_lock); static struct task_struct *prepare_owner; static struct task_struct *enable_owner; static int prepare_refcnt; static int enable_refcnt; static HLIST_HEAD(clk_root_list); static HLIST_HEAD(clk_orphan_list); static LIST_HEAD(clk_notifier_list); /* List of registered clks that use runtime PM */ static HLIST_HEAD(clk_rpm_list); static DEFINE_MUTEX(clk_rpm_list_lock); static const struct hlist_head *all_lists[] = { &clk_root_list, &clk_orphan_list, NULL, }; /*** private data structures ***/ struct clk_parent_map { const struct clk_hw *hw; struct clk_core *core; const char *fw_name; const char *name; int index; }; struct clk_core { const char *name; const struct clk_ops *ops; struct clk_hw *hw; struct module *owner; struct device *dev; struct hlist_node rpm_node; struct device_node *of_node; struct clk_core *parent; struct clk_parent_map *parents; u8 num_parents; u8 new_parent_index; unsigned long rate; unsigned long req_rate; unsigned long new_rate; struct clk_core *new_parent; struct clk_core *new_child; unsigned long flags; bool orphan; bool rpm_enabled; unsigned int enable_count; unsigned int prepare_count; unsigned int protect_count; unsigned long min_rate; unsigned long max_rate; unsigned long accuracy; int phase; struct clk_duty duty; struct hlist_head children; struct hlist_node child_node; struct hlist_head clks; unsigned int notifier_count; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; struct hlist_node debug_node; #endif struct kref ref; }; #define CREATE_TRACE_POINTS #include <trace/events/clk.h> struct clk { struct clk_core *core; struct device *dev; const char *dev_id; const char *con_id; unsigned long min_rate; unsigned long max_rate; unsigned int exclusive_count; struct hlist_node clks_node; }; /*** runtime pm ***/ static int clk_pm_runtime_get(struct clk_core *core) { if (!core->rpm_enabled) return 0; return pm_runtime_resume_and_get(core->dev); } static void clk_pm_runtime_put(struct clk_core *core) { if (!core->rpm_enabled) return; pm_runtime_put_sync(core->dev); } /** * clk_pm_runtime_get_all() - Runtime "get" all clk provider devices * * Call clk_pm_runtime_get() on all runtime PM enabled clks in the clk tree so * that disabling unused clks avoids a deadlock where a device is runtime PM * resuming/suspending and the runtime PM callback is trying to grab the * prepare_lock for something like clk_prepare_enable() while * clk_disable_unused_subtree() holds the prepare_lock and is trying to runtime * PM resume/suspend the device as well. * * Context: Acquires the 'clk_rpm_list_lock' and returns with the lock held on * success. Otherwise the lock is released on failure. * * Return: 0 on success, negative errno otherwise. */ static int clk_pm_runtime_get_all(void) { int ret; struct clk_core *core, *failed; /* * Grab the list lock to prevent any new clks from being registered * or unregistered until clk_pm_runtime_put_all(). */ mutex_lock(&clk_rpm_list_lock); /* * Runtime PM "get" all the devices that are needed for the clks * currently registered. Do this without holding the prepare_lock, to * avoid the deadlock. */ hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { ret = clk_pm_runtime_get(core); if (ret) { failed = core; pr_err("clk: Failed to runtime PM get '%s' for clk '%s'\n", dev_name(failed->dev), failed->name); goto err; } } return 0; err: hlist_for_each_entry(core, &clk_rpm_list, rpm_node) { if (core == failed) break; clk_pm_runtime_put(core); } mutex_unlock(&clk_rpm_list_lock); return ret; } /** * clk_pm_runtime_put_all() - Runtime "put" all clk provider devices * * Put the runtime PM references taken in clk_pm_runtime_get_all() and release * the 'clk_rpm_list_lock'. */ static void clk_pm_runtime_put_all(void) { struct clk_core *core; hlist_for_each_entry(core, &clk_rpm_list, rpm_node) clk_pm_runtime_put(core); mutex_unlock(&clk_rpm_list_lock); } static void clk_pm_runtime_init(struct clk_core *core) { struct device *dev = core->dev; if (dev && pm_runtime_enabled(dev)) { core->rpm_enabled = true; mutex_lock(&clk_rpm_list_lock); hlist_add_head(&core->rpm_node, &clk_rpm_list); mutex_unlock(&clk_rpm_list_lock); } } /*** locking ***/ static void clk_prepare_lock(void) { if (!mutex_trylock(&prepare_lock)) { if (prepare_owner == current) { prepare_refcnt++; return; } mutex_lock(&prepare_lock); } WARN_ON_ONCE(prepare_owner != NULL); WARN_ON_ONCE(prepare_refcnt != 0); prepare_owner = current; prepare_refcnt = 1; } static void clk_prepare_unlock(void) { WARN_ON_ONCE(prepare_owner != current); WARN_ON_ONCE(prepare_refcnt == 0); if (--prepare_refcnt) return; prepare_owner = NULL; mutex_unlock(&prepare_lock); } static unsigned long clk_enable_lock(void) __acquires(enable_lock) { unsigned long flags; /* * On UP systems, spin_trylock_irqsave() always returns true, even if * we already hold the lock. So, in that case, we rely only on * reference counting. */ if (!IS_ENABLED(CONFIG_SMP) || !spin_trylock_irqsave(&enable_lock, flags)) { if (enable_owner == current) { enable_refcnt++; __acquire(enable_lock); if (!IS_ENABLED(CONFIG_SMP)) local_save_flags(flags); return flags; } spin_lock_irqsave(&enable_lock, flags); } WARN_ON_ONCE(enable_owner != NULL); WARN_ON_ONCE(enable_refcnt != 0); enable_owner = current; enable_refcnt = 1; return flags; } static void clk_enable_unlock(unsigned long flags) __releases(enable_lock) { WARN_ON_ONCE(enable_owner != current); WARN_ON_ONCE(enable_refcnt == 0); if (--enable_refcnt) { __release(enable_lock); return; } enable_owner = NULL; spin_unlock_irqrestore(&enable_lock, flags); } static bool clk_core_rate_is_protected(struct clk_core *core) { return core->protect_count; } static bool clk_core_is_prepared(struct clk_core *core) { bool ret = false; /* * .is_prepared is optional for clocks that can prepare * fall back to software usage counter if it is missing */ if (!core->ops->is_prepared) return core->prepare_count; if (!clk_pm_runtime_get(core)) { ret = core->ops->is_prepared(core->hw); clk_pm_runtime_put(core); } return ret; } static bool clk_core_is_enabled(struct clk_core *core) { bool ret = false; /* * .is_enabled is only mandatory for clocks that gate * fall back to software usage counter if .is_enabled is missing */ if (!core->ops->is_enabled) return core->enable_count; /* * Check if clock controller's device is runtime active before * calling .is_enabled callback. If not, assume that clock is * disabled, because we might be called from atomic context, from * which pm_runtime_get() is not allowed. * This function is called mainly from clk_disable_unused_subtree, * which ensures proper runtime pm activation of controller before * taking enable spinlock, but the below check is needed if one tries * to call it from other places. */ if (core->rpm_enabled) { pm_runtime_get_noresume(core->dev); if (!pm_runtime_active(core->dev)) { ret = false; goto done; } } /* * This could be called with the enable lock held, or from atomic * context. If the parent isn't enabled already, we can't do * anything here. We can also assume this clock isn't enabled. */ if ((core->flags & CLK_OPS_PARENT_ENABLE) && core->parent) if (!clk_core_is_enabled(core->parent)) { ret = false; goto done; } ret = core->ops->is_enabled(core->hw); done: if (core->rpm_enabled) pm_runtime_put(core->dev); return ret; } /*** helper functions ***/ const char *__clk_get_name(const struct clk *clk) { return !clk ? NULL : clk->core->name; } EXPORT_SYMBOL_GPL(__clk_get_name); const char *clk_hw_get_name(const struct clk_hw *hw) { return hw->core->name; } EXPORT_SYMBOL_GPL(clk_hw_get_name); struct clk_hw *__clk_get_hw(struct clk *clk) { return !clk ? NULL : clk->core->hw; } EXPORT_SYMBOL_GPL(__clk_get_hw); unsigned int clk_hw_get_num_parents(const struct clk_hw *hw) { return hw->core->num_parents; } EXPORT_SYMBOL_GPL(clk_hw_get_num_parents); struct clk_hw *clk_hw_get_parent(const struct clk_hw *hw) { return hw->core->parent ? hw->core->parent->hw : NULL; } EXPORT_SYMBOL_GPL(clk_hw_get_parent); static struct clk_core *__clk_lookup_subtree(const char *name, struct clk_core *core) { struct clk_core *child; struct clk_core *ret; if (!strcmp(core->name, name)) return core; hlist_for_each_entry(child, &core->children, child_node) { ret = __clk_lookup_subtree(name, child); if (ret) return ret; } return NULL; } static struct clk_core *clk_core_lookup(const char *name) { struct clk_core *root_clk; struct clk_core *ret; if (!name) return NULL; /* search the 'proper' clk tree first */ hlist_for_each_entry(root_clk, &clk_root_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; } /* if not found, then search the orphan tree */ hlist_for_each_entry(root_clk, &clk_orphan_list, child_node) { ret = __clk_lookup_subtree(name, root_clk); if (ret) return ret; } return NULL; } #ifdef CONFIG_OF static int of_parse_clkspec(const struct device_node *np, int index, const char *name, struct of_phandle_args *out_args); static struct clk_hw * of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec); #else static inline int of_parse_clkspec(const struct device_node *np, int index, const char *name, struct of_phandle_args *out_args) { return -ENOENT; } static inline struct clk_hw * of_clk_get_hw_from_clkspec(struct of_phandle_args *clkspec) { return ERR_PTR(-ENOENT); } #endif /** * clk_core_get - Find the clk_core parent of a clk * @core: clk to find parent of * @p_index: parent index to search for * * This is the preferred method for clk providers to find the parent of a * clk when that parent is external to the clk controller. The parent_names * array is indexed and treated as a local name matching a string in the device * node's 'clock-names' property or as the 'con_id' matching the device's * dev_name() in a clk_lookup. This allows clk providers to use their own * namespace instead of looking for a globally unique parent string. * * For example the following DT snippet would allow a clock registered by the * clock-controller@c001 that has a clk_init_data::parent_data array * with 'xtal' in the 'name' member to find the clock provided by the * clock-controller@f00abcd without needing to get the globally unique name of * the xtal clk. * * parent: clock-controller@f00abcd { * reg = <0xf00abcd 0xabcd>; * #clock-cells = <0>; * }; * * clock-controller@c001 { * reg = <0xc001 0xf00d>; * clocks = <&parent>; * clock-names = "xtal"; * #clock-cells = <1>; * }; * * Returns: -ENOENT when the provider can't be found or the clk doesn't * exist in the provider or the name can't be found in the DT node or * in a clkdev lookup. NULL when the provider knows about the clk but it * isn't provided on this system. * A valid clk_core pointer when the clk can be found in the provider. */ static struct clk_core *clk_core_get(struct clk_core *core, u8 p_index) { const char *name = core->parents[p_index].fw_name; int index = core->parents[p_index].index; struct clk_hw *hw = ERR_PTR(-ENOENT); struct device *dev = core->dev; const char *dev_id = dev ? dev_name(dev) : NULL; struct device_node *np = core->of_node; struct of_phandle_args clkspec; if (np && (name || index >= 0) && !of_parse_clkspec(np, index, name, &clkspec)) { hw = of_clk_get_hw_from_clkspec(&clkspec); of_node_put(clkspec.np); } else if (name) { /* * If the DT search above couldn't find the provider fallback to * looking up via clkdev based clk_lookups. */ hw = clk_find_hw(dev_id, name); } if (IS_ERR(hw)) return ERR_CAST(hw); if (!hw) return NULL; return hw->core; } static void clk_core_fill_parent_index(struct clk_core *core, u8 index) { struct clk_parent_map *entry = &core->parents[index]; struct clk_core *parent; if (entry->hw) { parent = entry->hw->core; } else { parent = clk_core_get(core, index); if (PTR_ERR(parent) == -ENOENT && entry->name) parent = clk_core_lookup(entry->name); } /* * We have a direct reference but it isn't registered yet? * Orphan it and let clk_reparent() update the orphan status * when the parent is registered. */ if (!parent) parent = ERR_PTR(-EPROBE_DEFER); /* Only cache it if it's not an error */ if (!IS_ERR(parent)) entry->core = parent; } static struct clk_core *clk_core_get_parent_by_index(struct clk_core *core, u8 index) { if (!core || index >= core->num_parents || !core->parents) return NULL; if (!core->parents[index].core) clk_core_fill_parent_index(core, index); return core->parents[index].core; } struct clk_hw * clk_hw_get_parent_by_index(const struct clk_hw *hw, unsigned int index) { struct clk_core *parent; parent = clk_core_get_parent_by_index(hw->core, index); return !parent ? NULL : parent->hw; } EXPORT_SYMBOL_GPL(clk_hw_get_parent_by_index); unsigned int __clk_get_enable_count(struct clk *clk) { return !clk ? 0 : clk->core->enable_count; } static unsigned long clk_core_get_rate_nolock(struct clk_core *core) { if (!core) return 0; if (!core->num_parents || core->parent) return core->rate; /* * Clk must have a parent because num_parents > 0 but the parent isn't * known yet. Best to return 0 as the rate of this clk until we can * properly recalc the rate based on the parent's rate. */ return 0; } unsigned long clk_hw_get_rate(const struct clk_hw *hw) { return clk_core_get_rate_nolock(hw->core); } EXPORT_SYMBOL_GPL(clk_hw_get_rate); static unsigned long clk_core_get_accuracy_no_lock(struct clk_core *core) { if (!core) return 0; return core->accuracy; } unsigned long clk_hw_get_flags(const struct clk_hw *hw) { return hw->core->flags; } EXPORT_SYMBOL_GPL(clk_hw_get_flags); bool clk_hw_is_prepared(const struct clk_hw *hw) { return clk_core_is_prepared(hw->core); } EXPORT_SYMBOL_GPL(clk_hw_is_prepared); bool clk_hw_rate_is_protected(const struct clk_hw *hw) { return clk_core_rate_is_protected(hw->core); } EXPORT_SYMBOL_GPL(clk_hw_rate_is_protected); bool clk_hw_is_enabled(const struct clk_hw *hw) { return clk_core_is_enabled(hw->core); } EXPORT_SYMBOL_GPL(clk_hw_is_enabled); bool __clk_is_enabled(struct clk *clk) { if (!clk) return false; return clk_core_is_enabled(clk->core); } EXPORT_SYMBOL_GPL(__clk_is_enabled); static bool mux_is_better_rate(unsigned long rate, unsigned long now, unsigned long best, unsigned long flags) { if (flags & CLK_MUX_ROUND_CLOSEST) return abs(now - rate) < abs(best - rate); return now <= rate && now > best; } static void clk_core_init_rate_req(struct clk_core * const core, struct clk_rate_request *req, unsigned long rate); static int clk_core_round_rate_nolock(struct clk_core *core, struct clk_rate_request *req); static bool clk_core_has_parent(struct clk_core *core, const struct clk_core *parent) { struct clk_core *tmp; unsigned int i; /* Optimize for the case where the parent is already the parent. */ if (core->parent == parent) return true; for (i = 0; i < core->num_parents; i++) { tmp = clk_core_get_parent_by_index(core, i); if (!tmp) continue; if (tmp == parent) return true; } return false; } static void clk_core_forward_rate_req(struct clk_core *core, const struct clk_rate_request *old_req, struct clk_core *parent, struct clk_rate_request *req, unsigned long parent_rate) { if (WARN_ON(!clk_core_has_parent(core, parent))) return; clk_core_init_rate_req(parent, req, parent_rate); if (req->min_rate < old_req->min_rate) req->min_rate = old_req->min_rate; if (req->max_rate > old_req->max_rate) req->max_rate = old_req->max_rate; } static int clk_core_determine_rate_no_reparent(struct clk_hw *hw, struct clk_rate_request *req) { struct clk_core *core = hw->core; struct clk_core *parent = core->parent; unsigned long best; int ret; if (core->flags & CLK_SET_RATE_PARENT) { struct clk_rate_request parent_req; if (!parent) { req->rate = 0; return 0; } clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate); trace_clk_rate_request_start(&parent_req); ret = clk_core_round_rate_nolock(parent, &parent_req); if (ret) return ret; trace_clk_rate_request_done(&parent_req); best = parent_req.rate; } else if (parent) { best = clk_core_get_rate_nolock(parent); } else { best = clk_core_get_rate_nolock(core); } req->best_parent_rate = best; req->rate = best; return 0; } int clk_mux_determine_rate_flags(struct clk_hw *hw, struct clk_rate_request *req, unsigned long flags) { struct clk_core *core = hw->core, *parent, *best_parent = NULL; int i, num_parents, ret; unsigned long best = 0; /* if NO_REPARENT flag set, pass through to current parent */ if (core->flags & CLK_SET_RATE_NO_REPARENT) return clk_core_determine_rate_no_reparent(hw, req); /* find the parent that can provide the fastest rate <= rate */ num_parents = core->num_parents; for (i = 0; i < num_parents; i++) { unsigned long parent_rate; parent = clk_core_get_parent_by_index(core, i); if (!parent) continue; if (core->flags & CLK_SET_RATE_PARENT) { struct clk_rate_request parent_req; clk_core_forward_rate_req(core, req, parent, &parent_req, req->rate); trace_clk_rate_request_start(&parent_req); ret = clk_core_round_rate_nolock(parent, &parent_req); if (ret) continue; trace_clk_rate_request_done(&parent_req); parent_rate = parent_req.rate; } else { parent_rate = clk_core_get_rate_nolock(parent); } if (mux_is_better_rate(req->rate, parent_rate, best, flags)) { best_parent = parent; best = parent_rate; } } if (!best_parent) return -EINVAL; req->best_parent_hw = best_parent->hw; req->best_parent_rate = best; req->rate = best; return 0; } EXPORT_SYMBOL_GPL(clk_mux_determine_rate_flags); struct clk *__clk_lookup(const char *name) { struct clk_core *core = clk_core_lookup(name); return !core ? NULL : core->hw->clk; } static void clk_core_get_boundaries(struct clk_core *core, unsigned long *min_rate, unsigned long *max_rate) { struct clk *clk_user; lockdep_assert_held(&prepare_lock); *min_rate = core->min_rate; *max_rate = core->max_rate; hlist_for_each_entry(clk_user, &core->clks, clks_node) *min_rate = max(*min_rate, clk_user->min_rate); hlist_for_each_entry(clk_user, &core->clks, clks_node) *max_rate = min(*max_rate, clk_user->max_rate); } /* * clk_hw_get_rate_range() - returns the clock rate range for a hw clk * @hw: the hw clk we want to get the range from * @min_rate: pointer to the variable that will hold the minimum * @max_rate: pointer to the variable that will hold the maximum * * Fills the @min_rate and @max_rate variables with the minimum and * maximum that clock can reach. */ void clk_hw_get_rate_range(struct clk_hw *hw, unsigned long *min_rate, unsigned long *max_rate) { clk_core_get_boundaries(hw->core, min_rate, max_rate); } EXPORT_SYMBOL_GPL(clk_hw_get_rate_range); static bool clk_core_check_boundaries(struct clk_core *core, unsigned long min_rate, unsigned long max_rate) { struct clk *user; lockdep_assert_held(&prepare_lock); if (min_rate > core->max_rate || max_rate < core->min_rate) return false; hlist_for_each_entry(user, &core->clks, clks_node) if (min_rate > user->max_rate || max_rate < user->min_rate) return false; return true; } void clk_hw_set_rate_range(struct clk_hw *hw, unsigned long min_rate, unsigned long max_rate) { hw->core->min_rate = min_rate; hw->core->max_rate = max_rate; } EXPORT_SYMBOL_GPL(clk_hw_set_rate_range); /* * __clk_mux_determine_rate - clk_ops::determine_rate implementation for a mux type clk * @hw: mux type clk to determine rate on * @req: rate request, also used to return preferred parent and frequencies * * Helper for finding best parent to provide a given frequency. This can be used * directly as a determine_rate callback (e.g. for a mux), or from a more * complex clock that may combine a mux with other operations. * * Returns: 0 on success, -EERROR value on error */ int __clk_mux_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { return clk_mux_determine_rate_flags(hw, req, 0); } EXPORT_SYMBOL_GPL(__clk_mux_determine_rate); int __clk_mux_determine_rate_closest(struct clk_hw *hw, struct clk_rate_request *req) { return clk_mux_determine_rate_flags(hw, req, CLK_MUX_ROUND_CLOSEST); } EXPORT_SYMBOL_GPL(__clk_mux_determine_rate_closest); /* * clk_hw_determine_rate_no_reparent - clk_ops::determine_rate implementation for a clk that doesn't reparent * @hw: mux type clk to determine rate on * @req: rate request, also used to return preferred frequency * * Helper for finding best parent rate to provide a given frequency. * This can be used directly as a determine_rate callback (e.g. for a * mux), or from a more complex clock that may combine a mux with other * operations. * * Returns: 0 on success, -EERROR value on error */ int clk_hw_determine_rate_no_reparent(struct clk_hw *hw, struct clk_rate_request *req) { return clk_core_determine_rate_no_reparent(hw, req); } EXPORT_SYMBOL_GPL(clk_hw_determine_rate_no_reparent); /*** clk api ***/ static void clk_core_rate_unprotect(struct clk_core *core) { lockdep_assert_held(&prepare_lock); if (!core) return; if (WARN(core->protect_count == 0, "%s already unprotected\n", core->name)) return; if (--core->protect_count > 0) return; clk_core_rate_unprotect(core->parent); } static int clk_core_rate_nuke_protect(struct clk_core *core) { int ret; lockdep_assert_held(&prepare_lock); if (!core) return -EINVAL; if (core->protect_count == 0) return 0; ret = core->protect_count; core->protect_count = 1; clk_core_rate_unprotect(core); return ret; } /** * clk_rate_exclusive_put - release exclusivity over clock rate control * @clk: the clk over which the exclusivity is released * * clk_rate_exclusive_put() completes a critical section during which a clock * consumer cannot tolerate any other consumer making any operation on the * clock which could result in a rate change or rate glitch. Exclusive clocks * cannot have their rate changed, either directly or indirectly due to changes * further up the parent chain of clocks. As a result, clocks up parent chain * also get under exclusive control of the calling consumer. * * If exlusivity is claimed more than once on clock, even by the same consumer, * the rate effectively gets locked as exclusivity can't be preempted. * * Calls to clk_rate_exclusive_put() must be balanced with calls to * clk_rate_exclusive_get(). Calls to this function may sleep, and do not return * error status. */ void clk_rate_exclusive_put(struct clk *clk) { if (!clk) return; clk_prepare_lock(); /* * if there is something wrong with this consumer protect count, stop * here before messing with the provider */ if (WARN_ON(clk->exclusive_count <= 0)) goto out; clk_core_rate_unprotect(clk->core); clk->exclusive_count--; out: clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_rate_exclusive_put); static void clk_core_rate_protect(struct clk_core *core) { lockdep_assert_held(&prepare_lock); if (!core) return; if (core->protect_count == 0) clk_core_rate_protect(core->parent); core->protect_count++; } static void clk_core_rate_restore_protect(struct clk_core *core, int count) { lockdep_assert_held(&prepare_lock); if (!core) return; if (count == 0) return; clk_core_rate_protect(core); core->protect_count = count; } /** * clk_rate_exclusive_get - get exclusivity over the clk rate control * @clk: the clk over which the exclusity of rate control is requested * * clk_rate_exclusive_get() begins a critical section during which a clock * consumer cannot tolerate any other consumer making any operation on the * clock which could result in a rate change or rate glitch. Exclusive clocks * cannot have their rate changed, either directly or indirectly due to changes * further up the parent chain of clocks. As a result, clocks up parent chain * also get under exclusive control of the calling consumer. * * If exlusivity is claimed more than once on clock, even by the same consumer, * the rate effectively gets locked as exclusivity can't be preempted. * * Calls to clk_rate_exclusive_get() should be balanced with calls to * clk_rate_exclusive_put(). Calls to this function may sleep. * Returns 0 on success, -EERROR otherwise */ int clk_rate_exclusive_get(struct clk *clk) { if (!clk) return 0; clk_prepare_lock(); clk_core_rate_protect(clk->core); clk->exclusive_count++; clk_prepare_unlock(); return 0; } EXPORT_SYMBOL_GPL(clk_rate_exclusive_get); static void devm_clk_rate_exclusive_put(void *data) { struct clk *clk = data; clk_rate_exclusive_put(clk); } int devm_clk_rate_exclusive_get(struct device *dev, struct clk *clk) { int ret; ret = clk_rate_exclusive_get(clk); if (ret) return ret; return devm_add_action_or_reset(dev, devm_clk_rate_exclusive_put, clk); } EXPORT_SYMBOL_GPL(devm_clk_rate_exclusive_get); static void clk_core_unprepare(struct clk_core *core) { lockdep_assert_held(&prepare_lock); if (!core) return; if (WARN(core->prepare_count == 0, "%s already unprepared\n", core->name)) return; if (WARN(core->prepare_count == 1 && core->flags & CLK_IS_CRITICAL, "Unpreparing critical %s\n", core->name)) return; if (core->flags & CLK_SET_RATE_GATE) clk_core_rate_unprotect(core); if (--core->prepare_count > 0) return; WARN(core->enable_count > 0, "Unpreparing enabled %s\n", core->name); trace_clk_unprepare(core); if (core->ops->unprepare) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); clk_core_unprepare(core->parent); clk_pm_runtime_put(core); } static void clk_core_unprepare_lock(struct clk_core *core) { clk_prepare_lock(); clk_core_unprepare(core); clk_prepare_unlock(); } /** * clk_unprepare - undo preparation of a clock source * @clk: the clk being unprepared * * clk_unprepare may sleep, which differentiates it from clk_disable. In a * simple case, clk_unprepare can be used instead of clk_disable to gate a clk * if the operation may sleep. One example is a clk which is accessed over * I2c. In the complex case a clk gate operation may require a fast and a slow * part. It is this reason that clk_unprepare and clk_disable are not mutually * exclusive. In fact clk_disable must be called before clk_unprepare. */ void clk_unprepare(struct clk *clk) { if (IS_ERR_OR_NULL(clk)) return; clk_core_unprepare_lock(clk->core); } EXPORT_SYMBOL_GPL(clk_unprepare); static int clk_core_prepare(struct clk_core *core) { int ret = 0; lockdep_assert_held(&prepare_lock); if (!core) return 0; if (core->prepare_count == 0) { ret = clk_pm_runtime_get(core); if (ret) return ret; ret = clk_core_prepare(core->parent); if (ret) goto runtime_put; trace_clk_prepare(core); if (core->ops->prepare) ret = core->ops->prepare(core->hw); trace_clk_prepare_complete(core); if (ret) goto unprepare; } core->prepare_count++; /* * CLK_SET_RATE_GATE is a special case of clock protection * Instead of a consumer claiming exclusive rate control, it is * actually the provider which prevents any consumer from making any * operation which could result in a rate change or rate glitch while * the clock is prepared. */ if (core->flags & CLK_SET_RATE_GATE) clk_core_rate_protect(core); return 0; unprepare: clk_core_unprepare(core->parent); runtime_put: clk_pm_runtime_put(core); return ret; } static int clk_core_prepare_lock(struct clk_core *core) { int ret; clk_prepare_lock(); ret = clk_core_prepare(core); clk_prepare_unlock(); return ret; } /** * clk_prepare - prepare a clock source * @clk: the clk being prepared * * clk_prepare may sleep, which differentiates it from clk_enable. In a simple * case, clk_prepare can be used instead of clk_enable to ungate a clk if the * operation may sleep. One example is a clk which is accessed over I2c. In * the complex case a clk ungate operation may require a fast and a slow part. * It is this reason that clk_prepare and clk_enable are not mutually * exclusive. In fact clk_prepare must be called before clk_enable. * Returns 0 on success, -EERROR otherwise. */ int clk_prepare(struct clk *clk) { if (!clk) return 0; return clk_core_prepare_lock(clk->core); } EXPORT_SYMBOL_GPL(clk_prepare); static void clk_core_disable(struct clk_core *core) { lockdep_assert_held(&enable_lock); if (!core) return; if (WARN(core->enable_count == 0, "%s already disabled\n", core->name)) return; if (WARN(core->enable_count == 1 && core->flags & CLK_IS_CRITICAL, "Disabling critical %s\n", core->name)) return; if (--core->enable_count > 0) return; trace_clk_disable(core); if (core->ops->disable) core->ops->disable(core->hw); trace_clk_disable_complete(core); clk_core_disable(core->parent); } static void clk_core_disable_lock(struct clk_core *core) { unsigned long flags; flags = clk_enable_lock(); clk_core_disable(core); clk_enable_unlock(flags); } /** * clk_disable - gate a clock * @clk: the clk being gated * * clk_disable must not sleep, which differentiates it from clk_unprepare. In * a simple case, clk_disable can be used instead of clk_unprepare to gate a * clk if the operation is fast and will never sleep. One example is a * SoC-internal clk which is controlled via simple register writes. In the * complex case a clk gate operation may require a fast and a slow part. It is * this reason that clk_unprepare and clk_disable are not mutually exclusive. * In fact clk_disable must be called before clk_unprepare. */ void clk_disable(struct clk *clk) { if (IS_ERR_OR_NULL(clk)) return; clk_core_disable_lock(clk->core); } EXPORT_SYMBOL_GPL(clk_disable); static int clk_core_enable(struct clk_core *core) { int ret = 0; lockdep_assert_held(&enable_lock); if (!core) return 0; if (WARN(core->prepare_count == 0, "Enabling unprepared %s\n", core->name)) return -ESHUTDOWN; if (core->enable_count == 0) { ret = clk_core_enable(core->parent); if (ret) return ret; trace_clk_enable(core); if (core->ops->enable) ret = core->ops->enable(core->hw); trace_clk_enable_complete(core); if (ret) { clk_core_disable(core->parent); return ret; } } core->enable_count++; return 0; } static int clk_core_enable_lock(struct clk_core *core) { unsigned long flags; int ret; flags = clk_enable_lock(); ret = clk_core_enable(core); clk_enable_unlock(flags); return ret; } /** * clk_gate_restore_context - restore context for poweroff * @hw: the clk_hw pointer of clock whose state is to be restored * * The clock gate restore context function enables or disables * the gate clocks based on the enable_count. This is done in cases * where the clock context is lost and based on the enable_count * the clock either needs to be enabled/disabled. This * helps restore the state of gate clocks. */ void clk_gate_restore_context(struct clk_hw *hw) { struct clk_core *core = hw->core; if (core->enable_count) core->ops->enable(hw); else core->ops->disable(hw); } EXPORT_SYMBOL_GPL(clk_gate_restore_context); static int clk_core_save_context(struct clk_core *core) { struct clk_core *child; int ret = 0; hlist_for_each_entry(child, &core->children, child_node) { ret = clk_core_save_context(child); if (ret < 0) return ret; } if (core->ops && core->ops->save_context) ret = core->ops->save_context(core->hw); return ret; } static void clk_core_restore_context(struct clk_core *core) { struct clk_core *child; if (core->ops && core->ops->restore_context) core->ops->restore_context(core->hw); hlist_for_each_entry(child, &core->children, child_node) clk_core_restore_context(child); } /** * clk_save_context - save clock context for poweroff * * Saves the context of the clock register for powerstates in which the * contents of the registers will be lost. Occurs deep within the suspend * code. Returns 0 on success. */ int clk_save_context(void) { struct clk_core *clk; int ret; hlist_for_each_entry(clk, &clk_root_list, child_node) { ret = clk_core_save_context(clk); if (ret < 0) return ret; } hlist_for_each_entry(clk, &clk_orphan_list, child_node) { ret = clk_core_save_context(clk); if (ret < 0) return ret; } return 0; } EXPORT_SYMBOL_GPL(clk_save_context); /** * clk_restore_context - restore clock context after poweroff * * Restore the saved clock context upon resume. * */ void clk_restore_context(void) { struct clk_core *core; hlist_for_each_entry(core, &clk_root_list, child_node) clk_core_restore_context(core); hlist_for_each_entry(core, &clk_orphan_list, child_node) clk_core_restore_context(core); } EXPORT_SYMBOL_GPL(clk_restore_context); /** * clk_enable - ungate a clock * @clk: the clk being ungated * * clk_enable must not sleep, which differentiates it from clk_prepare. In a * simple case, clk_enable can be used instead of clk_prepare to ungate a clk * if the operation will never sleep. One example is a SoC-internal clk which * is controlled via simple register writes. In the complex case a clk ungate * operation may require a fast and a slow part. It is this reason that * clk_enable and clk_prepare are not mutually exclusive. In fact clk_prepare * must be called before clk_enable. Returns 0 on success, -EERROR * otherwise. */ int clk_enable(struct clk *clk) { if (!clk) return 0; return clk_core_enable_lock(clk->core); } EXPORT_SYMBOL_GPL(clk_enable); /** * clk_is_enabled_when_prepared - indicate if preparing a clock also enables it. * @clk: clock source * * Returns true if clk_prepare() implicitly enables the clock, effectively * making clk_enable()/clk_disable() no-ops, false otherwise. * * This is of interest mainly to power management code where actually * disabling the clock also requires unpreparing it to have any material * effect. * * Regardless of the value returned here, the caller must always invoke * clk_enable() or clk_prepare_enable() and counterparts for usage counts * to be right. */ bool clk_is_enabled_when_prepared(struct clk *clk) { return clk && !(clk->core->ops->enable && clk->core->ops->disable); } EXPORT_SYMBOL_GPL(clk_is_enabled_when_prepared); static int clk_core_prepare_enable(struct clk_core *core) { int ret; ret = clk_core_prepare_lock(core); if (ret) return ret; ret = clk_core_enable_lock(core); if (ret) clk_core_unprepare_lock(core); return ret; } static void clk_core_disable_unprepare(struct clk_core *core) { clk_core_disable_lock(core); clk_core_unprepare_lock(core); } static void __init clk_unprepare_unused_subtree(struct clk_core *core) { struct clk_core *child; lockdep_assert_held(&prepare_lock); hlist_for_each_entry(child, &core->children, child_node) clk_unprepare_unused_subtree(child); if (core->prepare_count) return; if (core->flags & CLK_IGNORE_UNUSED) return; if (clk_core_is_prepared(core)) { trace_clk_unprepare(core); if (core->ops->unprepare_unused) core->ops->unprepare_unused(core->hw); else if (core->ops->unprepare) core->ops->unprepare(core->hw); trace_clk_unprepare_complete(core); } } static void __init clk_disable_unused_subtree(struct clk_core *core) { struct clk_core *child; unsigned long flags; lockdep_assert_held(&prepare_lock); hlist_for_each_entry(child, &core->children, child_node) clk_disable_unused_subtree(child); if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_prepare_enable(core->parent); flags = clk_enable_lock(); if (core->enable_count) goto unlock_out; if (core->flags & CLK_IGNORE_UNUSED) goto unlock_out; /* * some gate clocks have special needs during the disable-unused * sequence. call .disable_unused if available, otherwise fall * back to .disable */ if (clk_core_is_enabled(core)) { trace_clk_disable(core); if (core->ops->disable_unused) core->ops->disable_unused(core->hw); else if (core->ops->disable) core->ops->disable(core->hw); trace_clk_disable_complete(core); } unlock_out: clk_enable_unlock(flags); if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_unprepare(core->parent); } static bool clk_ignore_unused __initdata; static int __init clk_ignore_unused_setup(char *__unused) { clk_ignore_unused = true; return 1; } __setup("clk_ignore_unused", clk_ignore_unused_setup); static int __init clk_disable_unused(void) { struct clk_core *core; int ret; if (clk_ignore_unused) { pr_warn("clk: Not disabling unused clocks\n"); return 0; } pr_info("clk: Disabling unused clocks\n"); ret = clk_pm_runtime_get_all(); if (ret) return ret; /* * Grab the prepare lock to keep the clk topology stable while iterating * over clks. */ clk_prepare_lock(); hlist_for_each_entry(core, &clk_root_list, child_node) clk_disable_unused_subtree(core); hlist_for_each_entry(core, &clk_orphan_list, child_node) clk_disable_unused_subtree(core); hlist_for_each_entry(core, &clk_root_list, child_node) clk_unprepare_unused_subtree(core); hlist_for_each_entry(core, &clk_orphan_list, child_node) clk_unprepare_unused_subtree(core); clk_prepare_unlock(); clk_pm_runtime_put_all(); return 0; } late_initcall_sync(clk_disable_unused); static int clk_core_determine_round_nolock(struct clk_core *core, struct clk_rate_request *req) { long rate; lockdep_assert_held(&prepare_lock); if (!core) return 0; /* * Some clock providers hand-craft their clk_rate_requests and * might not fill min_rate and max_rate. * * If it's the case, clamping the rate is equivalent to setting * the rate to 0 which is bad. Skip the clamping but complain so * that it gets fixed, hopefully. */ if (!req->min_rate && !req->max_rate) pr_warn("%s: %s: clk_rate_request has initialized min or max rate.\n", __func__, core->name); else req->rate = clamp(req->rate, req->min_rate, req->max_rate); /* * At this point, core protection will be disabled * - if the provider is not protected at all * - if the calling consumer is the only one which has exclusivity * over the provider */ if (clk_core_rate_is_protected(core)) { req->rate = core->rate; } else if (core->ops->determine_rate) { return core->ops->determine_rate(core->hw, req); } else if (core->ops->round_rate) { rate = core->ops->round_rate(core->hw, req->rate, &req->best_parent_rate); if (rate < 0) return rate; req->rate = rate; } else { return -EINVAL; } return 0; } static void clk_core_init_rate_req(struct clk_core * const core, struct clk_rate_request *req, unsigned long rate) { struct clk_core *parent; if (WARN_ON(!req)) return; memset(req, 0, sizeof(*req)); req->max_rate = ULONG_MAX; if (!core) return; req->core = core; req->rate = rate; clk_core_get_boundaries(core, &req->min_rate, &req->max_rate); parent = core->parent; if (parent) { req->best_parent_hw = parent->hw; req->best_parent_rate = parent->rate; } else { req->best_parent_hw = NULL; req->best_parent_rate = 0; } } /** * clk_hw_init_rate_request - Initializes a clk_rate_request * @hw: the clk for which we want to submit a rate request * @req: the clk_rate_request structure we want to initialise * @rate: the rate which is to be requested * * Initializes a clk_rate_request structure to submit to * __clk_determine_rate() or similar functions. */ void clk_hw_init_rate_request(const struct clk_hw *hw, struct clk_rate_request *req, unsigned long rate) { if (WARN_ON(!hw || !req)) return; clk_core_init_rate_req(hw->core, req, rate); } EXPORT_SYMBOL_GPL(clk_hw_init_rate_request); /** * clk_hw_forward_rate_request - Forwards a clk_rate_request to a clock's parent * @hw: the original clock that got the rate request * @old_req: the original clk_rate_request structure we want to forward * @parent: the clk we want to forward @old_req to * @req: the clk_rate_request structure we want to initialise * @parent_rate: The rate which is to be requested to @parent * * Initializes a clk_rate_request structure to submit to a clock parent * in __clk_determine_rate() or similar functions. */ void clk_hw_forward_rate_request(const struct clk_hw *hw, const struct clk_rate_request *old_req, const struct clk_hw *parent, struct clk_rate_request *req, unsigned long parent_rate) { if (WARN_ON(!hw || !old_req || !parent || !req)) return; clk_core_forward_rate_req(hw->core, old_req, parent->core, req, parent_rate); } EXPORT_SYMBOL_GPL(clk_hw_forward_rate_request); static bool clk_core_can_round(struct clk_core * const core) { return core->ops->determine_rate || core->ops->round_rate; } static int clk_core_round_rate_nolock(struct clk_core *core, struct clk_rate_request *req) { int ret; lockdep_assert_held(&prepare_lock); if (!core) { req->rate = 0; return 0; } if (clk_core_can_round(core)) return clk_core_determine_round_nolock(core, req); if (core->flags & CLK_SET_RATE_PARENT) { struct clk_rate_request parent_req; clk_core_forward_rate_req(core, req, core->parent, &parent_req, req->rate); trace_clk_rate_request_start(&parent_req); ret = clk_core_round_rate_nolock(core->parent, &parent_req); if (ret) return ret; trace_clk_rate_request_done(&parent_req); req->best_parent_rate = parent_req.rate; req->rate = parent_req.rate; return 0; } req->rate = core->rate; return 0; } /** * __clk_determine_rate - get the closest rate actually supported by a clock * @hw: determine the rate of this clock * @req: target rate request * * Useful for clk_ops such as .set_rate and .determine_rate. */ int __clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req) { if (!hw) { req->rate = 0; return 0; } return clk_core_round_rate_nolock(hw->core, req); } EXPORT_SYMBOL_GPL(__clk_determine_rate); /** * clk_hw_round_rate() - round the given rate for a hw clk * @hw: the hw clk for which we are rounding a rate * @rate: the rate which is to be rounded * * Takes in a rate as input and rounds it to a rate that the clk can actually * use. * * Context: prepare_lock must be held. * For clk providers to call from within clk_ops such as .round_rate, * .determine_rate. * * Return: returns rounded rate of hw clk if clk supports round_rate operation * else returns the parent rate. */ unsigned long clk_hw_round_rate(struct clk_hw *hw, unsigned long rate) { int ret; struct clk_rate_request req; clk_core_init_rate_req(hw->core, &req, rate); trace_clk_rate_request_start(&req); ret = clk_core_round_rate_nolock(hw->core, &req); if (ret) return 0; trace_clk_rate_request_done(&req); return req.rate; } EXPORT_SYMBOL_GPL(clk_hw_round_rate); /** * clk_round_rate - round the given rate for a clk * @clk: the clk for which we are rounding a rate * @rate: the rate which is to be rounded * * Takes in a rate as input and rounds it to a rate that the clk can actually * use which is then returned. If clk doesn't support round_rate operation * then the parent rate is returned. */ long clk_round_rate(struct clk *clk, unsigned long rate) { struct clk_rate_request req; int ret; if (!clk) return 0; clk_prepare_lock(); if (clk->exclusive_count) clk_core_rate_unprotect(clk->core); clk_core_init_rate_req(clk->core, &req, rate); trace_clk_rate_request_start(&req); ret = clk_core_round_rate_nolock(clk->core, &req); trace_clk_rate_request_done(&req); if (clk->exclusive_count) clk_core_rate_protect(clk->core); clk_prepare_unlock(); if (ret) return ret; return req.rate; } EXPORT_SYMBOL_GPL(clk_round_rate); /** * __clk_notify - call clk notifier chain * @core: clk that is changing rate * @msg: clk notifier type (see include/linux/clk.h) * @old_rate: old clk rate * @new_rate: new clk rate * * Triggers a notifier call chain on the clk rate-change notification * for 'clk'. Passes a pointer to the struct clk and the previous * and current rates to the notifier callback. Intended to be called by * internal clock code only. Returns NOTIFY_DONE from the last driver * called if all went well, or NOTIFY_STOP or NOTIFY_BAD immediately if * a driver returns that. */ static int __clk_notify(struct clk_core *core, unsigned long msg, unsigned long old_rate, unsigned long new_rate) { struct clk_notifier *cn; struct clk_notifier_data cnd; int ret = NOTIFY_DONE; cnd.old_rate = old_rate; cnd.new_rate = new_rate; list_for_each_entry(cn, &clk_notifier_list, node) { if (cn->clk->core == core) { cnd.clk = cn->clk; ret = srcu_notifier_call_chain(&cn->notifier_head, msg, &cnd); if (ret & NOTIFY_STOP_MASK) return ret; } } return ret; } /** * __clk_recalc_accuracies * @core: first clk in the subtree * * Walks the subtree of clks starting with clk and recalculates accuracies as * it goes. Note that if a clk does not implement the .recalc_accuracy * callback then it is assumed that the clock will take on the accuracy of its * parent. */ static void __clk_recalc_accuracies(struct clk_core *core) { unsigned long parent_accuracy = 0; struct clk_core *child; lockdep_assert_held(&prepare_lock); if (core->parent) parent_accuracy = core->parent->accuracy; if (core->ops->recalc_accuracy) core->accuracy = core->ops->recalc_accuracy(core->hw, parent_accuracy); else core->accuracy = parent_accuracy; hlist_for_each_entry(child, &core->children, child_node) __clk_recalc_accuracies(child); } static long clk_core_get_accuracy_recalc(struct clk_core *core) { if (core && (core->flags & CLK_GET_ACCURACY_NOCACHE)) __clk_recalc_accuracies(core); return clk_core_get_accuracy_no_lock(core); } /** * clk_get_accuracy - return the accuracy of clk * @clk: the clk whose accuracy is being returned * * Simply returns the cached accuracy of the clk, unless * CLK_GET_ACCURACY_NOCACHE flag is set, which means a recalc_rate will be * issued. * If clk is NULL then returns 0. */ long clk_get_accuracy(struct clk *clk) { long accuracy; if (!clk) return 0; clk_prepare_lock(); accuracy = clk_core_get_accuracy_recalc(clk->core); clk_prepare_unlock(); return accuracy; } EXPORT_SYMBOL_GPL(clk_get_accuracy); static unsigned long clk_recalc(struct clk_core *core, unsigned long parent_rate) { unsigned long rate = parent_rate; if (core->ops->recalc_rate && !clk_pm_runtime_get(core)) { rate = core->ops->recalc_rate(core->hw, parent_rate); clk_pm_runtime_put(core); } return rate; } /** * __clk_recalc_rates * @core: first clk in the subtree * @update_req: Whether req_rate should be updated with the new rate * @msg: notification type (see include/linux/clk.h) * * Walks the subtree of clks starting with clk and recalculates rates as it * goes. Note that if a clk does not implement the .recalc_rate callback then * it is assumed that the clock will take on the rate of its parent. * * clk_recalc_rates also propagates the POST_RATE_CHANGE notification, * if necessary. */ static void __clk_recalc_rates(struct clk_core *core, bool update_req, unsigned long msg) { unsigned long old_rate; unsigned long parent_rate = 0; struct clk_core *child; lockdep_assert_held(&prepare_lock); old_rate = core->rate; if (core->parent) parent_rate = core->parent->rate; core->rate = clk_recalc(core, parent_rate); if (update_req) core->req_rate = core->rate; /* * ignore NOTIFY_STOP and NOTIFY_BAD return values for POST_RATE_CHANGE * & ABORT_RATE_CHANGE notifiers */ if (core->notifier_count && msg) __clk_notify(core, msg, old_rate, core->rate); hlist_for_each_entry(child, &core->children, child_node) __clk_recalc_rates(child, update_req, msg); } static unsigned long clk_core_get_rate_recalc(struct clk_core *core) { if (core && (core->flags & CLK_GET_RATE_NOCACHE)) __clk_recalc_rates(core, false, 0); return clk_core_get_rate_nolock(core); } /** * clk_get_rate - return the rate of clk * @clk: the clk whose rate is being returned * * Simply returns the cached rate of the clk, unless CLK_GET_RATE_NOCACHE flag * is set, which means a recalc_rate will be issued. Can be called regardless of * the clock enabledness. If clk is NULL, or if an error occurred, then returns * 0. */ unsigned long clk_get_rate(struct clk *clk) { unsigned long rate; if (!clk) return 0; clk_prepare_lock(); rate = clk_core_get_rate_recalc(clk->core); clk_prepare_unlock(); return rate; } EXPORT_SYMBOL_GPL(clk_get_rate); static int clk_fetch_parent_index(struct clk_core *core, struct clk_core *parent) { int i; if (!parent) return -EINVAL; for (i = 0; i < core->num_parents; i++) { /* Found it first try! */ if (core->parents[i].core == parent) return i; /* Something else is here, so keep looking */ if (core->parents[i].core) continue; /* Maybe core hasn't been cached but the hw is all we know? */ if (core->parents[i].hw) { if (core->parents[i].hw == parent->hw) break; /* Didn't match, but we're expecting a clk_hw */ continue; } /* Maybe it hasn't been cached (clk_set_parent() path) */ if (parent == clk_core_get(core, i)) break; /* Fallback to comparing globally unique names */ if (core->parents[i].name && !strcmp(parent->name, core->parents[i].name)) break; } if (i == core->num_parents) return -EINVAL; core->parents[i].core = parent; return i; } /** * clk_hw_get_parent_index - return the index of the parent clock * @hw: clk_hw associated with the clk being consumed * * Fetches and returns the index of parent clock. Returns -EINVAL if the given * clock does not have a current parent. */ int clk_hw_get_parent_index(struct clk_hw *hw) { struct clk_hw *parent = clk_hw_get_parent(hw); if (WARN_ON(parent == NULL)) return -EINVAL; return clk_fetch_parent_index(hw->core, parent->core); } EXPORT_SYMBOL_GPL(clk_hw_get_parent_index); /* * Update the orphan status of @core and all its children. */ static void clk_core_update_orphan_status(struct clk_core *core, bool is_orphan) { struct clk_core *child; core->orphan = is_orphan; hlist_for_each_entry(child, &core->children, child_node) clk_core_update_orphan_status(child, is_orphan); } static void clk_reparent(struct clk_core *core, struct clk_core *new_parent) { bool was_orphan = core->orphan; hlist_del(&core->child_node); if (new_parent) { bool becomes_orphan = new_parent->orphan; /* avoid duplicate POST_RATE_CHANGE notifications */ if (new_parent->new_child == core) new_parent->new_child = NULL; hlist_add_head(&core->child_node, &new_parent->children); if (was_orphan != becomes_orphan) clk_core_update_orphan_status(core, becomes_orphan); } else { hlist_add_head(&core->child_node, &clk_orphan_list); if (!was_orphan) clk_core_update_orphan_status(core, true); } core->parent = new_parent; } static struct clk_core *__clk_set_parent_before(struct clk_core *core, struct clk_core *parent) { unsigned long flags; struct clk_core *old_parent = core->parent; /* * 1. enable parents for CLK_OPS_PARENT_ENABLE clock * * 2. Migrate prepare state between parents and prevent race with * clk_enable(). * * If the clock is not prepared, then a race with * clk_enable/disable() is impossible since we already have the * prepare lock (future calls to clk_enable() need to be preceded by * a clk_prepare()). * * If the clock is prepared, migrate the prepared state to the new * parent and also protect against a race with clk_enable() by * forcing the clock and the new parent on. This ensures that all * future calls to clk_enable() are practically NOPs with respect to * hardware and software states. * * See also: Comment for clk_set_parent() below. */ /* enable old_parent & parent if CLK_OPS_PARENT_ENABLE is set */ if (core->flags & CLK_OPS_PARENT_ENABLE) { clk_core_prepare_enable(old_parent); clk_core_prepare_enable(parent); } /* migrate prepare count if > 0 */ if (core->prepare_count) { clk_core_prepare_enable(parent); clk_core_enable_lock(core); } /* update the clk tree topology */ flags = clk_enable_lock(); clk_reparent(core, parent); clk_enable_unlock(flags); return old_parent; } static void __clk_set_parent_after(struct clk_core *core, struct clk_core *parent, struct clk_core *old_parent) { /* * Finish the migration of prepare state and undo the changes done * for preventing a race with clk_enable(). */ if (core->prepare_count) { clk_core_disable_lock(core); clk_core_disable_unprepare(old_parent); } /* re-balance ref counting if CLK_OPS_PARENT_ENABLE is set */ if (core->flags & CLK_OPS_PARENT_ENABLE) { clk_core_disable_unprepare(parent); clk_core_disable_unprepare(old_parent); } } static int __clk_set_parent(struct clk_core *core, struct clk_core *parent, u8 p_index) { unsigned long flags; int ret = 0; struct clk_core *old_parent; old_parent = __clk_set_parent_before(core, parent); trace_clk_set_parent(core, parent); /* change clock input source */ if (parent && core->ops->set_parent) ret = core->ops->set_parent(core->hw, p_index); trace_clk_set_parent_complete(core, parent); if (ret) { flags = clk_enable_lock(); clk_reparent(core, old_parent); clk_enable_unlock(flags); __clk_set_parent_after(core, old_parent, parent); return ret; } __clk_set_parent_after(core, parent, old_parent); return 0; } /** * __clk_speculate_rates * @core: first clk in the subtree * @parent_rate: the "future" rate of clk's parent * * Walks the subtree of clks starting with clk, speculating rates as it * goes and firing off PRE_RATE_CHANGE notifications as necessary. * * Unlike clk_recalc_rates, clk_speculate_rates exists only for sending * pre-rate change notifications and returns early if no clks in the * subtree have subscribed to the notifications. Note that if a clk does not * implement the .recalc_rate callback then it is assumed that the clock will * take on the rate of its parent. */ static int __clk_speculate_rates(struct clk_core *core, unsigned long parent_rate) { struct clk_core *child; unsigned long new_rate; int ret = NOTIFY_DONE; lockdep_assert_held(&prepare_lock); new_rate = clk_recalc(core, parent_rate); /* abort rate change if a driver returns NOTIFY_BAD or NOTIFY_STOP */ if (core->notifier_count) ret = __clk_notify(core, PRE_RATE_CHANGE, core->rate, new_rate); if (ret & NOTIFY_STOP_MASK) { pr_debug("%s: clk notifier callback for clock %s aborted with error %d\n", __func__, core->name, ret); goto out; } hlist_for_each_entry(child, &core->children, child_node) { ret = __clk_speculate_rates(child, new_rate); if (ret & NOTIFY_STOP_MASK) break; } out: return ret; } static void clk_calc_subtree(struct clk_core *core, unsigned long new_rate, struct clk_core *new_parent, u8 p_index) { struct clk_core *child; core->new_rate = new_rate; core->new_parent = new_parent; core->new_parent_index = p_index; /* include clk in new parent's PRE_RATE_CHANGE notifications */ core->new_child = NULL; if (new_parent && new_parent != core->parent) new_parent->new_child = core; hlist_for_each_entry(child, &core->children, child_node) { child->new_rate = clk_recalc(child, new_rate); clk_calc_subtree(child, child->new_rate, NULL, 0); } } /* * calculate the new rates returning the topmost clock that has to be * changed. */ static struct clk_core *clk_calc_new_rates(struct clk_core *core, unsigned long rate) { struct clk_core *top = core; struct clk_core *old_parent, *parent; unsigned long best_parent_rate = 0; unsigned long new_rate; unsigned long min_rate; unsigned long max_rate; int p_index = 0; long ret; /* sanity */ if (IS_ERR_OR_NULL(core)) return NULL; /* save parent rate, if it exists */ parent = old_parent = core->parent; if (parent) best_parent_rate = parent->rate; clk_core_get_boundaries(core, &min_rate, &max_rate); /* find the closest rate and parent clk/rate */ if (clk_core_can_round(core)) { struct clk_rate_request req; clk_core_init_rate_req(core, &req, rate); trace_clk_rate_request_start(&req); ret = clk_core_determine_round_nolock(core, &req); if (ret < 0) return NULL; trace_clk_rate_request_done(&req); best_parent_rate = req.best_parent_rate; new_rate = req.rate; parent = req.best_parent_hw ? req.best_parent_hw->core : NULL; if (new_rate < min_rate || new_rate > max_rate) return NULL; } else if (!parent || !(core->flags & CLK_SET_RATE_PARENT)) { /* pass-through clock without adjustable parent */ core->new_rate = core->rate; return NULL; } else { /* pass-through clock with adjustable parent */ top = clk_calc_new_rates(parent, rate); new_rate = parent->new_rate; goto out; } /* some clocks must be gated to change parent */ if (parent != old_parent && (core->flags & CLK_SET_PARENT_GATE) && core->prepare_count) { pr_debug("%s: %s not gated but wants to reparent\n", __func__, core->name); return NULL; } /* try finding the new parent index */ if (parent && core->num_parents > 1) { p_index = clk_fetch_parent_index(core, parent); if (p_index < 0) { pr_debug("%s: clk %s can not be parent of clk %s\n", __func__, parent->name, core->name); return NULL; } } if ((core->flags & CLK_SET_RATE_PARENT) && parent && best_parent_rate != parent->rate) top = clk_calc_new_rates(parent, best_parent_rate); out: clk_calc_subtree(core, new_rate, parent, p_index); return top; } /* * Notify about rate changes in a subtree. Always walk down the whole tree * so that in case of an error we can walk down the whole tree again and * abort the change. */ static struct clk_core *clk_propagate_rate_change(struct clk_core *core, unsigned long event) { struct clk_core *child, *tmp_clk, *fail_clk = NULL; int ret = NOTIFY_DONE; if (core->rate == core->new_rate) return NULL; if (core->notifier_count) { ret = __clk_notify(core, event, core->rate, core->new_rate); if (ret & NOTIFY_STOP_MASK) fail_clk = core; } hlist_for_each_entry(child, &core->children, child_node) { /* Skip children who will be reparented to another clock */ if (child->new_parent && child->new_parent != core) continue; tmp_clk = clk_propagate_rate_change(child, event); if (tmp_clk) fail_clk = tmp_clk; } /* handle the new child who might not be in core->children yet */ if (core->new_child) { tmp_clk = clk_propagate_rate_change(core->new_child, event); if (tmp_clk) fail_clk = tmp_clk; } return fail_clk; } /* * walk down a subtree and set the new rates notifying the rate * change on the way */ static void clk_change_rate(struct clk_core *core) { struct clk_core *child; struct hlist_node *tmp; unsigned long old_rate; unsigned long best_parent_rate = 0; bool skip_set_rate = false; struct clk_core *old_parent; struct clk_core *parent = NULL; old_rate = core->rate; if (core->new_parent) { parent = core->new_parent; best_parent_rate = core->new_parent->rate; } else if (core->parent) { parent = core->parent; best_parent_rate = core->parent->rate; } if (clk_pm_runtime_get(core)) return; if (core->flags & CLK_SET_RATE_UNGATE) { clk_core_prepare(core); clk_core_enable_lock(core); } if (core->new_parent && core->new_parent != core->parent) { old_parent = __clk_set_parent_before(core, core->new_parent); trace_clk_set_parent(core, core->new_parent); if (core->ops->set_rate_and_parent) { skip_set_rate = true; core->ops->set_rate_and_parent(core->hw, core->new_rate, best_parent_rate, core->new_parent_index); } else if (core->ops->set_parent) { core->ops->set_parent(core->hw, core->new_parent_index); } trace_clk_set_parent_complete(core, core->new_parent); __clk_set_parent_after(core, core->new_parent, old_parent); } if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_prepare_enable(parent); trace_clk_set_rate(core, core->new_rate); if (!skip_set_rate && core->ops->set_rate) core->ops->set_rate(core->hw, core->new_rate, best_parent_rate); trace_clk_set_rate_complete(core, core->new_rate); core->rate = clk_recalc(core, best_parent_rate); if (core->flags & CLK_SET_RATE_UNGATE) { clk_core_disable_lock(core); clk_core_unprepare(core); } if (core->flags & CLK_OPS_PARENT_ENABLE) clk_core_disable_unprepare(parent); if (core->notifier_count && old_rate != core->rate) __clk_notify(core, POST_RATE_CHANGE, old_rate, core->rate); if (core->flags & CLK_RECALC_NEW_RATES) (void)clk_calc_new_rates(core, core->new_rate); /* * Use safe iteration, as change_rate can actually swap parents * for certain clock types. */ hlist_for_each_entry_safe(child, tmp, &core->children, child_node) { /* Skip children who will be reparented to another clock */ if (child->new_parent && child->new_parent != core) continue; clk_change_rate(child); } /* handle the new child who might not be in core->children yet */ if (core->new_child) clk_change_rate(core->new_child); clk_pm_runtime_put(core); } static unsigned long clk_core_req_round_rate_nolock(struct clk_core *core, unsigned long req_rate) { int ret, cnt; struct clk_rate_request req; lockdep_assert_held(&prepare_lock); if (!core) return 0; /* simulate what the rate would be if it could be freely set */ cnt = clk_core_rate_nuke_protect(core); if (cnt < 0) return cnt; clk_core_init_rate_req(core, &req, req_rate); trace_clk_rate_request_start(&req); ret = clk_core_round_rate_nolock(core, &req); trace_clk_rate_request_done(&req); /* restore the protection */ clk_core_rate_restore_protect(core, cnt); return ret ? 0 : req.rate; } static int clk_core_set_rate_nolock(struct clk_core *core, unsigned long req_rate) { struct clk_core *top, *fail_clk; unsigned long rate; int ret; if (!core) return 0; rate = clk_core_req_round_rate_nolock(core, req_rate); /* bail early if nothing to do */ if (rate == clk_core_get_rate_nolock(core)) return 0; /* fail on a direct rate set of a protected provider */ if (clk_core_rate_is_protected(core)) return -EBUSY; /* calculate new rates and get the topmost changed clock */ top = clk_calc_new_rates(core, req_rate); if (!top) return -EINVAL; ret = clk_pm_runtime_get(core); if (ret) return ret; /* notify that we are about to change rates */ fail_clk = clk_propagate_rate_change(top, PRE_RATE_CHANGE); if (fail_clk) { pr_debug("%s: failed to set %s rate\n", __func__, fail_clk->name); clk_propagate_rate_change(top, ABORT_RATE_CHANGE); ret = -EBUSY; goto err; } /* change the rates */ clk_change_rate(top); core->req_rate = req_rate; err: clk_pm_runtime_put(core); return ret; } /** * clk_set_rate - specify a new rate for clk * @clk: the clk whose rate is being changed * @rate: the new rate for clk * * In the simplest case clk_set_rate will only adjust the rate of clk. * * Setting the CLK_SET_RATE_PARENT flag allows the rate change operation to * propagate up to clk's parent; whether or not this happens depends on the * outcome of clk's .round_rate implementation. If *parent_rate is unchanged * after calling .round_rate then upstream parent propagation is ignored. If * *parent_rate comes back with a new rate for clk's parent then we propagate * up to clk's parent and set its rate. Upward propagation will continue * until either a clk does not support the CLK_SET_RATE_PARENT flag or * .round_rate stops requesting changes to clk's parent_rate. * * Rate changes are accomplished via tree traversal that also recalculates the * rates for the clocks and fires off POST_RATE_CHANGE notifiers. * * Returns 0 on success, -EERROR otherwise. */ int clk_set_rate(struct clk *clk, unsigned long rate) { int ret; if (!clk) return 0; /* prevent racing with updates to the clock topology */ clk_prepare_lock(); if (clk->exclusive_count) clk_core_rate_unprotect(clk->core); ret = clk_core_set_rate_nolock(clk->core, rate); if (clk->exclusive_count) clk_core_rate_protect(clk->core); clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_set_rate); /** * clk_set_rate_exclusive - specify a new rate and get exclusive control * @clk: the clk whose rate is being changed * @rate: the new rate for clk * * This is a combination of clk_set_rate() and clk_rate_exclusive_get() * within a critical section * * This can be used initially to ensure that at least 1 consumer is * satisfied when several consumers are competing for exclusivity over the * same clock provider. * * The exclusivity is not applied if setting the rate failed. * * Calls to clk_rate_exclusive_get() should be balanced with calls to * clk_rate_exclusive_put(). * * Returns 0 on success, -EERROR otherwise. */ int clk_set_rate_exclusive(struct clk *clk, unsigned long rate) { int ret; if (!clk) return 0; /* prevent racing with updates to the clock topology */ clk_prepare_lock(); /* * The temporary protection removal is not here, on purpose * This function is meant to be used instead of clk_rate_protect, * so before the consumer code path protect the clock provider */ ret = clk_core_set_rate_nolock(clk->core, rate); if (!ret) { clk_core_rate_protect(clk->core); clk->exclusive_count++; } clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_set_rate_exclusive); static int clk_set_rate_range_nolock(struct clk *clk, unsigned long min, unsigned long max) { int ret = 0; unsigned long old_min, old_max, rate; lockdep_assert_held(&prepare_lock); if (!clk) return 0; trace_clk_set_rate_range(clk->core, min, max); if (min > max) { pr_err("%s: clk %s dev %s con %s: invalid range [%lu, %lu]\n", __func__, clk->core->name, clk->dev_id, clk->con_id, min, max); return -EINVAL; } if (clk->exclusive_count) clk_core_rate_unprotect(clk->core); /* Save the current values in case we need to rollback the change */ old_min = clk->min_rate; old_max = clk->max_rate; clk->min_rate = min; clk->max_rate = max; if (!clk_core_check_boundaries(clk->core, min, max)) { ret = -EINVAL; goto out; } rate = clk->core->req_rate; if (clk->core->flags & CLK_GET_RATE_NOCACHE) rate = clk_core_get_rate_recalc(clk->core); /* * Since the boundaries have been changed, let's give the * opportunity to the provider to adjust the clock rate based on * the new boundaries. * * We also need to handle the case where the clock is currently * outside of the boundaries. Clamping the last requested rate * to the current minimum and maximum will also handle this. * * FIXME: * There is a catch. It may fail for the usual reason (clock * broken, clock protected, etc) but also because: * - round_rate() was not favorable and fell on the wrong * side of the boundary * - the determine_rate() callback does not really check for * this corner case when determining the rate */ rate = clamp(rate, min, max); ret = clk_core_set_rate_nolock(clk->core, rate); if (ret) { /* rollback the changes */ clk->min_rate = old_min; clk->max_rate = old_max; } out: if (clk->exclusive_count) clk_core_rate_protect(clk->core); return ret; } /** * clk_set_rate_range - set a rate range for a clock source * @clk: clock source * @min: desired minimum clock rate in Hz, inclusive * @max: desired maximum clock rate in Hz, inclusive * * Return: 0 for success or negative errno on failure. */ int clk_set_rate_range(struct clk *clk, unsigned long min, unsigned long max) { int ret; if (!clk) return 0; clk_prepare_lock(); ret = clk_set_rate_range_nolock(clk, min, max); clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_set_rate_range); /** * clk_set_min_rate - set a minimum clock rate for a clock source * @clk: clock source * @rate: desired minimum clock rate in Hz, inclusive * * Returns success (0) or negative errno. */ int clk_set_min_rate(struct clk *clk, unsigned long rate) { if (!clk) return 0; trace_clk_set_min_rate(clk->core, rate); return clk_set_rate_range(clk, rate, clk->max_rate); } EXPORT_SYMBOL_GPL(clk_set_min_rate); /** * clk_set_max_rate - set a maximum clock rate for a clock source * @clk: clock source * @rate: desired maximum clock rate in Hz, inclusive * * Returns success (0) or negative errno. */ int clk_set_max_rate(struct clk *clk, unsigned long rate) { if (!clk) return 0; trace_clk_set_max_rate(clk->core, rate); return clk_set_rate_range(clk, clk->min_rate, rate); } EXPORT_SYMBOL_GPL(clk_set_max_rate); /** * clk_get_parent - return the parent of a clk * @clk: the clk whose parent gets returned * * Simply returns clk->parent. Returns NULL if clk is NULL. */ struct clk *clk_get_parent(struct clk *clk) { struct clk *parent; if (!clk) return NULL; clk_prepare_lock(); /* TODO: Create a per-user clk and change callers to call clk_put */ parent = !clk->core->parent ? NULL : clk->core->parent->hw->clk; clk_prepare_unlock(); return parent; } EXPORT_SYMBOL_GPL(clk_get_parent); static struct clk_core *__clk_init_parent(struct clk_core *core) { u8 index = 0; if (core->num_parents > 1 && core->ops->get_parent) index = core->ops->get_parent(core->hw); return clk_core_get_parent_by_index(core, index); } static void clk_core_reparent(struct clk_core *core, struct clk_core *new_parent) { clk_reparent(core, new_parent); __clk_recalc_accuracies(core); __clk_recalc_rates(core, true, POST_RATE_CHANGE); } void clk_hw_reparent(struct clk_hw *hw, struct clk_hw *new_parent) { if (!hw) return; clk_core_reparent(hw->core, !new_parent ? NULL : new_parent->core); } /** * clk_has_parent - check if a clock is a possible parent for another * @clk: clock source * @parent: parent clock source * * This function can be used in drivers that need to check that a clock can be * the parent of another without actually changing the parent. * * Returns true if @parent is a possible parent for @clk, false otherwise. */ bool clk_has_parent(const struct clk *clk, const struct clk *parent) { /* NULL clocks should be nops, so return success if either is NULL. */ if (!clk || !parent) return true; return clk_core_has_parent(clk->core, parent->core); } EXPORT_SYMBOL_GPL(clk_has_parent); static int clk_core_set_parent_nolock(struct clk_core *core, struct clk_core *parent) { int ret = 0; int p_index = 0; unsigned long p_rate = 0; lockdep_assert_held(&prepare_lock); if (!core) return 0; if (core->parent == parent) return 0; /* verify ops for multi-parent clks */ if (core->num_parents > 1 && !core->ops->set_parent) return -EPERM; /* check that we are allowed to re-parent if the clock is in use */ if ((core->flags & CLK_SET_PARENT_GATE) && core->prepare_count) return -EBUSY; if (clk_core_rate_is_protected(core)) return -EBUSY; /* try finding the new parent index */ if (parent) { p_index = clk_fetch_parent_index(core, parent); if (p_index < 0) { pr_debug("%s: clk %s can not be parent of clk %s\n", __func__, parent->name, core->name); return p_index; } p_rate = parent->rate; } ret = clk_pm_runtime_get(core); if (ret) return ret; /* propagate PRE_RATE_CHANGE notifications */ ret = __clk_speculate_rates(core, p_rate); /* abort if a driver objects */ if (ret & NOTIFY_STOP_MASK) goto runtime_put; /* do the re-parent */ ret = __clk_set_parent(core, parent, p_index); /* propagate rate an accuracy recalculation accordingly */ if (ret) { __clk_recalc_rates(core, true, ABORT_RATE_CHANGE); } else { __clk_recalc_rates(core, true, POST_RATE_CHANGE); __clk_recalc_accuracies(core); } runtime_put: clk_pm_runtime_put(core); return ret; } int clk_hw_set_parent(struct clk_hw *hw, struct clk_hw *parent) { return clk_core_set_parent_nolock(hw->core, parent->core); } EXPORT_SYMBOL_GPL(clk_hw_set_parent); /** * clk_set_parent - switch the parent of a mux clk * @clk: the mux clk whose input we are switching * @parent: the new input to clk * * Re-parent clk to use parent as its new input source. If clk is in * prepared state, the clk will get enabled for the duration of this call. If * that's not acceptable for a specific clk (Eg: the consumer can't handle * that, the reparenting is glitchy in hardware, etc), use the * CLK_SET_PARENT_GATE flag to allow reparenting only when clk is unprepared. * * After successfully changing clk's parent clk_set_parent will update the * clk topology, sysfs topology and propagate rate recalculation via * __clk_recalc_rates. * * Returns 0 on success, -EERROR otherwise. */ int clk_set_parent(struct clk *clk, struct clk *parent) { int ret; if (!clk) return 0; clk_prepare_lock(); if (clk->exclusive_count) clk_core_rate_unprotect(clk->core); ret = clk_core_set_parent_nolock(clk->core, parent ? parent->core : NULL); if (clk->exclusive_count) clk_core_rate_protect(clk->core); clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_set_parent); static int clk_core_set_phase_nolock(struct clk_core *core, int degrees) { int ret = -EINVAL; lockdep_assert_held(&prepare_lock); if (!core) return 0; if (clk_core_rate_is_protected(core)) return -EBUSY; trace_clk_set_phase(core, degrees); if (core->ops->set_phase) { ret = core->ops->set_phase(core->hw, degrees); if (!ret) core->phase = degrees; } trace_clk_set_phase_complete(core, degrees); return ret; } /** * clk_set_phase - adjust the phase shift of a clock signal * @clk: clock signal source * @degrees: number of degrees the signal is shifted * * Shifts the phase of a clock signal by the specified * degrees. Returns 0 on success, -EERROR otherwise. * * This function makes no distinction about the input or reference * signal that we adjust the clock signal phase against. For example * phase locked-loop clock signal generators we may shift phase with * respect to feedback clock signal input, but for other cases the * clock phase may be shifted with respect to some other, unspecified * signal. * * Additionally the concept of phase shift does not propagate through * the clock tree hierarchy, which sets it apart from clock rates and * clock accuracy. A parent clock phase attribute does not have an * impact on the phase attribute of a child clock. */ int clk_set_phase(struct clk *clk, int degrees) { int ret; if (!clk) return 0; /* sanity check degrees */ degrees %= 360; if (degrees < 0) degrees += 360; clk_prepare_lock(); if (clk->exclusive_count) clk_core_rate_unprotect(clk->core); ret = clk_core_set_phase_nolock(clk->core, degrees); if (clk->exclusive_count) clk_core_rate_protect(clk->core); clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_set_phase); static int clk_core_get_phase(struct clk_core *core) { int ret; lockdep_assert_held(&prepare_lock); if (!core->ops->get_phase) return 0; /* Always try to update cached phase if possible */ ret = core->ops->get_phase(core->hw); if (ret >= 0) core->phase = ret; return ret; } /** * clk_get_phase - return the phase shift of a clock signal * @clk: clock signal source * * Returns the phase shift of a clock node in degrees, otherwise returns * -EERROR. */ int clk_get_phase(struct clk *clk) { int ret; if (!clk) return 0; clk_prepare_lock(); ret = clk_core_get_phase(clk->core); clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_get_phase); static void clk_core_reset_duty_cycle_nolock(struct clk_core *core) { /* Assume a default value of 50% */ core->duty.num = 1; core->duty.den = 2; } static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core); static int clk_core_update_duty_cycle_nolock(struct clk_core *core) { struct clk_duty *duty = &core->duty; int ret = 0; if (!core->ops->get_duty_cycle) return clk_core_update_duty_cycle_parent_nolock(core); ret = core->ops->get_duty_cycle(core->hw, duty); if (ret) goto reset; /* Don't trust the clock provider too much */ if (duty->den == 0 || duty->num > duty->den) { ret = -EINVAL; goto reset; } return 0; reset: clk_core_reset_duty_cycle_nolock(core); return ret; } static int clk_core_update_duty_cycle_parent_nolock(struct clk_core *core) { int ret = 0; if (core->parent && core->flags & CLK_DUTY_CYCLE_PARENT) { ret = clk_core_update_duty_cycle_nolock(core->parent); memcpy(&core->duty, &core->parent->duty, sizeof(core->duty)); } else { clk_core_reset_duty_cycle_nolock(core); } return ret; } static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core, struct clk_duty *duty); static int clk_core_set_duty_cycle_nolock(struct clk_core *core, struct clk_duty *duty) { int ret; lockdep_assert_held(&prepare_lock); if (clk_core_rate_is_protected(core)) return -EBUSY; trace_clk_set_duty_cycle(core, duty); if (!core->ops->set_duty_cycle) return clk_core_set_duty_cycle_parent_nolock(core, duty); ret = core->ops->set_duty_cycle(core->hw, duty); if (!ret) memcpy(&core->duty, duty, sizeof(*duty)); trace_clk_set_duty_cycle_complete(core, duty); return ret; } static int clk_core_set_duty_cycle_parent_nolock(struct clk_core *core, struct clk_duty *duty) { int ret = 0; if (core->parent && core->flags & (CLK_DUTY_CYCLE_PARENT | CLK_SET_RATE_PARENT)) { ret = clk_core_set_duty_cycle_nolock(core->parent, duty); memcpy(&core->duty, &core->parent->duty, sizeof(core->duty)); } return ret; } /** * clk_set_duty_cycle - adjust the duty cycle ratio of a clock signal * @clk: clock signal source * @num: numerator of the duty cycle ratio to be applied * @den: denominator of the duty cycle ratio to be applied * * Apply the duty cycle ratio if the ratio is valid and the clock can * perform this operation * * Returns (0) on success, a negative errno otherwise. */ int clk_set_duty_cycle(struct clk *clk, unsigned int num, unsigned int den) { int ret; struct clk_duty duty; if (!clk) return 0; /* sanity check the ratio */ if (den == 0 || num > den) return -EINVAL; duty.num = num; duty.den = den; clk_prepare_lock(); if (clk->exclusive_count) clk_core_rate_unprotect(clk->core); ret = clk_core_set_duty_cycle_nolock(clk->core, &duty); if (clk->exclusive_count) clk_core_rate_protect(clk->core); clk_prepare_unlock(); return ret; } EXPORT_SYMBOL_GPL(clk_set_duty_cycle); static int clk_core_get_scaled_duty_cycle(struct clk_core *core, unsigned int scale) { struct clk_duty *duty = &core->duty; int ret; clk_prepare_lock(); ret = clk_core_update_duty_cycle_nolock(core); if (!ret) ret = mult_frac(scale, duty->num, duty->den); clk_prepare_unlock(); return ret; } /** * clk_get_scaled_duty_cycle - return the duty cycle ratio of a clock signal * @clk: clock signal source * @scale: scaling factor to be applied to represent the ratio as an integer * * Returns the duty cycle ratio of a clock node multiplied by the provided * scaling factor, or negative errno on error. */ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale) { if (!clk) return 0; return clk_core_get_scaled_duty_cycle(clk->core, scale); } EXPORT_SYMBOL_GPL(clk_get_scaled_duty_cycle); /** * clk_is_match - check if two clk's point to the same hardware clock * @p: clk compared against q * @q: clk compared against p * * Returns true if the two struct clk pointers both point to the same hardware * clock node. Put differently, returns true if struct clk *p and struct clk *q * share the same struct clk_core object. * * Returns false otherwise. Note that two NULL clks are treated as matching. */ bool clk_is_match(const struct clk *p, const struct clk *q) { /* trivial case: identical struct clk's or both NULL */ if (p == q) return true; /* true if clk->core pointers match. Avoid dereferencing garbage */ if (!IS_ERR_OR_NULL(p) && !IS_ERR_OR_NULL(q)) if (p->core == q->core) return true; return false; } EXPORT_SYMBOL_GPL(clk_is_match); /*** debugfs support ***/ #ifdef CONFIG_DEBUG_FS #include <linux/debugfs.h> static struct dentry *rootdir; static int inited = 0; static DEFINE_MUTEX(clk_debug_lock); static HLIST_HEAD(clk_debug_list); static struct hlist_head *orphan_list[] = { &clk_orphan_list, NULL, }; static void clk_summary_show_one(struct seq_file *s, struct clk_core *c, int level) { int phase; struct clk *clk_user; int multi_node = 0; seq_printf(s, "%*s%-*s %-7d %-8d %-8d %-11lu %-10lu ", level * 3 + 1, "", 35 - level * 3, c->name, c->enable_count, c->prepare_count, c->protect_count, clk_core_get_rate_recalc(c), clk_core_get_accuracy_recalc(c)); phase = clk_core_get_phase(c); if (phase >= 0) seq_printf(s, "%-5d", phase); else seq_puts(s, "-----"); seq_printf(s, " %-6d", clk_core_get_scaled_duty_cycle(c, 100000)); if (c->ops->is_enabled) seq_printf(s, " %5c ", clk_core_is_enabled(c) ? 'Y' : 'N'); else if (!c->ops->enable) seq_printf(s, " %5c ", 'Y'); else seq_printf(s, " %5c ", '?'); hlist_for_each_entry(clk_user, &c->clks, clks_node) { seq_printf(s, "%*s%-*s %-25s\n", level * 3 + 2 + 105 * multi_node, "", 30, clk_user->dev_id ? clk_user->dev_id : "deviceless", clk_user->con_id ? clk_user->con_id : "no_connection_id"); multi_node = 1; } } static void clk_summary_show_subtree(struct seq_file *s, struct clk_core *c, int level) { struct clk_core *child; clk_summary_show_one(s, c, level); hlist_for_each_entry(child, &c->children, child_node) clk_summary_show_subtree(s, child, level + 1); } static int clk_summary_show(struct seq_file *s, void *data) { struct clk_core *c; struct hlist_head **lists = s->private; int ret; seq_puts(s, " enable prepare protect duty hardware connection\n"); seq_puts(s, " clock count count count rate accuracy phase cycle enable consumer id\n"); seq_puts(s, "---------------------------------------------------------------------------------------------------------------------------------------------\n"); ret = clk_pm_runtime_get_all(); if (ret) return ret; clk_prepare_lock(); for (; *lists; lists++) hlist_for_each_entry(c, *lists, child_node) clk_summary_show_subtree(s, c, 0); clk_prepare_unlock(); clk_pm_runtime_put_all(); return 0; } DEFINE_SHOW_ATTRIBUTE(clk_summary); static void clk_dump_one(struct seq_file *s, struct clk_core *c, int level) { int phase; unsigned long min_rate, max_rate; clk_core_get_boundaries(c, &min_rate, &max_rate); /* This should be JSON format, i.e. elements separated with a comma */ seq_printf(s, "\"%s\": { ", c->name); seq_printf(s, "\"enable_count\": %d,", c->enable_count); seq_printf(s, "\"prepare_count\": %d,", c->prepare_count); seq_printf(s, "\"protect_count\": %d,", c->protect_count); seq_printf(s, "\"rate\": %lu,", clk_core_get_rate_recalc(c)); seq_printf(s, "\"min_rate\": %lu,", min_rate); seq_printf(s, "\"max_rate\": %lu,", max_rate); seq_printf(s, "\"accuracy\": %lu,", clk_core_get_accuracy_recalc(c)); phase = clk_core_get_phase(c); if (phase >= 0) seq_printf(s, "\"phase\": %d,", phase); seq_printf(s, "\"duty_cycle\": %u", clk_core_get_scaled_duty_cycle(c, 100000)); } static void clk_dump_subtree(struct seq_file *s, struct clk_core *c, int level) { struct clk_core *child; clk_dump_one(s, c, level); hlist_for_each_entry(child, &c->children, child_node) { seq_putc(s, ','); clk_dump_subtree(s, child, level + 1); } seq_putc(s, '}'); } static int clk_dump_show(struct seq_file *s, void *data) { struct clk_core *c; bool first_node = true; struct hlist_head **lists = s->private; int ret; ret = clk_pm_runtime_get_all(); if (ret) return ret; seq_putc(s, '{'); clk_prepare_lock(); for (; *lists; lists++) { hlist_for_each_entry(c, *lists, child_node) { if (!first_node) seq_putc(s, ','); first_node = false; clk_dump_subtree(s, c, 0); } } clk_prepare_unlock(); clk_pm_runtime_put_all(); seq_puts(s, "}\n"); return 0; } DEFINE_SHOW_ATTRIBUTE(clk_dump); #undef CLOCK_ALLOW_WRITE_DEBUGFS #ifdef CLOCK_ALLOW_WRITE_DEBUGFS /* * This can be dangerous, therefore don't provide any real compile time * configuration option for this feature. * People who want to use this will need to modify the source code directly. */ static int clk_rate_set(void *data, u64 val) { struct clk_core *core = data; int ret; clk_prepare_lock(); ret = clk_core_set_rate_nolock(core, val); clk_prepare_unlock(); return ret; } #define clk_rate_mode 0644 static int clk_phase_set(void *data, u64 val) { struct clk_core *core = data; int degrees = do_div(val, 360); int ret; clk_prepare_lock(); ret = clk_core_set_phase_nolock(core, degrees); clk_prepare_unlock(); return ret; } #define clk_phase_mode 0644 static int clk_prepare_enable_set(void *data, u64 val) { struct clk_core *core = data; int ret = 0; if (val) ret = clk_prepare_enable(core->hw->clk); else clk_disable_unprepare(core->hw->clk); return ret; } static int clk_prepare_enable_get(void *data, u64 *val) { struct clk_core *core = data; *val = core->enable_count && core->prepare_count; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(clk_prepare_enable_fops, clk_prepare_enable_get, clk_prepare_enable_set, "%llu\n"); #else #define clk_rate_set NULL #define clk_rate_mode 0444 #define clk_phase_set NULL #define clk_phase_mode 0644 #endif static int clk_rate_get(void *data, u64 *val) { struct clk_core *core = data; clk_prepare_lock(); *val = clk_core_get_rate_recalc(core); clk_prepare_unlock(); return 0; } DEFINE_DEBUGFS_ATTRIBUTE(clk_rate_fops, clk_rate_get, clk_rate_set, "%llu\n"); static int clk_phase_get(void *data, u64 *val) { struct clk_core *core = data; *val = core->phase; return 0; } DEFINE_DEBUGFS_ATTRIBUTE(clk_phase_fops, clk_phase_get, clk_phase_set, "%llu\n"); static const struct { unsigned long flag; const char *name; } clk_flags[] = { #define ENTRY(f) { f, #f } ENTRY(CLK_SET_RATE_GATE), ENTRY(CLK_SET_PARENT_GATE), ENTRY(CLK_SET_RATE_PARENT), ENTRY(CLK_IGNORE_UNUSED), ENTRY(CLK_GET_RATE_NOCACHE), ENTRY(CLK_SET_RATE_NO_REPARENT), ENTRY(CLK_GET_ACCURACY_NOCACHE), ENTRY(CLK_RECALC_NEW_RATES), ENTRY(CLK_SET_RATE_UNGATE), ENTRY(CLK_IS_CRITICAL), ENTRY(CLK_OPS_PARENT_ENABLE), ENTRY(CLK_DUTY_CYCLE_PARENT), #undef ENTRY }; static int clk_flags_show(struct seq_file *s, void *data) { struct clk_core *core = s->private; unsigned long flags = core->flags; unsigned int i; for (i = 0; flags && i < ARRAY_SIZE(clk_flags); i++) { if (flags & clk_flags[i].flag) { seq_printf(s, "%s\n", clk_flags[i].name); flags &= ~clk_flags[i].flag; } } if (flags) { /* Unknown flags */ seq_printf(s, "0x%lx\n", flags); } return 0; } DEFINE_SHOW_ATTRIBUTE(clk_flags); static void possible_parent_show(struct seq_file *s, struct clk_core *core, unsigned int i, char terminator) { struct clk_core *parent; const char *name = NULL; /* * Go through the following options to fetch a parent's name. * * 1. Fetch the registered parent clock and use its name * 2. Use the global (fallback) name if specified * 3. Use the local fw_name if provided * 4. Fetch parent clock's clock-output-name if DT index was set * * This may still fail in some cases, such as when the parent is * specified directly via a struct clk_hw pointer, but it isn't * registered (yet). */ parent = clk_core_get_parent_by_index(core, i); if (parent) { seq_puts(s, parent->name); } else if (core->parents[i].name) { seq_puts(s, core->parents[i].name); } else if (core->parents[i].fw_name) { seq_printf(s, "<%s>(fw)", core->parents[i].fw_name); } else { if (core->parents[i].index >= 0) name = of_clk_get_parent_name(core->of_node, core->parents[i].index); if (!name) name = "(missing)"; seq_puts(s, name); } seq_putc(s, terminator); } static int possible_parents_show(struct seq_file *s, void *data) { struct clk_core *core = s->private; int i; for (i = 0; i < core->num_parents - 1; i++) possible_parent_show(s, core, i, ' '); possible_parent_show(s, core, i, '\n'); return 0; } DEFINE_SHOW_ATTRIBUTE(possible_parents); static int current_parent_show(struct seq_file *s, void *data) { struct clk_core *core = s->private; if (core->parent) seq_printf(s, "%s\n", core->parent->name); return 0; } DEFINE_SHOW_ATTRIBUTE(current_parent); #ifdef CLOCK_ALLOW_WRITE_DEBUGFS static ssize_t current_parent_write(struct file *file, const char __user *ubuf, size_t count, loff_t *ppos) { struct seq_file *s = file->private_data; struct clk_core *core = s->private; struct clk_core *parent; u8 idx; int err; err = kstrtou8_from_user(ubuf, count, 0, &idx); if (err < 0) return err; parent = clk_core_get_parent_by_index(core, idx); if (!parent) return -ENOENT; clk_prepare_lock(); err = clk_core_set_parent_nolock(core, parent); clk_prepare_unlock(); if (err) return err; return count; } static const struct file_operations current_parent_rw_fops = { .open = current_parent_open, .write = current_parent_write, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; #endif static int clk_duty_cycle_show(struct seq_file *s, void *data) { struct clk_core *core = s->private; struct clk_duty *duty = &core->duty; seq_printf(s, "%u/%u\n", duty->num, duty->den); return 0; } DEFINE_SHOW_ATTRIBUTE(clk_duty_cycle); static int clk_min_rate_show(struct seq_file *s, void *data) { struct clk_core *core = s->private; unsigned long min_rate, max_rate; clk_prepare_lock(); clk_core_get_boundaries(core, &min_rate, &max_rate); clk_prepare_unlock(); seq_printf(s, "%lu\n", min_rate); return 0; } DEFINE_SHOW_ATTRIBUTE(clk_min_rate); static int clk_max_rate_show(struct seq_file *s, void *data) { struct clk_core *core = s->private; unsigned long min_rate, max_rate; clk_prepare_lock(); clk_core_get_boundaries(core, &min_rate, &max_rate); clk_prepare_unlock(); seq_printf(s, "%lu\n", max_rate); return 0; } DEFINE_SHOW_ATTRIBUTE(clk_max_rate); static void clk_debug_create_one(struct clk_core *core, struct dentry *pdentry) { struct dentry *root; if (!core || !pdentry) return; root = debugfs_create_dir(core->name, pdentry); core->dentry = root; debugfs_create_file("clk_rate", clk_rate_mode, root, core, &clk_rate_fops); debugfs_create_file("clk_min_rate", 0444, root, core, &clk_min_rate_fops); debugfs_create_file("clk_max_rate", 0444, root, core, &clk_max_rate_fops); debugfs_create_ulong("clk_accuracy", 0444, root, &core->accuracy); debugfs_create_file("clk_phase", clk_phase_mode, root, core, &clk_phase_fops); debugfs_create_file("clk_flags", 0444, root, core, &clk_flags_fops); debugfs_create_u32("clk_prepare_count", 0444, root, &core->prepare_count); debugfs_create_u32("clk_enable_count", 0444, root, &core->enable_count); debugfs_create_u32("clk_protect_count", 0444, root, &core->protect_count); debugfs_create_u32("clk_notifier_count", 0444, root, &core->notifier_count); debugfs_create_file("clk_duty_cycle", 0444, root, core, &clk_duty_cycle_fops); #ifdef CLOCK_ALLOW_WRITE_DEBUGFS debugfs_create_file("clk_prepare_enable", 0644, root, core, &clk_prepare_enable_fops); if (core->num_parents > 1) debugfs_create_file("clk_parent", 0644, root, core, &current_parent_rw_fops); else #endif if (core->num_parents > 0) debugfs_create_file("clk_parent", 0444, root, core, &current_parent_fops); if (core->num_parents > 1) debugfs_create_file("clk_possible_parents", 0444, root, core, &possible_parents_fops); if (core->ops->debug_init) core->ops->debug_init(core->hw, core->dentry); } /** * clk_debug_register - add a clk node to the debugfs clk directory * @core: the clk being added to the debugfs clk directory * * Dynamically adds a clk to the debugfs clk directory if debugfs has been * initialized. Otherwise it bails out early since the debugfs clk directory * will be created lazily by clk_debug_init as part of a late_initcall. */ static void clk_debug_register(struct clk_core *core) { mutex_lock(&clk_debug_lock); hlist_add_head(&core->debug_node, &clk_debug_list); if (inited) clk_debug_create_one(core, rootdir); mutex_unlock(&clk_debug_lock); } /** * clk_debug_unregister - remove a clk node from the debugfs clk directory * @core: the clk being removed from the debugfs clk directory * * Dynamically removes a clk and all its child nodes from the * debugfs clk directory if clk->dentry points to debugfs created by * clk_debug_register in __clk_core_init. */ static void clk_debug_unregister(struct clk_core *core) { mutex_lock(&clk_debug_lock); hlist_del_init(&core->debug_node); debugfs_remove_recursive(core->dentry); core->dentry = NULL; mutex_unlock(&clk_debug_lock); } /** * clk_debug_init - lazily populate the debugfs clk directory * * clks are often initialized very early during boot before memory can be * dynamically allocated and well before debugfs is setup. This function * populates the debugfs clk directory once at boot-time when we know that * debugfs is setup. It should only be called once at boot-time, all other clks * added dynamically will be done so with clk_debug_register. */ static int __init clk_debug_init(void) { struct clk_core *core; #ifdef CLOCK_ALLOW_WRITE_DEBUGFS pr_warn("\n"); pr_warn("********************************************************************\n"); pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n"); pr_warn("** **\n"); pr_warn("** WRITEABLE clk DebugFS SUPPORT HAS BEEN ENABLED IN THIS KERNEL **\n"); pr_warn("**