Total coverage: 267182 (18%)of 1563800
20 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 /* SPDX-License-Identifier: GPL-2.0-only */ /* * fence-chain: chain fences together in a timeline * * Copyright (C) 2018 Advanced Micro Devices, Inc. * Authors: * Christian König <christian.koenig@amd.com> */ #ifndef __LINUX_DMA_FENCE_CHAIN_H #define __LINUX_DMA_FENCE_CHAIN_H #include <linux/dma-fence.h> #include <linux/irq_work.h> #include <linux/slab.h> /** * struct dma_fence_chain - fence to represent an node of a fence chain * @base: fence base class * @prev: previous fence of the chain * @prev_seqno: original previous seqno before garbage collection * @fence: encapsulated fence * @lock: spinlock for fence handling */ struct dma_fence_chain { struct dma_fence base; struct dma_fence __rcu *prev; u64 prev_seqno; struct dma_fence *fence; union { /** * @cb: callback for signaling * * This is used to add the callback for signaling the * complection of the fence chain. Never used at the same time * as the irq work. */ struct dma_fence_cb cb; /** * @work: irq work item for signaling * * Irq work structure to allow us to add the callback without * running into lock inversion. Never used at the same time as * the callback. */ struct irq_work work; }; spinlock_t lock; }; /** * to_dma_fence_chain - cast a fence to a dma_fence_chain * @fence: fence to cast to a dma_fence_array * * Returns NULL if the fence is not a dma_fence_chain, * or the dma_fence_chain otherwise. */ static inline struct dma_fence_chain * to_dma_fence_chain(struct dma_fence *fence) { if (!fence || !dma_fence_is_chain(fence)) return NULL; return container_of(fence, struct dma_fence_chain, base); } /** * dma_fence_chain_contained - return the contained fence * @fence: the fence to test * * If the fence is a dma_fence_chain the function returns the fence contained * inside the chain object, otherwise it returns the fence itself. */ static inline struct dma_fence * dma_fence_chain_contained(struct dma_fence *fence) { struct dma_fence_chain *chain = to_dma_fence_chain(fence); return chain ? chain->fence : fence; } /** * dma_fence_chain_alloc * * Returns a new struct dma_fence_chain object or NULL on failure. * * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). The typecast is * intentional to enforce typesafety. */ #define dma_fence_chain_alloc() \ ((struct dma_fence_chain *)kmalloc(sizeof(struct dma_fence_chain), GFP_KERNEL)) /** * dma_fence_chain_free * @chain: chain node to free * * Frees up an allocated but not used struct dma_fence_chain object. This * doesn't need an RCU grace period since the fence was never initialized nor * published. After dma_fence_chain_init() has been called the fence must be * released by calling dma_fence_put(), and not through this function. */ static inline void dma_fence_chain_free(struct dma_fence_chain *chain) { kfree(chain); }; /** * dma_fence_chain_for_each - iterate over all fences in chain * @iter: current fence * @head: starting point * * Iterate over all fences in the chain. We keep a reference to the current * fence while inside the loop which must be dropped when breaking out. * * For a deep dive iterator see dma_fence_unwrap_for_each(). */ #define dma_fence_chain_for_each(iter, head) \ for (iter = dma_fence_get(head); iter; \ iter = dma_fence_chain_walk(iter)) struct dma_fence *dma_fence_chain_walk(struct dma_fence *fence); int dma_fence_chain_find_seqno(struct dma_fence **pfence, uint64_t seqno); void dma_fence_chain_init(struct dma_fence_chain *chain, struct dma_fence *prev, struct dma_fence *fence, uint64_t seqno); #endif /* __LINUX_DMA_FENCE_CHAIN_H */
33 33 33 2 33 31 2 33 32 26 33 33 26 26 33 4 31 31 33 33 33 33 29 33 33 33 33 33 33 33 33 33 33 25 28 22 33 33 33 31 31 31 31 33 33 33 1 1 2 2 33 31 2 2 2 2 2 31 31 29 1 1 1 31 68 67 68 67 66 67 67 68 68 68 7 7 61 7 61 7 31 33 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 // SPDX-License-Identifier: GPL-2.0 #include <linux/acpi.h> #include <linux/array_size.h> #include <linux/bitmap.h> #include <linux/cleanup.h> #include <linux/compat.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irqdesc.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/module.h> #include <linux/nospec.h> #include <linux/of.h> #include <linux/pinctrl/consumer.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/srcu.h> #include <linux/string.h> #include <linux/gpio.h> #include <linux/gpio/driver.h> #include <linux/gpio/machine.h> #include <uapi/linux/gpio.h> #include "gpiolib-acpi.h" #include "gpiolib-cdev.h" #include "gpiolib-of.h" #include "gpiolib-swnode.h" #include "gpiolib-sysfs.h" #include "gpiolib.h" #define CREATE_TRACE_POINTS #include <trace/events/gpio.h> /* Implementation infrastructure for GPIO interfaces. * * The GPIO programming interface allows for inlining speed-critical * get/set operations for common cases, so that access to SOC-integrated * GPIOs can sometimes cost only an instruction or two per bit. */ /* Device and char device-related information */ static DEFINE_IDA(gpio_ida); static dev_t gpio_devt; #define GPIO_DEV_MAX 256 /* 256 GPIO chip devices supported */ static int gpio_bus_match(struct device *dev, const struct device_driver *drv) { struct fwnode_handle *fwnode = dev_fwnode(dev); /* * Only match if the fwnode doesn't already have a proper struct device * created for it. */ if (fwnode && fwnode->dev != dev) return 0; return 1; } static const struct bus_type gpio_bus_type = { .name = "gpio", .match = gpio_bus_match, }; /* * Number of GPIOs to use for the fast path in set array */ #define FASTPATH_NGPIO CONFIG_GPIOLIB_FASTPATH_LIMIT static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); static LIST_HEAD(gpio_devices); /* Protects the GPIO device list against concurrent modifications. */ static DEFINE_MUTEX(gpio_devices_lock); /* Ensures coherence during read-only accesses to the list of GPIO devices. */ DEFINE_STATIC_SRCU(gpio_devices_srcu); static DEFINE_MUTEX(gpio_machine_hogs_mutex); static LIST_HEAD(gpio_machine_hogs); const char *const gpio_suffixes[] = { "gpios", "gpio", NULL }; static void gpiochip_free_hogs(struct gpio_chip *gc); static int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gc); static int gpiochip_irqchip_init_hw(struct gpio_chip *gc); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc); static bool gpiolib_initialized; const char *gpiod_get_label(struct gpio_desc *desc) { struct gpio_desc_label *label; unsigned long flags; flags = READ_ONCE(desc->flags); label = srcu_dereference_check(desc->label, &desc->gdev->desc_srcu, srcu_read_lock_held(&desc->gdev->desc_srcu)); if (test_bit(FLAG_USED_AS_IRQ, &flags)) return label ? label->str : "interrupt"; if (!test_bit(FLAG_REQUESTED, &flags)) return NULL; return label ? label->str : NULL; } static void desc_free_label(struct rcu_head *rh) { kfree(container_of(rh, struct gpio_desc_label, rh)); } static int desc_set_label(struct gpio_desc *desc, const char *label) { struct gpio_desc_label *new = NULL, *old; if (label) { new = kzalloc(struct_size(new, str, strlen(label) + 1), GFP_KERNEL); if (!new) return -ENOMEM; strcpy(new->str, label); } old = rcu_replace_pointer(desc->label, new, 1); if (old) call_srcu(&desc->gdev->desc_srcu, &old->rh, desc_free_label); return 0; } /** * gpio_to_desc - Convert a GPIO number to its descriptor * @gpio: global GPIO number * * Returns: * The GPIO descriptor associated with the given GPIO, or %NULL if no GPIO * with the given number exists in the system. */ struct gpio_desc *gpio_to_desc(unsigned gpio) { struct gpio_device *gdev; scoped_guard(srcu, &gpio_devices_srcu) { list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (gdev->base <= gpio && gdev->base + gdev->ngpio > gpio) return &gdev->descs[gpio - gdev->base]; } } return NULL; } EXPORT_SYMBOL_GPL(gpio_to_desc); /* This function is deprecated and will be removed soon, don't use. */ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum) { return gpio_device_get_desc(gc->gpiodev, hwnum); } /** * gpio_device_get_desc() - get the GPIO descriptor corresponding to the given * hardware number for this GPIO device * @gdev: GPIO device to get the descriptor from * @hwnum: hardware number of the GPIO for this chip * * Returns: * A pointer to the GPIO descriptor or %EINVAL if no GPIO exists in the given * chip for the specified hardware number or %ENODEV if the underlying chip * already vanished. * * The reference count of struct gpio_device is *NOT* increased like when the * GPIO is being requested for exclusive usage. It's up to the caller to make * sure the GPIO device will stay alive together with the descriptor returned * by this function. */ struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum) { if (hwnum >= gdev->ngpio) return ERR_PTR(-EINVAL); return &gdev->descs[array_index_nospec(hwnum, gdev->ngpio)]; } EXPORT_SYMBOL_GPL(gpio_device_get_desc); /** * desc_to_gpio - convert a GPIO descriptor to the integer namespace * @desc: GPIO descriptor * * This should disappear in the future but is needed since we still * use GPIO numbers for error messages and sysfs nodes. * * Returns: * The global GPIO number for the GPIO specified by its descriptor. */ int desc_to_gpio(const struct gpio_desc *desc) { return desc->gdev->base + (desc - &desc->gdev->descs[0]); } EXPORT_SYMBOL_GPL(desc_to_gpio); /** * gpiod_to_chip - Return the GPIO chip to which a GPIO descriptor belongs * @desc: descriptor to return the chip of * * *DEPRECATED* * This function is unsafe and should not be used. Using the chip address * without taking the SRCU read lock may result in dereferencing a dangling * pointer. * * Returns: * Address of the GPIO chip backing this device. */ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { if (!desc) return NULL; return gpio_device_get_chip(desc->gdev); } EXPORT_SYMBOL_GPL(gpiod_to_chip); /** * gpiod_to_gpio_device() - Return the GPIO device to which this descriptor * belongs. * @desc: Descriptor for which to return the GPIO device. * * This *DOES NOT* increase the reference count of the GPIO device as it's * expected that the descriptor is requested and the users already holds a * reference to the device. * * Returns: * Address of the GPIO device owning this descriptor. */ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) { if (!desc) return NULL; return desc->gdev; } EXPORT_SYMBOL_GPL(gpiod_to_gpio_device); /** * gpio_device_get_base() - Get the base GPIO number allocated by this device * @gdev: GPIO device * * Returns: * First GPIO number in the global GPIO numberspace for this device. */ int gpio_device_get_base(struct gpio_device *gdev) { return gdev->base; } EXPORT_SYMBOL_GPL(gpio_device_get_base); /** * gpio_device_get_label() - Get the label of this GPIO device * @gdev: GPIO device * * Returns: * Pointer to the string containing the GPIO device label. The string's * lifetime is tied to that of the underlying GPIO device. */ const char *gpio_device_get_label(struct gpio_device *gdev) { return gdev->label; } EXPORT_SYMBOL(gpio_device_get_label); /** * gpio_device_get_chip() - Get the gpio_chip implementation of this GPIO device * @gdev: GPIO device * * Returns: * Address of the GPIO chip backing this device. * * *DEPRECATED* * Until we can get rid of all non-driver users of struct gpio_chip, we must * provide a way of retrieving the pointer to it from struct gpio_device. This * is *NOT* safe as the GPIO API is considered to be hot-unpluggable and the * chip can dissapear at any moment (unlike reference-counted struct * gpio_device). * * Use at your own risk. */ struct gpio_chip *gpio_device_get_chip(struct gpio_device *gdev) { return rcu_dereference_check(gdev->chip, 1); } EXPORT_SYMBOL_GPL(gpio_device_get_chip); /* dynamic allocation of GPIOs, e.g. on a hotplugged device */ static int gpiochip_find_base_unlocked(u16 ngpio) { unsigned int base = GPIO_DYNAMIC_BASE; struct gpio_device *gdev; list_for_each_entry_srcu(gdev, &gpio_devices, list, lockdep_is_held(&gpio_devices_lock)) { /* found a free space? */ if (gdev->base >= base + ngpio) break; /* nope, check the space right after the chip */ base = gdev->base + gdev->ngpio; if (base < GPIO_DYNAMIC_BASE) base = GPIO_DYNAMIC_BASE; if (base > GPIO_DYNAMIC_MAX - ngpio) break; } if (base <= GPIO_DYNAMIC_MAX - ngpio) { pr_debug("%s: found new base at %d\n", __func__, base); return base; } else { pr_err("%s: cannot find free range\n", __func__); return -ENOSPC; } } /** * gpiod_get_direction - return the current direction of a GPIO * @desc: GPIO to get the direction of * * Returns: * 0 for output, 1 for input, or an error code in case of error. * * This function may sleep if gpiod_cansleep() is true. */ int gpiod_get_direction(struct gpio_desc *desc) { unsigned long flags; unsigned int offset; int ret; /* * We cannot use VALIDATE_DESC() as we must not return 0 for a NULL * descriptor like we usually do. */ if (IS_ERR_OR_NULL(desc)) return -EINVAL; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; offset = gpio_chip_hwgpio(desc); flags = READ_ONCE(desc->flags); /* * Open drain emulation using input mode may incorrectly report * input here, fix that up. */ if (test_bit(FLAG_OPEN_DRAIN, &flags) && test_bit(FLAG_IS_OUT, &flags)) return 0; if (!guard.gc->get_direction) return -ENOTSUPP; ret = guard.gc->get_direction(guard.gc, offset); if (ret < 0) return ret; /* * GPIO_LINE_DIRECTION_IN or other positive, * otherwise GPIO_LINE_DIRECTION_OUT. */ if (ret > 0) ret = 1; assign_bit(FLAG_IS_OUT, &flags, !ret); WRITE_ONCE(desc->flags, flags); return ret; } EXPORT_SYMBOL_GPL(gpiod_get_direction); /* * Add a new chip to the global chips list, keeping the list of chips sorted * by range(means [base, base + ngpio - 1]) order. * * Returns: * -EBUSY if the new chip overlaps with some other chip's integer space. */ static int gpiodev_add_to_list_unlocked(struct gpio_device *gdev) { struct gpio_device *prev, *next; lockdep_assert_held(&gpio_devices_lock); if (list_empty(&gpio_devices)) { /* initial entry in list */ list_add_tail_rcu(&gdev->list, &gpio_devices); return 0; } next = list_first_entry(&gpio_devices, struct gpio_device, list); if (gdev->base + gdev->ngpio <= next->base) { /* add before first entry */ list_add_rcu(&gdev->list, &gpio_devices); return 0; } prev = list_last_entry(&gpio_devices, struct gpio_device, list); if (prev->base + prev->ngpio <= gdev->base) { /* add behind last entry */ list_add_tail_rcu(&gdev->list, &gpio_devices); return 0; } list_for_each_entry_safe(prev, next, &gpio_devices, list) { /* at the end of the list */ if (&next->list == &gpio_devices) break; /* add between prev and next */ if (prev->base + prev->ngpio <= gdev->base && gdev->base + gdev->ngpio <= next->base) { list_add_rcu(&gdev->list, &prev->list); return 0; } } synchronize_srcu(&gpio_devices_srcu); return -EBUSY; } /* * Convert a GPIO name to its descriptor * Note that there is no guarantee that GPIO names are globally unique! * Hence this function will return, if it exists, a reference to the first GPIO * line found that matches the given name. */ static struct gpio_desc *gpio_name_to_desc(const char * const name) { struct gpio_device *gdev; struct gpio_desc *desc; struct gpio_chip *gc; if (!name) return NULL; guard(srcu)(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) continue; for_each_gpio_desc(gc, desc) { if (desc->name && !strcmp(desc->name, name)) return desc; } } return NULL; } /* * Take the names from gc->names and assign them to their GPIO descriptors. * Warn if a name is already used for a GPIO line on a different GPIO chip. * * Note that: * 1. Non-unique names are still accepted, * 2. Name collisions within the same GPIO chip are not reported. */ static void gpiochip_set_desc_names(struct gpio_chip *gc) { struct gpio_device *gdev = gc->gpiodev; int i; /* First check all names if they are unique */ for (i = 0; i != gc->ngpio; ++i) { struct gpio_desc *gpio; gpio = gpio_name_to_desc(gc->names[i]); if (gpio) dev_warn(&gdev->dev, "Detected name collision for GPIO name '%s'\n", gc->names[i]); } /* Then add all names to the GPIO descriptors */ for (i = 0; i != gc->ngpio; ++i) gdev->descs[i].name = gc->names[i]; } /* * gpiochip_set_names - Set GPIO line names using device properties * @chip: GPIO chip whose lines should be named, if possible * * Looks for device property "gpio-line-names" and if it exists assigns * GPIO line names for the chip. The memory allocated for the assigned * names belong to the underlying firmware node and should not be released * by the caller. */ static int gpiochip_set_names(struct gpio_chip *chip) { struct gpio_device *gdev = chip->gpiodev; struct device *dev = &gdev->dev; const char **names; int ret, i; int count; count = device_property_string_array_count(dev, "gpio-line-names"); if (count < 0) return 0; /* * When offset is set in the driver side we assume the driver internally * is using more than one gpiochip per the same device. We have to stop * setting friendly names if the specified ones with 'gpio-line-names' * are less than the offset in the device itself. This means all the * lines are not present for every single pin within all the internal * gpiochips. */ if (count <= chip->offset) { dev_warn(dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n", count, chip->offset); return 0; } names = kcalloc(count, sizeof(*names), GFP_KERNEL); if (!names) return -ENOMEM; ret = device_property_read_string_array(dev, "gpio-line-names", names, count); if (ret < 0) { dev_warn(dev, "failed to read GPIO line names\n"); kfree(names); return ret; } /* * When more that one gpiochip per device is used, 'count' can * contain at most number gpiochips x chip->ngpio. We have to * correctly distribute all defined lines taking into account * chip->offset as starting point from where we will assign * the names to pins from the 'names' array. Since property * 'gpio-line-names' cannot contains gaps, we have to be sure * we only assign those pins that really exists since chip->ngpio * can be different of the chip->offset. */ count = (count > chip->offset) ? count - chip->offset : count; if (count > chip->ngpio) count = chip->ngpio; for (i = 0; i < count; i++) { /* * Allow overriding "fixed" names provided by the GPIO * provider. The "fixed" names are more often than not * generic and less informative than the names given in * device properties. */ if (names[chip->offset + i] && names[chip->offset + i][0]) gdev->descs[i].name = names[chip->offset + i]; } kfree(names); return 0; } static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc) { unsigned long *p; p = bitmap_alloc(gc->ngpio, GFP_KERNEL); if (!p) return NULL; /* Assume by default all GPIOs are valid */ bitmap_fill(p, gc->ngpio); return p; } static void gpiochip_free_mask(unsigned long **p) { bitmap_free(*p); *p = NULL; } static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc) { struct device *dev = &gc->gpiodev->dev; int size; /* Format is "start, count, ..." */ size = device_property_count_u32(dev, "gpio-reserved-ranges"); if (size > 0 && size % 2 == 0) return size; return 0; } static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc) { struct device *dev = &gc->gpiodev->dev; unsigned int size; u32 *ranges; int ret; size = gpiochip_count_reserved_ranges(gc); if (size == 0) return 0; ranges = kmalloc_array(size, sizeof(*ranges), GFP_KERNEL); if (!ranges) return -ENOMEM; ret = device_property_read_u32_array(dev, "gpio-reserved-ranges", ranges, size); if (ret) { kfree(ranges); return ret; } while (size) { u32 count = ranges[--size]; u32 start = ranges[--size]; if (start >= gc->ngpio || start + count > gc->ngpio) continue; bitmap_clear(gc->valid_mask, start, count); } kfree(ranges); return 0; } static int gpiochip_init_valid_mask(struct gpio_chip *gc) { int ret; if (!(gpiochip_count_reserved_ranges(gc) || gc->init_valid_mask)) return 0; gc->valid_mask = gpiochip_allocate_mask(gc); if (!gc->valid_mask) return -ENOMEM; ret = gpiochip_apply_reserved_ranges(gc); if (ret) return ret; if (gc->init_valid_mask) return gc->init_valid_mask(gc, gc->valid_mask, gc->ngpio); return 0; } static void gpiochip_free_valid_mask(struct gpio_chip *gc) { gpiochip_free_mask(&gc->valid_mask); } static int gpiochip_add_pin_ranges(struct gpio_chip *gc) { /* * Device Tree platforms are supposed to use "gpio-ranges" * property. This check ensures that the ->add_pin_ranges() * won't be called for them. */ if (device_property_present(&gc->gpiodev->dev, "gpio-ranges")) return 0; if (gc->add_pin_ranges) return gc->add_pin_ranges(gc); return 0; } bool gpiochip_line_is_valid(const struct gpio_chip *gc, unsigned int offset) { /* No mask means all valid */ if (likely(!gc->valid_mask)) return true; return test_bit(offset, gc->valid_mask); } EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiod_free_irqs(struct gpio_desc *desc) { int irq = gpiod_to_irq(desc); struct irq_desc *irqd = irq_to_desc(irq); void *cookie; for (;;) { /* * Make sure the action doesn't go away while we're * dereferencing it. Retrieve and store the cookie value. * If the irq is freed after we release the lock, that's * alright - the underlying maple tree lookup will return NULL * and nothing will happen in free_irq(). */ scoped_guard(mutex, &irqd->request_mutex) { if (!irq_desc_has_action(irqd)) return; cookie = irqd->action->dev_id; } free_irq(irq, cookie); } } /* * The chip is going away but there may be users who had requested interrupts * on its GPIO lines who have no idea about its removal and have no way of * being notified about it. We need to free any interrupts still in use here or * we'll leak memory and resources (like procfs files). */ static void gpiochip_free_remaining_irqs(struct gpio_chip *gc) { struct gpio_desc *desc; for_each_gpio_desc_with_flag(gc, desc, FLAG_USED_AS_IRQ) gpiod_free_irqs(desc); } static void gpiodev_release(struct device *dev) { struct gpio_device *gdev = to_gpio_device(dev); /* Call pending kfree()s for descriptor labels. */ synchronize_srcu(&gdev->desc_srcu); cleanup_srcu_struct(&gdev->desc_srcu); ida_free(&gpio_ida, gdev->id); kfree_const(gdev->label); kfree(gdev->descs); cleanup_srcu_struct(&gdev->srcu); kfree(gdev); } static const struct device_type gpio_dev_type = { .name = "gpio_chip", .release = gpiodev_release, }; #ifdef CONFIG_GPIO_CDEV #define gcdev_register(gdev, devt) gpiolib_cdev_register((gdev), (devt)) #define gcdev_unregister(gdev) gpiolib_cdev_unregister((gdev)) #else /* * gpiolib_cdev_register() indirectly calls device_add(), which is still * required even when cdev is not selected. */ #define gcdev_register(gdev, devt) device_add(&(gdev)->dev) #define gcdev_unregister(gdev) device_del(&(gdev)->dev) #endif static int gpiochip_setup_dev(struct gpio_device *gdev) { struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); int ret; device_initialize(&gdev->dev); /* * If fwnode doesn't belong to another device, it's safe to clear its * initialized flag. */ if (fwnode && !fwnode->dev) fwnode_dev_initialized(fwnode, false); ret = gcdev_register(gdev, gpio_devt); if (ret) return ret; ret = gpiochip_sysfs_register(gdev); if (ret) goto err_remove_device; dev_dbg(&gdev->dev, "registered GPIOs %u to %u on %s\n", gdev->base, gdev->base + gdev->ngpio - 1, gdev->label); return 0; err_remove_device: gcdev_unregister(gdev); return ret; } static void gpiochip_machine_hog(struct gpio_chip *gc, struct gpiod_hog *hog) { struct gpio_desc *desc; int rv; desc = gpiochip_get_desc(gc, hog->chip_hwnum); if (IS_ERR(desc)) { chip_err(gc, "%s: unable to get GPIO desc: %ld\n", __func__, PTR_ERR(desc)); return; } rv = gpiod_hog(desc, hog->line_name, hog->lflags, hog->dflags); if (rv) gpiod_err(desc, "%s: unable to hog GPIO line (%s:%u): %d\n", __func__, gc->label, hog->chip_hwnum, rv); } static void machine_gpiochip_add(struct gpio_chip *gc) { struct gpiod_hog *hog; mutex_lock(&gpio_machine_hogs_mutex); list_for_each_entry(hog, &gpio_machine_hogs, list) { if (!strcmp(gc->label, hog->chip_label)) gpiochip_machine_hog(gc, hog); } mutex_unlock(&gpio_machine_hogs_mutex); } static void gpiochip_setup_devs(void) { struct gpio_device *gdev; int ret; guard(srcu)(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { ret = gpiochip_setup_dev(gdev); if (ret) dev_err(&gdev->dev, "Failed to initialize gpio device (%d)\n", ret); } } static void gpiochip_set_data(struct gpio_chip *gc, void *data) { gc->gpiodev->data = data; } /** * gpiochip_get_data() - get per-subdriver data for the chip * @gc: GPIO chip * * Returns: * The per-subdriver data for the chip. */ void *gpiochip_get_data(struct gpio_chip *gc) { return gc->gpiodev->data; } EXPORT_SYMBOL_GPL(gpiochip_get_data); int gpiochip_get_ngpios(struct gpio_chip *gc, struct device *dev) { u32 ngpios = gc->ngpio; int ret; if (ngpios == 0) { ret = device_property_read_u32(dev, "ngpios", &ngpios); if (ret == -ENODATA) /* * -ENODATA means that there is no property found and * we want to issue the error message to the user. * Besides that, we want to return different error code * to state that supplied value is not valid. */ ngpios = 0; else if (ret) return ret; gc->ngpio = ngpios; } if (gc->ngpio == 0) { chip_err(gc, "tried to insert a GPIO chip with zero lines\n"); return -EINVAL; } if (gc->ngpio > FASTPATH_NGPIO) chip_warn(gc, "line cnt %u is greater than fast path cnt %u\n", gc->ngpio, FASTPATH_NGPIO); return 0; } EXPORT_SYMBOL_GPL(gpiochip_get_ngpios); int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key) { struct gpio_device *gdev; unsigned int desc_index; int base = 0; int ret = 0; /* * First: allocate and populate the internal stat container, and * set up the struct device. */ gdev = kzalloc(sizeof(*gdev), GFP_KERNEL); if (!gdev) return -ENOMEM; gdev->dev.type = &gpio_dev_type; gdev->dev.bus = &gpio_bus_type; gdev->dev.parent = gc->parent; rcu_assign_pointer(gdev->chip, gc); gc->gpiodev = gdev; gpiochip_set_data(gc, data); /* * If the calling driver did not initialize firmware node, * do it here using the parent device, if any. */ if (gc->fwnode) device_set_node(&gdev->dev, gc->fwnode); else if (gc->parent) device_set_node(&gdev->dev, dev_fwnode(gc->parent)); gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL); if (gdev->id < 0) { ret = gdev->id; goto err_free_gdev; } ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id); if (ret) goto err_free_ida; if (gc->parent && gc->parent->driver) gdev->owner = gc->parent->driver->owner; else if (gc->owner) /* TODO: remove chip->owner */ gdev->owner = gc->owner; else gdev->owner = THIS_MODULE; ret = gpiochip_get_ngpios(gc, &gdev->dev); if (ret) goto err_free_dev_name; gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL); if (!gdev->descs) { ret = -ENOMEM; goto err_free_dev_name; } gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL); if (!gdev->label) { ret = -ENOMEM; goto err_free_descs; } gdev->ngpio = gc->ngpio; gdev->can_sleep = gc->can_sleep; scoped_guard(mutex, &gpio_devices_lock) { /* * TODO: this allocates a Linux GPIO number base in the global * GPIO numberspace for this chip. In the long run we want to * get *rid* of this numberspace and use only descriptors, but * it may be a pipe dream. It will not happen before we get rid * of the sysfs interface anyways. */ base = gc->base; if (base < 0) { base = gpiochip_find_base_unlocked(gc->ngpio); if (base < 0) { ret = base; base = 0; goto err_free_label; } /* * TODO: it should not be necessary to reflect the * assigned base outside of the GPIO subsystem. Go over * drivers and see if anyone makes use of this, else * drop this and assign a poison instead. */ gc->base = base; } else { dev_warn(&gdev->dev, "Static allocation of GPIO base is deprecated, use dynamic allocation.\n"); } gdev->base = base; ret = gpiodev_add_to_list_unlocked(gdev); if (ret) { chip_err(gc, "GPIO integer space overlap, cannot add chip\n"); goto err_free_label; } } ATOMIC_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier); ret = init_srcu_struct(&gdev->srcu); if (ret) goto err_remove_from_list; ret = init_srcu_struct(&gdev->desc_srcu); if (ret) goto err_cleanup_gdev_srcu; #ifdef CONFIG_PINCTRL INIT_LIST_HEAD(&gdev->pin_ranges); #endif if (gc->names) gpiochip_set_desc_names(gc); ret = gpiochip_set_names(gc); if (ret) goto err_cleanup_desc_srcu; ret = gpiochip_init_valid_mask(gc); if (ret) goto err_cleanup_desc_srcu; for (desc_index = 0; desc_index < gc->ngpio; desc_index++) { struct gpio_desc *desc = &gdev->descs[desc_index]; desc->gdev = gdev; if (gc->get_direction && gpiochip_line_is_valid(gc, desc_index)) { assign_bit(FLAG_IS_OUT, &desc->flags, !gc->get_direction(gc, desc_index)); } else { assign_bit(FLAG_IS_OUT, &desc->flags, !gc->direction_input); } } ret = of_gpiochip_add(gc); if (ret) goto err_free_valid_mask; ret = gpiochip_add_pin_ranges(gc); if (ret) goto err_remove_of_chip; acpi_gpiochip_add(gc); machine_gpiochip_add(gc); ret = gpiochip_irqchip_init_valid_mask(gc); if (ret) goto err_free_hogs; ret = gpiochip_irqchip_init_hw(gc); if (ret) goto err_remove_irqchip_mask; ret = gpiochip_add_irqchip(gc, lock_key, request_key); if (ret) goto err_remove_irqchip_mask; /* * By first adding the chardev, and then adding the device, * we get a device node entry in sysfs under * /sys/bus/gpio/devices/gpiochipN/dev that can be used for * coldplug of device nodes and other udev business. * We can do this only if gpiolib has been initialized. * Otherwise, defer until later. */ if (gpiolib_initialized) { ret = gpiochip_setup_dev(gdev); if (ret) goto err_remove_irqchip; } return 0; err_remove_irqchip: gpiochip_irqchip_remove(gc); err_remove_irqchip_mask: gpiochip_irqchip_free_valid_mask(gc); err_free_hogs: gpiochip_free_hogs(gc); acpi_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); err_remove_of_chip: of_gpiochip_remove(gc); err_free_valid_mask: gpiochip_free_valid_mask(gc); err_cleanup_desc_srcu: cleanup_srcu_struct(&gdev->desc_srcu); err_cleanup_gdev_srcu: cleanup_srcu_struct(&gdev->srcu); err_remove_from_list: scoped_guard(mutex, &gpio_devices_lock) list_del_rcu(&gdev->list); synchronize_srcu(&gpio_devices_srcu); if (gdev->dev.release) { /* release() has been registered by gpiochip_setup_dev() */ gpio_device_put(gdev); goto err_print_message; } err_free_label: kfree_const(gdev->label); err_free_descs: kfree(gdev->descs); err_free_dev_name: kfree(dev_name(&gdev->dev)); err_free_ida: ida_free(&gpio_ida, gdev->id); err_free_gdev: kfree(gdev); err_print_message: /* failures here can mean systems won't boot... */ if (ret != -EPROBE_DEFER) { pr_err("%s: GPIOs %d..%d (%s) failed to register, %d\n", __func__, base, base + (int)gc->ngpio - 1, gc->label ? : "generic", ret); } return ret; } EXPORT_SYMBOL_GPL(gpiochip_add_data_with_key); /** * gpiochip_remove() - unregister a gpio_chip * @gc: the chip to unregister * * A gpio_chip with any GPIOs still requested may not be removed. */ void gpiochip_remove(struct gpio_chip *gc) { struct gpio_device *gdev = gc->gpiodev; /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ gpiochip_sysfs_unregister(gdev); gpiochip_free_hogs(gc); gpiochip_free_remaining_irqs(gc); scoped_guard(mutex, &gpio_devices_lock) list_del_rcu(&gdev->list); synchronize_srcu(&gpio_devices_srcu); /* Numb the device, cancelling all outstanding operations */ rcu_assign_pointer(gdev->chip, NULL); synchronize_srcu(&gdev->srcu); gpiochip_irqchip_remove(gc); acpi_gpiochip_remove(gc); of_gpiochip_remove(gc); gpiochip_remove_pin_ranges(gc); gpiochip_free_valid_mask(gc); /* * We accept no more calls into the driver from this point, so * NULL the driver data pointer. */ gpiochip_set_data(gc, NULL); /* * The gpiochip side puts its use of the device to rest here: * if there are no userspace clients, the chardev and device will * be removed, else it will be dangling until the last user is * gone. */ gcdev_unregister(gdev); gpio_device_put(gdev); } EXPORT_SYMBOL_GPL(gpiochip_remove); /** * gpio_device_find() - find a specific GPIO device * @data: data to pass to match function * @match: Callback function to check gpio_chip * * Returns: * New reference to struct gpio_device. * * Similar to bus_find_device(). It returns a reference to a gpio_device as * determined by a user supplied @match callback. The callback should return * 0 if the device doesn't match and non-zero if it does. If the callback * returns non-zero, this function will return to the caller and not iterate * over any more gpio_devices. * * The callback takes the GPIO chip structure as argument. During the execution * of the callback function the chip is protected from being freed. TODO: This * actually has yet to be implemented. * * If the function returns non-NULL, the returned reference must be freed by * the caller using gpio_device_put(). */ struct gpio_device *gpio_device_find(const void *data, int (*match)(struct gpio_chip *gc, const void *data)) { struct gpio_device *gdev; struct gpio_chip *gc; might_sleep(); guard(srcu)(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (!device_is_registered(&gdev->dev)) continue; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (gc && match(gc, data)) return gpio_device_get(gdev); } return NULL; } EXPORT_SYMBOL_GPL(gpio_device_find); static int gpio_chip_match_by_label(struct gpio_chip *gc, const void *label) { return gc->label && !strcmp(gc->label, label); } /** * gpio_device_find_by_label() - wrapper around gpio_device_find() finding the * GPIO device by its backing chip's label * @label: Label to lookup * * Returns: * Reference to the GPIO device or NULL. Reference must be released with * gpio_device_put(). */ struct gpio_device *gpio_device_find_by_label(const char *label) { return gpio_device_find((void *)label, gpio_chip_match_by_label); } EXPORT_SYMBOL_GPL(gpio_device_find_by_label); static int gpio_chip_match_by_fwnode(struct gpio_chip *gc, const void *fwnode) { return device_match_fwnode(&gc->gpiodev->dev, fwnode); } /** * gpio_device_find_by_fwnode() - wrapper around gpio_device_find() finding * the GPIO device by its fwnode * @fwnode: Firmware node to lookup * * Returns: * Reference to the GPIO device or NULL. Reference must be released with * gpio_device_put(). */ struct gpio_device *gpio_device_find_by_fwnode(const struct fwnode_handle *fwnode) { return gpio_device_find((void *)fwnode, gpio_chip_match_by_fwnode); } EXPORT_SYMBOL_GPL(gpio_device_find_by_fwnode); /** * gpio_device_get() - Increase the reference count of this GPIO device * @gdev: GPIO device to increase the refcount for * * Returns: * Pointer to @gdev. */ struct gpio_device *gpio_device_get(struct gpio_device *gdev) { return to_gpio_device(get_device(&gdev->dev)); } EXPORT_SYMBOL_GPL(gpio_device_get); /** * gpio_device_put() - Decrease the reference count of this GPIO device and * possibly free all resources associated with it. * @gdev: GPIO device to decrease the reference count for */ void gpio_device_put(struct gpio_device *gdev) { put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpio_device_put); /** * gpio_device_to_device() - Retrieve the address of the underlying struct * device. * @gdev: GPIO device for which to return the address. * * This does not increase the reference count of the GPIO device nor the * underlying struct device. * * Returns: * Address of struct device backing this GPIO device. */ struct device *gpio_device_to_device(struct gpio_device *gdev) { return &gdev->dev; } EXPORT_SYMBOL_GPL(gpio_device_to_device); #ifdef CONFIG_GPIOLIB_IRQCHIP /* * The following is irqchip helper code for gpiochips. */ static int gpiochip_irqchip_init_hw(struct gpio_chip *gc) { struct gpio_irq_chip *girq = &gc->irq; if (!girq->init_hw) return 0; return girq->init_hw(gc); } static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc) { struct gpio_irq_chip *girq = &gc->irq; if (!girq->init_valid_mask) return 0; girq->valid_mask = gpiochip_allocate_mask(gc); if (!girq->valid_mask) return -ENOMEM; girq->init_valid_mask(gc, girq->valid_mask, gc->ngpio); return 0; } static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc) { gpiochip_free_mask(&gc->irq.valid_mask); } static bool gpiochip_irqchip_irq_valid(const struct gpio_chip *gc, unsigned int offset) { if (!gpiochip_line_is_valid(gc, offset)) return false; /* No mask means all valid */ if (likely(!gc->irq.valid_mask)) return true; return test_bit(offset, gc->irq.valid_mask); } #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /** * gpiochip_set_hierarchical_irqchip() - connects a hierarchical irqchip * to a gpiochip * @gc: the gpiochip to set the irqchip hierarchical handler to * @irqchip: the irqchip to handle this level of the hierarchy, the interrupt * will then percolate up to the parent */ static void gpiochip_set_hierarchical_irqchip(struct gpio_chip *gc, struct irq_chip *irqchip) { /* DT will deal with mapping each IRQ as we go along */ if (is_of_node(gc->irq.fwnode)) return; /* * This is for legacy and boardfile "irqchip" fwnodes: allocate * irqs upfront instead of dynamically since we don't have the * dynamic type of allocation that hardware description languages * provide. Once all GPIO drivers using board files are gone from * the kernel we can delete this code, but for a transitional period * it is necessary to keep this around. */ if (is_fwnode_irqchip(gc->irq.fwnode)) { int i; int ret; for (i = 0; i < gc->ngpio; i++) { struct irq_fwspec fwspec; unsigned int parent_hwirq; unsigned int parent_type; struct gpio_irq_chip *girq = &gc->irq; /* * We call the child to parent translation function * only to check if the child IRQ is valid or not. * Just pick the rising edge type here as that is what * we likely need to support. */ ret = girq->child_to_parent_hwirq(gc, i, IRQ_TYPE_EDGE_RISING, &parent_hwirq, &parent_type); if (ret) { chip_err(gc, "skip set-up on hwirq %d\n", i); continue; } fwspec.fwnode = gc->irq.fwnode; /* This is the hwirq for the GPIO line side of things */ fwspec.param[0] = girq->child_offset_to_irq(gc, i); /* Just pick something */ fwspec.param[1] = IRQ_TYPE_EDGE_RISING; fwspec.param_count = 2; ret = irq_domain_alloc_irqs(gc->irq.domain, 1, NUMA_NO_NODE, &fwspec); if (ret < 0) { chip_err(gc, "can not allocate irq for GPIO line %d parent hwirq %d in hierarchy domain: %d\n", i, parent_hwirq, ret); } } } chip_err(gc, "%s unknown fwnode type proceed anyway\n", __func__); return; } static int gpiochip_hierarchy_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { /* We support standard DT translation */ if (is_of_node(fwspec->fwnode) && fwspec->param_count == 2) { return irq_domain_translate_twocell(d, fwspec, hwirq, type); } /* This is for board files and others not using DT */ if (is_fwnode_irqchip(fwspec->fwnode)) { int ret; ret = irq_domain_translate_twocell(d, fwspec, hwirq, type); if (ret) return ret; WARN_ON(*type == IRQ_TYPE_NONE); return 0; } return -EINVAL; } static int gpiochip_hierarchy_irq_domain_alloc(struct irq_domain *d, unsigned int irq, unsigned int nr_irqs, void *data) { struct gpio_chip *gc = d->host_data; irq_hw_number_t hwirq; unsigned int type = IRQ_TYPE_NONE; struct irq_fwspec *fwspec = data; union gpio_irq_fwspec gpio_parent_fwspec = {}; unsigned int parent_hwirq; unsigned int parent_type; struct gpio_irq_chip *girq = &gc->irq; int ret; /* * The nr_irqs parameter is always one except for PCI multi-MSI * so this should not happen. */ WARN_ON(nr_irqs != 1); ret = gc->irq.child_irq_domain_ops.translate(d, fwspec, &hwirq, &type); if (ret) return ret; chip_dbg(gc, "allocate IRQ %d, hwirq %lu\n", irq, hwirq); ret = girq->child_to_parent_hwirq(gc, hwirq, type, &parent_hwirq, &parent_type); if (ret) { chip_err(gc, "can't look up hwirq %lu\n", hwirq); return ret; } chip_dbg(gc, "found parent hwirq %u\n", parent_hwirq); /* * We set handle_bad_irq because the .set_type() should * always be invoked and set the right type of handler. */ irq_domain_set_info(d, irq, hwirq, gc->irq.chip, gc, girq->handler, NULL, NULL); irq_set_probe(irq); /* This parent only handles asserted level IRQs */ ret = girq->populate_parent_alloc_arg(gc, &gpio_parent_fwspec, parent_hwirq, parent_type); if (ret) return ret; chip_dbg(gc, "alloc_irqs_parent for %d parent hwirq %d\n", irq, parent_hwirq); irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key); ret = irq_domain_alloc_irqs_parent(d, irq, 1, &gpio_parent_fwspec); /* * If the parent irqdomain is msi, the interrupts have already * been allocated, so the EEXIST is good. */ if (irq_domain_is_msi(d->parent) && (ret == -EEXIST)) ret = 0; if (ret) chip_err(gc, "failed to allocate parent hwirq %d for hwirq %lu\n", parent_hwirq, hwirq); return ret; } static unsigned int gpiochip_child_offset_to_irq_noop(struct gpio_chip *gc, unsigned int offset) { return offset; } /** * gpiochip_irq_domain_activate() - Lock a GPIO to be used as an IRQ * @domain: The IRQ domain used by this IRQ chip * @data: Outermost irq_data associated with the IRQ * @reserve: If set, only reserve an interrupt vector instead of assigning one * * This function is a wrapper that calls gpiochip_lock_as_irq() and is to be * used as the activate function for the &struct irq_domain_ops. The host_data * for the IRQ domain must be the &struct gpio_chip. * * Returns: * 0 on success, or negative errno on failure. */ static int gpiochip_irq_domain_activate(struct irq_domain *domain, struct irq_data *data, bool reserve) { struct gpio_chip *gc = domain->host_data; unsigned int hwirq = irqd_to_hwirq(data); return gpiochip_lock_as_irq(gc, hwirq); } /** * gpiochip_irq_domain_deactivate() - Unlock a GPIO used as an IRQ * @domain: The IRQ domain used by this IRQ chip * @data: Outermost irq_data associated with the IRQ * * This function is a wrapper that will call gpiochip_unlock_as_irq() and is to * be used as the deactivate function for the &struct irq_domain_ops. The * host_data for the IRQ domain must be the &struct gpio_chip. */ static void gpiochip_irq_domain_deactivate(struct irq_domain *domain, struct irq_data *data) { struct gpio_chip *gc = domain->host_data; unsigned int hwirq = irqd_to_hwirq(data); return gpiochip_unlock_as_irq(gc, hwirq); } static void gpiochip_hierarchy_setup_domain_ops(struct irq_domain_ops *ops) { ops->activate = gpiochip_irq_domain_activate; ops->deactivate = gpiochip_irq_domain_deactivate; ops->alloc = gpiochip_hierarchy_irq_domain_alloc; /* * We only allow overriding the translate() and free() functions for * hierarchical chips, and this should only be done if the user * really need something other than 1:1 translation for translate() * callback and free if user wants to free up any resources which * were allocated during callbacks, for example populate_parent_alloc_arg. */ if (!ops->translate) ops->translate = gpiochip_hierarchy_irq_domain_translate; if (!ops->free) ops->free = irq_domain_free_irqs_common; } static struct irq_domain *gpiochip_hierarchy_create_domain(struct gpio_chip *gc) { struct irq_domain *domain; if (!gc->irq.child_to_parent_hwirq || !gc->irq.fwnode) { chip_err(gc, "missing irqdomain vital data\n"); return ERR_PTR(-EINVAL); } if (!gc->irq.child_offset_to_irq) gc->irq.child_offset_to_irq = gpiochip_child_offset_to_irq_noop; if (!gc->irq.populate_parent_alloc_arg) gc->irq.populate_parent_alloc_arg = gpiochip_populate_parent_fwspec_twocell; gpiochip_hierarchy_setup_domain_ops(&gc->irq.child_irq_domain_ops); domain = irq_domain_create_hierarchy( gc->irq.parent_domain, 0, gc->ngpio, gc->irq.fwnode, &gc->irq.child_irq_domain_ops, gc); if (!domain) return ERR_PTR(-ENOMEM); gpiochip_set_hierarchical_irqchip(gc, gc->irq.chip); return domain; } static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc) { return !!gc->irq.parent_domain; } int gpiochip_populate_parent_fwspec_twocell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type) { struct irq_fwspec *fwspec = &gfwspec->fwspec; fwspec->fwnode = gc->irq.parent_domain->fwnode; fwspec->param_count = 2; fwspec->param[0] = parent_hwirq; fwspec->param[1] = parent_type; return 0; } EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_twocell); int gpiochip_populate_parent_fwspec_fourcell(struct gpio_chip *gc, union gpio_irq_fwspec *gfwspec, unsigned int parent_hwirq, unsigned int parent_type) { struct irq_fwspec *fwspec = &gfwspec->fwspec; fwspec->fwnode = gc->irq.parent_domain->fwnode; fwspec->param_count = 4; fwspec->param[0] = 0; fwspec->param[1] = parent_hwirq; fwspec->param[2] = 0; fwspec->param[3] = parent_type; return 0; } EXPORT_SYMBOL_GPL(gpiochip_populate_parent_fwspec_fourcell); #else static struct irq_domain *gpiochip_hierarchy_create_domain(struct gpio_chip *gc) { return ERR_PTR(-EINVAL); } static bool gpiochip_hierarchy_is_hierarchical(struct gpio_chip *gc) { return false; } #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ /** * gpiochip_irq_map() - maps an IRQ into a GPIO irqchip * @d: the irqdomain used by this irqchip * @irq: the global irq number used by this GPIO irqchip irq * @hwirq: the local IRQ/GPIO line offset on this gpiochip * * This function will set up the mapping for a certain IRQ line on a * gpiochip by assigning the gpiochip as chip data, and using the irqchip * stored inside the gpiochip. * * Returns: * 0 on success, or negative errno on failure. */ static int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { struct gpio_chip *gc = d->host_data; int ret = 0; if (!gpiochip_irqchip_irq_valid(gc, hwirq)) return -ENXIO; irq_set_chip_data(irq, gc); /* * This lock class tells lockdep that GPIO irqs are in a different * category than their parents, so it won't report false recursion. */ irq_set_lockdep_class(irq, gc->irq.lock_key, gc->irq.request_key); irq_set_chip_and_handler(irq, gc->irq.chip, gc->irq.handler); /* Chips that use nested thread handlers have them marked */ if (gc->irq.threaded) irq_set_nested_thread(irq, 1); irq_set_noprobe(irq); if (gc->irq.num_parents == 1) ret = irq_set_parent(irq, gc->irq.parents[0]); else if (gc->irq.map) ret = irq_set_parent(irq, gc->irq.map[hwirq]); if (ret < 0) return ret; /* * No set-up of the hardware will happen if IRQ_TYPE_NONE * is passed as default type. */ if (gc->irq.default_type != IRQ_TYPE_NONE) irq_set_irq_type(irq, gc->irq.default_type); return 0; } static void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq) { struct gpio_chip *gc = d->host_data; if (gc->irq.threaded) irq_set_nested_thread(irq, 0); irq_set_chip_and_handler(irq, NULL, NULL); irq_set_chip_data(irq, NULL); } static const struct irq_domain_ops gpiochip_domain_ops = { .map = gpiochip_irq_map, .unmap = gpiochip_irq_unmap, /* Virtually all GPIO irqchips are twocell:ed */ .xlate = irq_domain_xlate_twocell, }; static struct irq_domain *gpiochip_simple_create_domain(struct gpio_chip *gc) { struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev); struct irq_domain *domain; domain = irq_domain_create_simple(fwnode, gc->ngpio, gc->irq.first, &gpiochip_domain_ops, gc); if (!domain) return ERR_PTR(-EINVAL); return domain; } static int gpiochip_to_irq(struct gpio_chip *gc, unsigned int offset) { struct irq_domain *domain = gc->irq.domain; #ifdef CONFIG_GPIOLIB_IRQCHIP /* * Avoid race condition with other code, which tries to lookup * an IRQ before the irqchip has been properly registered, * i.e. while gpiochip is still being brought up. */ if (!gc->irq.initialized) return -EPROBE_DEFER; #endif if (!gpiochip_irqchip_irq_valid(gc, offset)) return -ENXIO; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY if (irq_domain_is_hierarchy(domain)) { struct irq_fwspec spec; spec.fwnode = domain->fwnode; spec.param_count = 2; spec.param[0] = gc->irq.child_offset_to_irq(gc, offset); spec.param[1] = IRQ_TYPE_NONE; return irq_create_fwspec_mapping(&spec); } #endif return irq_create_mapping(domain, offset); } int gpiochip_irq_reqres(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); return gpiochip_reqres_irq(gc, hwirq); } EXPORT_SYMBOL(gpiochip_irq_reqres); void gpiochip_irq_relres(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gpiochip_relres_irq(gc, hwirq); } EXPORT_SYMBOL(gpiochip_irq_relres); static void gpiochip_irq_mask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); if (gc->irq.irq_mask) gc->irq.irq_mask(d); gpiochip_disable_irq(gc, hwirq); } static void gpiochip_irq_unmask(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gpiochip_enable_irq(gc, hwirq); if (gc->irq.irq_unmask) gc->irq.irq_unmask(d); } static void gpiochip_irq_enable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gpiochip_enable_irq(gc, hwirq); gc->irq.irq_enable(d); } static void gpiochip_irq_disable(struct irq_data *d) { struct gpio_chip *gc = irq_data_get_irq_chip_data(d); unsigned int hwirq = irqd_to_hwirq(d); gc->irq.irq_disable(d); gpiochip_disable_irq(gc, hwirq); } static void gpiochip_set_irq_hooks(struct gpio_chip *gc) { struct irq_chip *irqchip = gc->irq.chip; if (irqchip->flags & IRQCHIP_IMMUTABLE) return; chip_warn(gc, "not an immutable chip, please consider fixing it!\n"); if (!irqchip->irq_request_resources && !irqchip->irq_release_resources) { irqchip->irq_request_resources = gpiochip_irq_reqres; irqchip->irq_release_resources = gpiochip_irq_relres; } if (WARN_ON(gc->irq.irq_enable)) return; /* Check if the irqchip already has this hook... */ if (irqchip->irq_enable == gpiochip_irq_enable || irqchip->irq_mask == gpiochip_irq_mask) { /* * ...and if so, give a gentle warning that this is bad * practice. */ chip_info(gc, "detected irqchip that is shared with multiple gpiochips: please fix the driver.\n"); return; } if (irqchip->irq_disable) { gc->irq.irq_disable = irqchip->irq_disable; irqchip->irq_disable = gpiochip_irq_disable; } else { gc->irq.irq_mask = irqchip->irq_mask; irqchip->irq_mask = gpiochip_irq_mask; } if (irqchip->irq_enable) { gc->irq.irq_enable = irqchip->irq_enable; irqchip->irq_enable = gpiochip_irq_enable; } else { gc->irq.irq_unmask = irqchip->irq_unmask; irqchip->irq_unmask = gpiochip_irq_unmask; } } static int gpiochip_irqchip_add_allocated_domain(struct gpio_chip *gc, struct irq_domain *domain, bool allocated_externally) { if (!domain) return -EINVAL; if (gc->to_irq) chip_warn(gc, "to_irq is redefined in %s and you shouldn't rely on it\n", __func__); gc->to_irq = gpiochip_to_irq; gc->irq.domain = domain; gc->irq.domain_is_allocated_externally = allocated_externally; /* * Using barrier() here to prevent compiler from reordering * gc->irq.initialized before adding irqdomain. */ barrier(); gc->irq.initialized = true; return 0; } /** * gpiochip_add_irqchip() - adds an IRQ chip to a GPIO chip * @gc: the GPIO chip to add the IRQ chip to * @lock_key: lockdep class for IRQ lock * @request_key: lockdep class for IRQ request * * Returns: * 0 on success, or a negative errno on failure. */ static int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key) { struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev); struct irq_chip *irqchip = gc->irq.chip; struct irq_domain *domain; unsigned int type; unsigned int i; int ret; if (!irqchip) return 0; if (gc->irq.parent_handler && gc->can_sleep) { chip_err(gc, "you cannot have chained interrupts on a chip that may sleep\n"); return -EINVAL; } type = gc->irq.default_type; /* * Specifying a default trigger is a terrible idea if DT or ACPI is * used to configure the interrupts, as you may end up with * conflicting triggers. Tell the user, and reset to NONE. */ if (WARN(fwnode && type != IRQ_TYPE_NONE, "%pfw: Ignoring %u default trigger\n", fwnode, type)) type = IRQ_TYPE_NONE; gc->irq.default_type = type; gc->irq.lock_key = lock_key; gc->irq.request_key = request_key; /* If a parent irqdomain is provided, let's build a hierarchy */ if (gpiochip_hierarchy_is_hierarchical(gc)) { domain = gpiochip_hierarchy_create_domain(gc); } else { domain = gpiochip_simple_create_domain(gc); } if (IS_ERR(domain)) return PTR_ERR(domain); if (gc->irq.parent_handler) { for (i = 0; i < gc->irq.num_parents; i++) { void *data; if (gc->irq.per_parent_data) data = gc->irq.parent_handler_data_array[i]; else data = gc->irq.parent_handler_data ?: gc; /* * The parent IRQ chip is already using the chip_data * for this IRQ chip, so our callbacks simply use the * handler_data. */ irq_set_chained_handler_and_data(gc->irq.parents[i], gc->irq.parent_handler, data); } } gpiochip_set_irq_hooks(gc); ret = gpiochip_irqchip_add_allocated_domain(gc, domain, false); if (ret) return ret; acpi_gpiochip_request_interrupts(gc); return 0; } /** * gpiochip_irqchip_remove() - removes an irqchip added to a gpiochip * @gc: the gpiochip to remove the irqchip from * * This is called only from gpiochip_remove() */ static void gpiochip_irqchip_remove(struct gpio_chip *gc) { struct irq_chip *irqchip = gc->irq.chip; unsigned int offset; acpi_gpiochip_free_interrupts(gc); if (irqchip && gc->irq.parent_handler) { struct gpio_irq_chip *irq = &gc->irq; unsigned int i; for (i = 0; i < irq->num_parents; i++) irq_set_chained_handler_and_data(irq->parents[i], NULL, NULL); } /* Remove all IRQ mappings and delete the domain */ if (!gc->irq.domain_is_allocated_externally && gc->irq.domain) { unsigned int irq; for (offset = 0; offset < gc->ngpio; offset++) { if (!gpiochip_irqchip_irq_valid(gc, offset)) continue; irq = irq_find_mapping(gc->irq.domain, offset); irq_dispose_mapping(irq); } irq_domain_remove(gc->irq.domain); } if (irqchip && !(irqchip->flags & IRQCHIP_IMMUTABLE)) { if (irqchip->irq_request_resources == gpiochip_irq_reqres) { irqchip->irq_request_resources = NULL; irqchip->irq_release_resources = NULL; } if (irqchip->irq_enable == gpiochip_irq_enable) { irqchip->irq_enable = gc->irq.irq_enable; irqchip->irq_disable = gc->irq.irq_disable; } } gc->irq.irq_enable = NULL; gc->irq.irq_disable = NULL; gc->irq.chip = NULL; gpiochip_irqchip_free_valid_mask(gc); } /** * gpiochip_irqchip_add_domain() - adds an irqdomain to a gpiochip * @gc: the gpiochip to add the irqchip to * @domain: the irqdomain to add to the gpiochip * * This function adds an IRQ domain to the gpiochip. * * Returns: * 0 on success, or negative errno on failure. */ int gpiochip_irqchip_add_domain(struct gpio_chip *gc, struct irq_domain *domain) { return gpiochip_irqchip_add_allocated_domain(gc, domain, true); } EXPORT_SYMBOL_GPL(gpiochip_irqchip_add_domain); #else /* CONFIG_GPIOLIB_IRQCHIP */ static inline int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key) { return 0; } static void gpiochip_irqchip_remove(struct gpio_chip *gc) {} static inline int gpiochip_irqchip_init_hw(struct gpio_chip *gc) { return 0; } static inline int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc) { return 0; } static inline void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc) { } #endif /* CONFIG_GPIOLIB_IRQCHIP */ /** * gpiochip_generic_request() - request the gpio function for a pin * @gc: the gpiochip owning the GPIO * @offset: the offset of the GPIO to request for GPIO function * * Returns: * 0 on success, or negative errno on failure. */ int gpiochip_generic_request(struct gpio_chip *gc, unsigned int offset) { #ifdef CONFIG_PINCTRL if (list_empty(&gc->gpiodev->pin_ranges)) return 0; #endif return pinctrl_gpio_request(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_request); /** * gpiochip_generic_free() - free the gpio function from a pin * @gc: the gpiochip to request the gpio function for * @offset: the offset of the GPIO to free from GPIO function */ void gpiochip_generic_free(struct gpio_chip *gc, unsigned int offset) { #ifdef CONFIG_PINCTRL if (list_empty(&gc->gpiodev->pin_ranges)) return; #endif pinctrl_gpio_free(gc, offset); } EXPORT_SYMBOL_GPL(gpiochip_generic_free); /** * gpiochip_generic_config() - apply configuration for a pin * @gc: the gpiochip owning the GPIO * @offset: the offset of the GPIO to apply the configuration * @config: the configuration to be applied * * Returns: * 0 on success, or negative errno on failure. */ int gpiochip_generic_config(struct gpio_chip *gc, unsigned int offset, unsigned long config) { #ifdef CONFIG_PINCTRL if (list_empty(&gc->gpiodev->pin_ranges)) return -ENOTSUPP; #endif return pinctrl_gpio_set_config(gc, offset, config); } EXPORT_SYMBOL_GPL(gpiochip_generic_config); #ifdef CONFIG_PINCTRL /** * gpiochip_add_pingroup_range() - add a range for GPIO <-> pin mapping * @gc: the gpiochip to add the range for * @pctldev: the pin controller to map to * @gpio_offset: the start offset in the current gpio_chip number space * @pin_group: name of the pin group inside the pin controller * * Calling this function directly from a DeviceTree-supported * pinctrl driver is DEPRECATED. Please see Section 2.1 of * Documentation/devicetree/bindings/gpio/gpio.txt on how to * bind pinctrl and gpio drivers via the "gpio-ranges" property. * * Returns: * 0 on success, or negative errno on failure. */ int gpiochip_add_pingroup_range(struct gpio_chip *gc, struct pinctrl_dev *pctldev, unsigned int gpio_offset, const char *pin_group) { struct gpio_pin_range *pin_range; struct gpio_device *gdev = gc->gpiodev; int ret; pin_range = kzalloc(sizeof(*pin_range), GFP_KERNEL); if (!pin_range) { chip_err(gc, "failed to allocate pin ranges\n"); return -ENOMEM; } /* Use local offset as range ID */ pin_range->range.id = gpio_offset; pin_range->range.gc = gc; pin_range->range.name = gc->label; pin_range->range.base = gdev->base + gpio_offset; pin_range->pctldev = pctldev; ret = pinctrl_get_group_pins(pctldev, pin_group, &pin_range->range.pins, &pin_range->range.npins); if (ret < 0) { kfree(pin_range); return ret; } pinctrl_add_gpio_range(pctldev, &pin_range->range); chip_dbg(gc, "created GPIO range %d->%d ==> %s PINGRP %s\n", gpio_offset, gpio_offset + pin_range->range.npins - 1, pinctrl_dev_get_devname(pctldev), pin_group); list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } EXPORT_SYMBOL_GPL(gpiochip_add_pingroup_range); /** * gpiochip_add_pin_range() - add a range for GPIO <-> pin mapping * @gc: the gpiochip to add the range for * @pinctl_name: the dev_name() of the pin controller to map to * @gpio_offset: the start offset in the current gpio_chip number space * @pin_offset: the start offset in the pin controller number space * @npins: the number of pins from the offset of each pin space (GPIO and * pin controller) to accumulate in this range * * Calling this function directly from a DeviceTree-supported * pinctrl driver is DEPRECATED. Please see Section 2.1 of * Documentation/devicetree/bindings/gpio/gpio.txt on how to * bind pinctrl and gpio drivers via the "gpio-ranges" property. * * Returns: * 0 on success, or a negative errno on failure. */ int gpiochip_add_pin_range(struct gpio_chip *gc, const char *pinctl_name, unsigned int gpio_offset, unsigned int pin_offset, unsigned int npins) { struct gpio_pin_range *pin_range; struct gpio_device *gdev = gc->gpiodev; int ret; pin_range = kzalloc(sizeof(*pin_range), GFP_KERNEL); if (!pin_range) { chip_err(gc, "failed to allocate pin ranges\n"); return -ENOMEM; } /* Use local offset as range ID */ pin_range->range.id = gpio_offset; pin_range->range.gc = gc; pin_range->range.name = gc->label; pin_range->range.base = gdev->base + gpio_offset; pin_range->range.pin_base = pin_offset; pin_range->range.npins = npins; pin_range->pctldev = pinctrl_find_and_add_gpio_range(pinctl_name, &pin_range->range); if (IS_ERR(pin_range->pctldev)) { ret = PTR_ERR(pin_range->pctldev); chip_err(gc, "could not create pin range\n"); kfree(pin_range); return ret; } chip_dbg(gc, "created GPIO range %d->%d ==> %s PIN %d->%d\n", gpio_offset, gpio_offset + npins - 1, pinctl_name, pin_offset, pin_offset + npins - 1); list_add_tail(&pin_range->node, &gdev->pin_ranges); return 0; } EXPORT_SYMBOL_GPL(gpiochip_add_pin_range); /** * gpiochip_remove_pin_ranges() - remove all the GPIO <-> pin mappings * @gc: the chip to remove all the mappings for */ void gpiochip_remove_pin_ranges(struct gpio_chip *gc) { struct gpio_pin_range *pin_range, *tmp; struct gpio_device *gdev = gc->gpiodev; list_for_each_entry_safe(pin_range, tmp, &gdev->pin_ranges, node) { list_del(&pin_range->node); pinctrl_remove_gpio_range(pin_range->pctldev, &pin_range->range); kfree(pin_range); } } EXPORT_SYMBOL_GPL(gpiochip_remove_pin_ranges); #endif /* CONFIG_PINCTRL */ /* These "optional" allocation calls help prevent drivers from stomping * on each other, and help provide better diagnostics in debugfs. * They're called even less than the "set direction" calls. */ static int gpiod_request_commit(struct gpio_desc *desc, const char *label) { unsigned int offset; int ret; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (test_and_set_bit(FLAG_REQUESTED, &desc->flags)) return -EBUSY; /* NOTE: gpio_request() can be called in early boot, * before IRQs are enabled, for non-sleeping (SOC) GPIOs. */ if (guard.gc->request) { offset = gpio_chip_hwgpio(desc); if (gpiochip_line_is_valid(guard.gc, offset)) ret = guard.gc->request(guard.gc, offset); else ret = -EINVAL; if (ret) goto out_clear_bit; } if (guard.gc->get_direction) gpiod_get_direction(desc); ret = desc_set_label(desc, label ? : "?"); if (ret) goto out_clear_bit; return 0; out_clear_bit: clear_bit(FLAG_REQUESTED, &desc->flags); return ret; } /* * This descriptor validation needs to be inserted verbatim into each * function taking a descriptor, so we need to use a preprocessor * macro to avoid endless duplication. If the desc is NULL it is an * optional GPIO and calls should just bail out. */ static int validate_desc(const struct gpio_desc *desc, const char *func) { if (!desc) return 0; if (IS_ERR(desc)) { pr_warn("%s: invalid GPIO (errorpointer)\n", func); return PTR_ERR(desc); } return 1; } #define VALIDATE_DESC(desc) do { \ int __valid = validate_desc(desc, __func__); \ if (__valid <= 0) \ return __valid; \ } while (0) #define VALIDATE_DESC_VOID(desc) do { \ int __valid = validate_desc(desc, __func__); \ if (__valid <= 0) \ return; \ } while (0) int gpiod_request(struct gpio_desc *desc, const char *label) { int ret = -EPROBE_DEFER; VALIDATE_DESC(desc); if (try_module_get(desc->gdev->owner)) { ret = gpiod_request_commit(desc, label); if (ret) module_put(desc->gdev->owner); else gpio_device_get(desc->gdev); } if (ret) gpiod_dbg(desc, "%s: status %d\n", __func__, ret); return ret; } static void gpiod_free_commit(struct gpio_desc *desc) { unsigned long flags; might_sleep(); CLASS(gpio_chip_guard, guard)(desc); flags = READ_ONCE(desc->flags); if (guard.gc && test_bit(FLAG_REQUESTED, &flags)) { if (guard.gc->free) guard.gc->free(guard.gc, gpio_chip_hwgpio(desc)); clear_bit(FLAG_ACTIVE_LOW, &flags); clear_bit(FLAG_REQUESTED, &flags); clear_bit(FLAG_OPEN_DRAIN, &flags); clear_bit(FLAG_OPEN_SOURCE, &flags); clear_bit(FLAG_PULL_UP, &flags); clear_bit(FLAG_PULL_DOWN, &flags); clear_bit(FLAG_BIAS_DISABLE, &flags); clear_bit(FLAG_EDGE_RISING, &flags); clear_bit(FLAG_EDGE_FALLING, &flags); clear_bit(FLAG_IS_HOGGED, &flags); #ifdef CONFIG_OF_DYNAMIC WRITE_ONCE(desc->hog, NULL); #endif desc_set_label(desc, NULL); WRITE_ONCE(desc->flags, flags); #ifdef CONFIG_GPIO_CDEV WRITE_ONCE(desc->debounce_period_us, 0); #endif gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_RELEASED); } } void gpiod_free(struct gpio_desc *desc) { VALIDATE_DESC_VOID(desc); gpiod_free_commit(desc); module_put(desc->gdev->owner); gpio_device_put(desc->gdev); } /** * gpiochip_dup_line_label - Get a copy of the consumer label. * @gc: GPIO chip controlling this line. * @offset: Hardware offset of the line. * * Returns: * Pointer to a copy of the consumer label if the line is requested or NULL * if it's not. If a valid pointer was returned, it must be freed using * kfree(). In case of a memory allocation error, the function returns %ENOMEM. * * Must not be called from atomic context. */ char *gpiochip_dup_line_label(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc; char *label; desc = gpiochip_get_desc(gc, offset); if (IS_ERR(desc)) return NULL; if (!test_bit(FLAG_REQUESTED, &desc->flags)) return NULL; guard(srcu)(&desc->gdev->desc_srcu); label = kstrdup(gpiod_get_label(desc), GFP_KERNEL); if (!label) return ERR_PTR(-ENOMEM); return label; } EXPORT_SYMBOL_GPL(gpiochip_dup_line_label); static inline const char *function_name_or_default(const char *con_id) { return con_id ?: "(default)"; } /** * gpiochip_request_own_desc - Allow GPIO chip to request its own descriptor * @gc: GPIO chip * @hwnum: hardware number of the GPIO for which to request the descriptor * @label: label for the GPIO * @lflags: lookup flags for this GPIO or 0 if default, this can be used to * specify things like line inversion semantics with the machine flags * such as GPIO_OUT_LOW * @dflags: descriptor request flags for this GPIO or 0 if default, this * can be used to specify consumer semantics such as open drain * * Function allows GPIO chip drivers to request and use their own GPIO * descriptors via gpiolib API. Difference to gpiod_request() is that this * function will not increase reference count of the GPIO chip module. This * allows the GPIO chip module to be unloaded as needed (we assume that the * GPIO chip driver handles freeing the GPIOs it has requested). * * Returns: * A pointer to the GPIO descriptor, or an ERR_PTR()-encoded negative error * code on failure. */ struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *gc, unsigned int hwnum, const char *label, enum gpio_lookup_flags lflags, enum gpiod_flags dflags) { struct gpio_desc *desc = gpiochip_get_desc(gc, hwnum); const char *name = function_name_or_default(label); int ret; if (IS_ERR(desc)) { chip_err(gc, "failed to get GPIO %s descriptor\n", name); return desc; } ret = gpiod_request_commit(desc, label); if (ret < 0) return ERR_PTR(ret); ret = gpiod_configure_flags(desc, label, lflags, dflags); if (ret) { gpiod_free_commit(desc); chip_err(gc, "setup of own GPIO %s failed\n", name); return ERR_PTR(ret); } gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_REQUESTED); return desc; } EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); /** * gpiochip_free_own_desc - Free GPIO requested by the chip driver * @desc: GPIO descriptor to free * * Function frees the given GPIO requested previously with * gpiochip_request_own_desc(). */ void gpiochip_free_own_desc(struct gpio_desc *desc) { if (desc) gpiod_free_commit(desc); } EXPORT_SYMBOL_GPL(gpiochip_free_own_desc); /* * Drivers MUST set GPIO direction before making get/set calls. In * some cases this is done in early boot, before IRQs are enabled. * * As a rule these aren't called more than once (except for drivers * using the open-drain emulation idiom) so these are natural places * to accumulate extra debugging checks. Note that we can't (yet) * rely on gpio_request() having been called beforehand. */ int gpio_do_set_config(struct gpio_desc *desc, unsigned long config) { int ret; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (!guard.gc->set_config) return -ENOTSUPP; ret = guard.gc->set_config(guard.gc, gpio_chip_hwgpio(desc), config); #ifdef CONFIG_GPIO_CDEV /* * Special case - if we're setting debounce period, we need to store * it in the descriptor in case user-space wants to know it. */ if (!ret && pinconf_to_config_param(config) == PIN_CONFIG_INPUT_DEBOUNCE) WRITE_ONCE(desc->debounce_period_us, pinconf_to_config_argument(config)); #endif return ret; } static int gpio_set_config_with_argument(struct gpio_desc *desc, enum pin_config_param mode, u32 argument) { unsigned long config; config = pinconf_to_config_packed(mode, argument); return gpio_do_set_config(desc, config); } static int gpio_set_config_with_argument_optional(struct gpio_desc *desc, enum pin_config_param mode, u32 argument) { struct device *dev = &desc->gdev->dev; int gpio = gpio_chip_hwgpio(desc); int ret; ret = gpio_set_config_with_argument(desc, mode, argument); if (ret != -ENOTSUPP) return ret; switch (mode) { case PIN_CONFIG_PERSIST_STATE: dev_dbg(dev, "Persistence not supported for GPIO %d\n", gpio); break; default: break; } return 0; } static int gpio_set_config(struct gpio_desc *desc, enum pin_config_param mode) { return gpio_set_config_with_argument(desc, mode, 0); } static int gpio_set_bias(struct gpio_desc *desc) { enum pin_config_param bias; unsigned long flags; unsigned int arg; flags = READ_ONCE(desc->flags); if (test_bit(FLAG_BIAS_DISABLE, &flags)) bias = PIN_CONFIG_BIAS_DISABLE; else if (test_bit(FLAG_PULL_UP, &flags)) bias = PIN_CONFIG_BIAS_PULL_UP; else if (test_bit(FLAG_PULL_DOWN, &flags)) bias = PIN_CONFIG_BIAS_PULL_DOWN; else return 0; switch (bias) { case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: arg = 1; break; default: arg = 0; break; } return gpio_set_config_with_argument_optional(desc, bias, arg); } /** * gpio_set_debounce_timeout() - Set debounce timeout * @desc: GPIO descriptor to set the debounce timeout * @debounce: Debounce timeout in microseconds * * The function calls the certain GPIO driver to set debounce timeout * in the hardware. * * Returns: * 0 on success, or negative errno on failure. */ int gpio_set_debounce_timeout(struct gpio_desc *desc, unsigned int debounce) { int ret; ret = gpio_set_config_with_argument_optional(desc, PIN_CONFIG_INPUT_DEBOUNCE, debounce); if (!ret) gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); return ret; } /** * gpiod_direction_input - set the GPIO direction to input * @desc: GPIO to set to input * * Set the direction of the passed GPIO to input, such as gpiod_get_value() can * be called safely on it. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_direction_input(struct gpio_desc *desc) { int ret; VALIDATE_DESC(desc); ret = gpiod_direction_input_nonotify(desc); if (ret == 0) gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_input); int gpiod_direction_input_nonotify(struct gpio_desc *desc) { int ret = 0; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; /* * It is legal to have no .get() and .direction_input() specified if * the chip is output-only, but you can't specify .direction_input() * and not support the .get() operation, that doesn't make sense. */ if (!guard.gc->get && guard.gc->direction_input) { gpiod_warn(desc, "%s: missing get() but have direction_input()\n", __func__); return -EIO; } /* * If we have a .direction_input() callback, things are simple, * just call it. Else we are some input-only chip so try to check the * direction (if .get_direction() is supported) else we silently * assume we are in input mode after this. */ if (guard.gc->direction_input) { ret = guard.gc->direction_input(guard.gc, gpio_chip_hwgpio(desc)); } else if (guard.gc->get_direction && (guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc)) != 1)) { gpiod_warn(desc, "%s: missing direction_input() operation and line is output\n", __func__); return -EIO; } if (ret == 0) { clear_bit(FLAG_IS_OUT, &desc->flags); ret = gpio_set_bias(desc); } trace_gpio_direction(desc_to_gpio(desc), 1, ret); return ret; } static int gpiod_direction_output_raw_commit(struct gpio_desc *desc, int value) { int val = !!value, ret = 0; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; /* * It's OK not to specify .direction_output() if the gpiochip is * output-only, but if there is then not even a .set() operation it * is pretty tricky to drive the output line. */ if (!guard.gc->set && !guard.gc->direction_output) { gpiod_warn(desc, "%s: missing set() and direction_output() operations\n", __func__); return -EIO; } if (guard.gc->direction_output) { ret = guard.gc->direction_output(guard.gc, gpio_chip_hwgpio(desc), val); } else { /* Check that we are in output mode if we can */ if (guard.gc->get_direction && guard.gc->get_direction(guard.gc, gpio_chip_hwgpio(desc))) { gpiod_warn(desc, "%s: missing direction_output() operation\n", __func__); return -EIO; } /* * If we can't actively set the direction, we are some * output-only chip, so just drive the output as desired. */ guard.gc->set(guard.gc, gpio_chip_hwgpio(desc), val); } if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); trace_gpio_value(desc_to_gpio(desc), 0, val); trace_gpio_direction(desc_to_gpio(desc), 0, ret); return ret; } /** * gpiod_direction_output_raw - set the GPIO direction to output * @desc: GPIO to set to output * @value: initial output value of the GPIO * * Set the direction of the passed GPIO to output, such as gpiod_set_value() can * be called safely on it. The initial value of the output must be specified * as raw value on the physical line without regard for the ACTIVE_LOW status. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_direction_output_raw(struct gpio_desc *desc, int value) { int ret; VALIDATE_DESC(desc); ret = gpiod_direction_output_raw_commit(desc, value); if (ret == 0) gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_output_raw); /** * gpiod_direction_output - set the GPIO direction to output * @desc: GPIO to set to output * @value: initial output value of the GPIO * * Set the direction of the passed GPIO to output, such as gpiod_set_value() can * be called safely on it. The initial value of the output must be specified * as the logical value of the GPIO, i.e. taking its ACTIVE_LOW status into * account. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_direction_output(struct gpio_desc *desc, int value) { int ret; VALIDATE_DESC(desc); ret = gpiod_direction_output_nonotify(desc, value); if (ret == 0) gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); return ret; } EXPORT_SYMBOL_GPL(gpiod_direction_output); int gpiod_direction_output_nonotify(struct gpio_desc *desc, int value) { unsigned long flags; int ret; flags = READ_ONCE(desc->flags); if (test_bit(FLAG_ACTIVE_LOW, &flags)) value = !value; else value = !!value; /* GPIOs used for enabled IRQs shall not be set as output */ if (test_bit(FLAG_USED_AS_IRQ, &flags) && test_bit(FLAG_IRQ_IS_ENABLED, &flags)) { gpiod_err(desc, "%s: tried to set a GPIO tied to an IRQ as output\n", __func__); return -EIO; } if (test_bit(FLAG_OPEN_DRAIN, &flags)) { /* First see if we can enable open drain in hardware */ ret = gpio_set_config(desc, PIN_CONFIG_DRIVE_OPEN_DRAIN); if (!ret) goto set_output_value; /* Emulate open drain by not actively driving the line high */ if (value) { ret = gpiod_direction_input_nonotify(desc); goto set_output_flag; } } else if (test_bit(FLAG_OPEN_SOURCE, &flags)) { ret = gpio_set_config(desc, PIN_CONFIG_DRIVE_OPEN_SOURCE); if (!ret) goto set_output_value; /* Emulate open source by not actively driving the line low */ if (!value) { ret = gpiod_direction_input_nonotify(desc); goto set_output_flag; } } else { gpio_set_config(desc, PIN_CONFIG_DRIVE_PUSH_PULL); } set_output_value: ret = gpio_set_bias(desc); if (ret) return ret; return gpiod_direction_output_raw_commit(desc, value); set_output_flag: /* * When emulating open-source or open-drain functionalities by not * actively driving the line (setting mode to input) we still need to * set the IS_OUT flag or otherwise we won't be able to set the line * value anymore. */ if (ret == 0) set_bit(FLAG_IS_OUT, &desc->flags); return ret; } /** * gpiod_enable_hw_timestamp_ns - Enable hardware timestamp in nanoseconds. * * @desc: GPIO to enable. * @flags: Flags related to GPIO edge. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_enable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags) { int ret = 0; VALIDATE_DESC(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (!guard.gc->en_hw_timestamp) { gpiod_warn(desc, "%s: hw ts not supported\n", __func__); return -ENOTSUPP; } ret = guard.gc->en_hw_timestamp(guard.gc, gpio_chip_hwgpio(desc), flags); if (ret) gpiod_warn(desc, "%s: hw ts request failed\n", __func__); return ret; } EXPORT_SYMBOL_GPL(gpiod_enable_hw_timestamp_ns); /** * gpiod_disable_hw_timestamp_ns - Disable hardware timestamp. * * @desc: GPIO to disable. * @flags: Flags related to GPIO edge, same value as used during enable call. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_disable_hw_timestamp_ns(struct gpio_desc *desc, unsigned long flags) { int ret = 0; VALIDATE_DESC(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (!guard.gc->dis_hw_timestamp) { gpiod_warn(desc, "%s: hw ts not supported\n", __func__); return -ENOTSUPP; } ret = guard.gc->dis_hw_timestamp(guard.gc, gpio_chip_hwgpio(desc), flags); if (ret) gpiod_warn(desc, "%s: hw ts release failed\n", __func__); return ret; } EXPORT_SYMBOL_GPL(gpiod_disable_hw_timestamp_ns); /** * gpiod_set_config - sets @config for a GPIO * @desc: descriptor of the GPIO for which to set the configuration * @config: Same packed config format as generic pinconf * * Returns: * 0 on success, %-ENOTSUPP if the controller doesn't support setting the * configuration. */ int gpiod_set_config(struct gpio_desc *desc, unsigned long config) { int ret; VALIDATE_DESC(desc); ret = gpio_do_set_config(desc, config); if (!ret) { /* These are the only options we notify the userspace about. */ switch (pinconf_to_config_param(config)) { case PIN_CONFIG_BIAS_DISABLE: case PIN_CONFIG_BIAS_PULL_DOWN: case PIN_CONFIG_BIAS_PULL_UP: case PIN_CONFIG_DRIVE_OPEN_DRAIN: case PIN_CONFIG_DRIVE_OPEN_SOURCE: case PIN_CONFIG_DRIVE_PUSH_PULL: case PIN_CONFIG_INPUT_DEBOUNCE: gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); break; default: break; } } return ret; } EXPORT_SYMBOL_GPL(gpiod_set_config); /** * gpiod_set_debounce - sets @debounce time for a GPIO * @desc: descriptor of the GPIO for which to set debounce time * @debounce: debounce time in microseconds * * Returns: * 0 on success, %-ENOTSUPP if the controller doesn't support setting the * debounce time. */ int gpiod_set_debounce(struct gpio_desc *desc, unsigned int debounce) { unsigned long config; config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce); return gpiod_set_config(desc, config); } EXPORT_SYMBOL_GPL(gpiod_set_debounce); /** * gpiod_set_transitory - Lose or retain GPIO state on suspend or reset * @desc: descriptor of the GPIO for which to configure persistence * @transitory: True to lose state on suspend or reset, false for persistence * * Returns: * 0 on success, otherwise a negative error code. */ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) { VALIDATE_DESC(desc); /* * Handle FLAG_TRANSITORY first, enabling queries to gpiolib for * persistence state. */ assign_bit(FLAG_TRANSITORY, &desc->flags, transitory); /* If the driver supports it, set the persistence state now */ return gpio_set_config_with_argument_optional(desc, PIN_CONFIG_PERSIST_STATE, !transitory); } /** * gpiod_is_active_low - test whether a GPIO is active-low or not * @desc: the gpio descriptor to test * * Returns: * 1 if the GPIO is active-low, 0 otherwise. */ int gpiod_is_active_low(const struct gpio_desc *desc) { VALIDATE_DESC(desc); return test_bit(FLAG_ACTIVE_LOW, &desc->flags); } EXPORT_SYMBOL_GPL(gpiod_is_active_low); /** * gpiod_toggle_active_low - toggle whether a GPIO is active-low or not * @desc: the gpio descriptor to change */ void gpiod_toggle_active_low(struct gpio_desc *desc) { VALIDATE_DESC_VOID(desc); change_bit(FLAG_ACTIVE_LOW, &desc->flags); gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); } EXPORT_SYMBOL_GPL(gpiod_toggle_active_low); static int gpio_chip_get_value(struct gpio_chip *gc, const struct gpio_desc *desc) { return gc->get ? gc->get(gc, gpio_chip_hwgpio(desc)) : -EIO; } /* I/O calls are only valid after configuration completed; the relevant * "is this a valid GPIO" error checks should already have been done. * * "Get" operations are often inlinable as reading a pin value register, * and masking the relevant bit in that register. * * When "set" operations are inlinable, they involve writing that mask to * one register to set a low value, or a different register to set it high. * Otherwise locking is needed, so there may be little value to inlining. * *------------------------------------------------------------------------ * * IMPORTANT!!! The hot paths -- get/set value -- assume that callers * have requested the GPIO. That can include implicit requesting by * a direction setting call. Marking a gpio as requested locks its chip * in memory, guaranteeing that these table lookups need no more locking * and that gpiochip_remove() will fail. * * REVISIT when debugging, consider adding some instrumentation to ensure * that the GPIO was actually requested. */ static int gpiod_get_raw_value_commit(const struct gpio_desc *desc) { struct gpio_device *gdev; struct gpio_chip *gc; int value; /* FIXME Unable to use gpio_chip_guard due to const desc. */ gdev = desc->gdev; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) return -ENODEV; value = gpio_chip_get_value(gc, desc); value = value < 0 ? value : !!value; trace_gpio_value(desc_to_gpio(desc), 1, value); return value; } static int gpio_chip_get_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { if (gc->get_multiple) return gc->get_multiple(gc, mask, bits); if (gc->get) { int i, value; for_each_set_bit(i, mask, gc->ngpio) { value = gc->get(gc, i); if (value < 0) return value; __assign_bit(i, bits, value); } return 0; } return -EIO; } /* The 'other' chip must be protected with its GPIO device's SRCU. */ static bool gpio_device_chip_cmp(struct gpio_device *gdev, struct gpio_chip *gc) { guard(srcu)(&gdev->srcu); return gc == srcu_dereference(gdev->chip, &gdev->srcu); } int gpiod_get_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { int ret, i = 0; /* * Validate array_info against desc_array and its size. * It should immediately follow desc_array if both * have been obtained from the same gpiod_get_array() call. */ if (array_info && array_info->desc == desc_array && array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) WARN_ON(array_info->chip->can_sleep); ret = gpio_chip_get_multiple(array_info->chip, array_info->get_mask, value_bitmap); if (ret) return ret; if (!raw && !bitmap_empty(array_info->invert_mask, array_size)) bitmap_xor(value_bitmap, value_bitmap, array_info->invert_mask, array_size); i = find_first_zero_bit(array_info->get_mask, array_size); if (i == array_size) return 0; } else { array_info = NULL; } while (i < array_size) { DECLARE_BITMAP(fastpath_mask, FASTPATH_NGPIO); DECLARE_BITMAP(fastpath_bits, FASTPATH_NGPIO); unsigned long *mask, *bits; int first, j; CLASS(gpio_chip_guard, guard)(desc_array[i]); if (!guard.gc) return -ENODEV; if (likely(guard.gc->ngpio <= FASTPATH_NGPIO)) { mask = fastpath_mask; bits = fastpath_bits; } else { gfp_t flags = can_sleep ? GFP_KERNEL : GFP_ATOMIC; mask = bitmap_alloc(guard.gc->ngpio, flags); if (!mask) return -ENOMEM; bits = bitmap_alloc(guard.gc->ngpio, flags); if (!bits) { bitmap_free(mask); return -ENOMEM; } } bitmap_zero(mask, guard.gc->ngpio); if (!can_sleep) WARN_ON(guard.gc->can_sleep); /* collect all inputs belonging to the same chip */ first = i; do { const struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); __set_bit(hwgpio, mask); i++; if (array_info) i = find_next_zero_bit(array_info->get_mask, array_size, i); } while ((i < array_size) && gpio_device_chip_cmp(desc_array[i]->gdev, guard.gc)); ret = gpio_chip_get_multiple(guard.gc, mask, bits); if (ret) { if (mask != fastpath_mask) bitmap_free(mask); if (bits != fastpath_bits) bitmap_free(bits); return ret; } for (j = first; j < i; ) { const struct gpio_desc *desc = desc_array[j]; int hwgpio = gpio_chip_hwgpio(desc); int value = test_bit(hwgpio, bits); if (!raw && test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; __assign_bit(j, value_bitmap, value); trace_gpio_value(desc_to_gpio(desc), 1, value); j++; if (array_info) j = find_next_zero_bit(array_info->get_mask, i, j); } if (mask != fastpath_mask) bitmap_free(mask); if (bits != fastpath_bits) bitmap_free(bits); } return 0; } /** * gpiod_get_raw_value() - return a gpio's raw value * @desc: gpio whose value will be returned * * Returns: * The GPIO's raw value, i.e. the value of the physical line disregarding * its ACTIVE_LOW status, or negative errno on failure. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ int gpiod_get_raw_value(const struct gpio_desc *desc) { VALIDATE_DESC(desc); /* Should be using gpiod_get_raw_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); return gpiod_get_raw_value_commit(desc); } EXPORT_SYMBOL_GPL(gpiod_get_raw_value); /** * gpiod_get_value() - return a gpio's value * @desc: gpio whose value will be returned * * Returns: * The GPIO's logical value, i.e. taking the ACTIVE_LOW status into * account, or negative errno on failure. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ int gpiod_get_value(const struct gpio_desc *desc) { int value; VALIDATE_DESC(desc); /* Should be using gpiod_get_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); value = gpiod_get_raw_value_commit(desc); if (value < 0) return value; if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; return value; } EXPORT_SYMBOL_GPL(gpiod_get_value); /** * gpiod_get_raw_array_value() - read raw values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. * * This function can be called from contexts where we cannot sleep, * and it will complain if the GPIO chip functions potentially sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_get_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(true, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value); /** * gpiod_get_array_value() - read values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. * * This function can be called from contexts where we cannot sleep, * and it will complain if the GPIO chip functions potentially sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_get_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(false, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_array_value); /* * gpio_set_open_drain_value_commit() - Set the open drain gpio's value. * @desc: gpio descriptor whose state need to be set. * @value: Non-zero for setting it HIGH otherwise it will set to LOW. */ static void gpio_set_open_drain_value_commit(struct gpio_desc *desc, bool value) { int ret = 0, offset = gpio_chip_hwgpio(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return; if (value) { ret = guard.gc->direction_input(guard.gc, offset); } else { ret = guard.gc->direction_output(guard.gc, offset, 0); if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); } trace_gpio_direction(desc_to_gpio(desc), value, ret); if (ret < 0) gpiod_err(desc, "%s: Error in set_value for open drain err %d\n", __func__, ret); } /* * _gpio_set_open_source_value() - Set the open source gpio's value. * @desc: gpio descriptor whose state need to be set. * @value: Non-zero for setting it HIGH otherwise it will set to LOW. */ static void gpio_set_open_source_value_commit(struct gpio_desc *desc, bool value) { int ret = 0, offset = gpio_chip_hwgpio(desc); CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return; if (value) { ret = guard.gc->direction_output(guard.gc, offset, 1); if (!ret) set_bit(FLAG_IS_OUT, &desc->flags); } else { ret = guard.gc->direction_input(guard.gc, offset); } trace_gpio_direction(desc_to_gpio(desc), !value, ret); if (ret < 0) gpiod_err(desc, "%s: Error in set_value for open source err %d\n", __func__, ret); } static void gpiod_set_raw_value_commit(struct gpio_desc *desc, bool value) { CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return; trace_gpio_value(desc_to_gpio(desc), 0, value); guard.gc->set(guard.gc, gpio_chip_hwgpio(desc), value); } /* * set multiple outputs on the same chip; * use the chip's set_multiple function if available; * otherwise set the outputs sequentially; * @chip: the GPIO chip we operate on * @mask: bit mask array; one bit per output; BITS_PER_LONG bits per word * defines which outputs are to be changed * @bits: bit value array; one bit per output; BITS_PER_LONG bits per word * defines the values the outputs specified by mask are to be set to */ static void gpio_chip_set_multiple(struct gpio_chip *gc, unsigned long *mask, unsigned long *bits) { if (gc->set_multiple) { gc->set_multiple(gc, mask, bits); } else { unsigned int i; /* set outputs if the corresponding mask bit is set */ for_each_set_bit(i, mask, gc->ngpio) gc->set(gc, i, test_bit(i, bits)); } } int gpiod_set_array_value_complex(bool raw, bool can_sleep, unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { int i = 0; /* * Validate array_info against desc_array and its size. * It should immediately follow desc_array if both * have been obtained from the same gpiod_get_array() call. */ if (array_info && array_info->desc == desc_array && array_size <= array_info->size && (void *)array_info == desc_array + array_info->size) { if (!can_sleep) WARN_ON(array_info->chip->can_sleep); if (!raw && !bitmap_empty(array_info->invert_mask, array_size)) bitmap_xor(value_bitmap, value_bitmap, array_info->invert_mask, array_size); gpio_chip_set_multiple(array_info->chip, array_info->set_mask, value_bitmap); i = find_first_zero_bit(array_info->set_mask, array_size); if (i == array_size) return 0; } else { array_info = NULL; } while (i < array_size) { DECLARE_BITMAP(fastpath_mask, FASTPATH_NGPIO); DECLARE_BITMAP(fastpath_bits, FASTPATH_NGPIO); unsigned long *mask, *bits; int count = 0; CLASS(gpio_chip_guard, guard)(desc_array[i]); if (!guard.gc) return -ENODEV; if (likely(guard.gc->ngpio <= FASTPATH_NGPIO)) { mask = fastpath_mask; bits = fastpath_bits; } else { gfp_t flags = can_sleep ? GFP_KERNEL : GFP_ATOMIC; mask = bitmap_alloc(guard.gc->ngpio, flags); if (!mask) return -ENOMEM; bits = bitmap_alloc(guard.gc->ngpio, flags); if (!bits) { bitmap_free(mask); return -ENOMEM; } } bitmap_zero(mask, guard.gc->ngpio); if (!can_sleep) WARN_ON(guard.gc->can_sleep); do { struct gpio_desc *desc = desc_array[i]; int hwgpio = gpio_chip_hwgpio(desc); int value = test_bit(i, value_bitmap); /* * Pins applicable for fast input but not for * fast output processing may have been already * inverted inside the fast path, skip them. */ if (!raw && !(array_info && test_bit(i, array_info->invert_mask)) && test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; trace_gpio_value(desc_to_gpio(desc), 0, value); /* * collect all normal outputs belonging to the same chip * open drain and open source outputs are set individually */ if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) && !raw) { gpio_set_open_drain_value_commit(desc, value); } else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags) && !raw) { gpio_set_open_source_value_commit(desc, value); } else { __set_bit(hwgpio, mask); __assign_bit(hwgpio, bits, value); count++; } i++; if (array_info) i = find_next_zero_bit(array_info->set_mask, array_size, i); } while ((i < array_size) && gpio_device_chip_cmp(desc_array[i]->gdev, guard.gc)); /* push collected bits to outputs */ if (count != 0) gpio_chip_set_multiple(guard.gc, mask, bits); if (mask != fastpath_mask) bitmap_free(mask); if (bits != fastpath_bits) bitmap_free(bits); } return 0; } /** * gpiod_set_raw_value() - assign a gpio's raw value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the raw value of the GPIO, i.e. the value of its physical line without * regard for its ACTIVE_LOW status. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ void gpiod_set_raw_value(struct gpio_desc *desc, int value) { VALIDATE_DESC_VOID(desc); /* Should be using gpiod_set_raw_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); gpiod_set_raw_value_commit(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_raw_value); /** * gpiod_set_value_nocheck() - set a GPIO line value without checking * @desc: the descriptor to set the value on * @value: value to set * * This sets the value of a GPIO line backing a descriptor, applying * different semantic quirks like active low and open drain/source * handling. */ static void gpiod_set_value_nocheck(struct gpio_desc *desc, int value) { if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; if (test_bit(FLAG_OPEN_DRAIN, &desc->flags)) gpio_set_open_drain_value_commit(desc, value); else if (test_bit(FLAG_OPEN_SOURCE, &desc->flags)) gpio_set_open_source_value_commit(desc, value); else gpiod_set_raw_value_commit(desc, value); } /** * gpiod_set_value() - assign a gpio's value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW, * OPEN_DRAIN and OPEN_SOURCE flags into account. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. */ void gpiod_set_value(struct gpio_desc *desc, int value) { VALIDATE_DESC_VOID(desc); /* Should be using gpiod_set_value_cansleep() */ WARN_ON(desc->gdev->can_sleep); gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value); /** * gpiod_set_raw_array_value() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(true, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value); /** * gpiod_set_array_value() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. * * This function can be called from contexts where we cannot sleep, and will * complain if the GPIO chip functions potentially sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(false, false, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_array_value); /** * gpiod_cansleep() - report whether gpio value access may sleep * @desc: gpio to check * * Returns: * 0 for non-sleepable, 1 for sleepable, or an error code in case of error. */ int gpiod_cansleep(const struct gpio_desc *desc) { VALIDATE_DESC(desc); return desc->gdev->can_sleep; } EXPORT_SYMBOL_GPL(gpiod_cansleep); /** * gpiod_set_consumer_name() - set the consumer name for the descriptor * @desc: gpio to set the consumer name on * @name: the new consumer name * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_set_consumer_name(struct gpio_desc *desc, const char *name) { int ret; VALIDATE_DESC(desc); ret = desc_set_label(desc, name); if (ret == 0) gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_CONFIG); return ret; } EXPORT_SYMBOL_GPL(gpiod_set_consumer_name); /** * gpiod_to_irq() - return the IRQ corresponding to a GPIO * @desc: gpio whose IRQ will be returned (already requested) * * Returns: * The IRQ corresponding to the passed GPIO, or an error code in case of error. */ int gpiod_to_irq(const struct gpio_desc *desc) { struct gpio_device *gdev; struct gpio_chip *gc; int offset; /* * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics * requires this function to not return zero on an invalid descriptor * but rather a negative error number. */ if (IS_ERR_OR_NULL(desc)) return -EINVAL; gdev = desc->gdev; /* FIXME Cannot use gpio_chip_guard due to const desc. */ guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) return -ENODEV; offset = gpio_chip_hwgpio(desc); if (gc->to_irq) { int retirq = gc->to_irq(gc, offset); /* Zero means NO_IRQ */ if (!retirq) return -ENXIO; return retirq; } #ifdef CONFIG_GPIOLIB_IRQCHIP if (gc->irq.chip) { /* * Avoid race condition with other code, which tries to lookup * an IRQ before the irqchip has been properly registered, * i.e. while gpiochip is still being brought up. */ return -EPROBE_DEFER; } #endif return -ENXIO; } EXPORT_SYMBOL_GPL(gpiod_to_irq); /** * gpiochip_lock_as_irq() - lock a GPIO to be used as IRQ * @gc: the chip the GPIO to lock belongs to * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to lock down * a certain GPIO line to be used for IRQs. * * Returns: * 0 on success, or negative errno on failure. */ int gpiochip_lock_as_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc; desc = gpiochip_get_desc(gc, offset); if (IS_ERR(desc)) return PTR_ERR(desc); /* * If it's fast: flush the direction setting if something changed * behind our back */ if (!gc->can_sleep && gc->get_direction) { int dir = gpiod_get_direction(desc); if (dir < 0) { chip_err(gc, "%s: cannot get GPIO direction\n", __func__); return dir; } } /* To be valid for IRQ the line needs to be input or open drain */ if (test_bit(FLAG_IS_OUT, &desc->flags) && !test_bit(FLAG_OPEN_DRAIN, &desc->flags)) { chip_err(gc, "%s: tried to flag a GPIO set as output for IRQ\n", __func__); return -EIO; } set_bit(FLAG_USED_AS_IRQ, &desc->flags); set_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); return 0; } EXPORT_SYMBOL_GPL(gpiochip_lock_as_irq); /** * gpiochip_unlock_as_irq() - unlock a GPIO used as IRQ * @gc: the chip the GPIO to lock belongs to * @offset: the offset of the GPIO to lock as IRQ * * This is used directly by GPIO drivers that want to indicate * that a certain GPIO is no longer used exclusively for IRQ. */ void gpiochip_unlock_as_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc; desc = gpiochip_get_desc(gc, offset); if (IS_ERR(desc)) return; clear_bit(FLAG_USED_AS_IRQ, &desc->flags); clear_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); } EXPORT_SYMBOL_GPL(gpiochip_unlock_as_irq); void gpiochip_disable_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc = gpiochip_get_desc(gc, offset); if (!IS_ERR(desc) && !WARN_ON(!test_bit(FLAG_USED_AS_IRQ, &desc->flags))) clear_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); } EXPORT_SYMBOL_GPL(gpiochip_disable_irq); void gpiochip_enable_irq(struct gpio_chip *gc, unsigned int offset) { struct gpio_desc *desc = gpiochip_get_desc(gc, offset); if (!IS_ERR(desc) && !WARN_ON(!test_bit(FLAG_USED_AS_IRQ, &desc->flags))) { /* * We must not be output when using IRQ UNLESS we are * open drain. */ WARN_ON(test_bit(FLAG_IS_OUT, &desc->flags) && !test_bit(FLAG_OPEN_DRAIN, &desc->flags)); set_bit(FLAG_IRQ_IS_ENABLED, &desc->flags); } } EXPORT_SYMBOL_GPL(gpiochip_enable_irq); bool gpiochip_line_is_irq(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return test_bit(FLAG_USED_AS_IRQ, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_irq); int gpiochip_reqres_irq(struct gpio_chip *gc, unsigned int offset) { int ret; if (!try_module_get(gc->gpiodev->owner)) return -ENODEV; ret = gpiochip_lock_as_irq(gc, offset); if (ret) { chip_err(gc, "unable to lock HW IRQ %u for IRQ\n", offset); module_put(gc->gpiodev->owner); return ret; } return 0; } EXPORT_SYMBOL_GPL(gpiochip_reqres_irq); void gpiochip_relres_irq(struct gpio_chip *gc, unsigned int offset) { gpiochip_unlock_as_irq(gc, offset); module_put(gc->gpiodev->owner); } EXPORT_SYMBOL_GPL(gpiochip_relres_irq); bool gpiochip_line_is_open_drain(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return test_bit(FLAG_OPEN_DRAIN, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_open_drain); bool gpiochip_line_is_open_source(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return test_bit(FLAG_OPEN_SOURCE, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_open_source); bool gpiochip_line_is_persistent(struct gpio_chip *gc, unsigned int offset) { if (offset >= gc->ngpio) return false; return !test_bit(FLAG_TRANSITORY, &gc->gpiodev->descs[offset].flags); } EXPORT_SYMBOL_GPL(gpiochip_line_is_persistent); /** * gpiod_get_raw_value_cansleep() - return a gpio's raw value * @desc: gpio whose value will be returned * * Returns: * The GPIO's raw value, i.e. the value of the physical line disregarding * its ACTIVE_LOW status, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) { might_sleep(); VALIDATE_DESC(desc); return gpiod_get_raw_value_commit(desc); } EXPORT_SYMBOL_GPL(gpiod_get_raw_value_cansleep); /** * gpiod_get_value_cansleep() - return a gpio's value * @desc: gpio whose value will be returned * * Returns: * The GPIO's logical value, i.e. taking the ACTIVE_LOW status into * account, or negative errno on failure. * * This function is to be called from contexts that can sleep. */ int gpiod_get_value_cansleep(const struct gpio_desc *desc) { int value; might_sleep(); VALIDATE_DESC(desc); value = gpiod_get_raw_value_commit(desc); if (value < 0) return value; if (test_bit(FLAG_ACTIVE_LOW, &desc->flags)) value = !value; return value; } EXPORT_SYMBOL_GPL(gpiod_get_value_cansleep); /** * gpiod_get_raw_array_value_cansleep() - read raw values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. * * This function is to be called from contexts that can sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_get_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(true, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_raw_array_value_cansleep); /** * gpiod_get_array_value_cansleep() - read values from an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be read * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap to store the read values * * Read the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. * * This function is to be called from contexts that can sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_get_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_get_array_value_complex(false, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_get_array_value_cansleep); /** * gpiod_set_raw_value_cansleep() - assign a gpio's raw value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the raw value of the GPIO, i.e. the value of its physical line without * regard for its ACTIVE_LOW status. * * This function is to be called from contexts that can sleep. */ void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) { might_sleep(); VALIDATE_DESC_VOID(desc); gpiod_set_raw_value_commit(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_raw_value_cansleep); /** * gpiod_set_value_cansleep() - assign a gpio's value * @desc: gpio whose value will be assigned * @value: value to assign * * Set the logical value of the GPIO, i.e. taking its ACTIVE_LOW status into * account * * This function is to be called from contexts that can sleep. */ void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { might_sleep(); VALIDATE_DESC_VOID(desc); gpiod_set_value_nocheck(desc, value); } EXPORT_SYMBOL_GPL(gpiod_set_value_cansleep); /** * gpiod_set_raw_array_value_cansleep() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the raw values of the GPIOs, i.e. the values of the physical lines * without regard for their ACTIVE_LOW status. * * This function is to be called from contexts that can sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(true, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_raw_array_value_cansleep); /** * gpiod_add_lookup_tables() - register GPIO device consumers * @tables: list of tables of consumers to register * @n: number of tables in the list */ void gpiod_add_lookup_tables(struct gpiod_lookup_table **tables, size_t n) { unsigned int i; mutex_lock(&gpio_lookup_lock); for (i = 0; i < n; i++) list_add_tail(&tables[i]->list, &gpio_lookup_list); mutex_unlock(&gpio_lookup_lock); } /** * gpiod_set_array_value_cansleep() - assign values to an array of GPIOs * @array_size: number of elements in the descriptor array / value bitmap * @desc_array: array of GPIO descriptors whose values will be assigned * @array_info: information on applicability of fast bitmap processing path * @value_bitmap: bitmap of values to assign * * Set the logical values of the GPIOs, i.e. taking their ACTIVE_LOW status * into account. * * This function is to be called from contexts that can sleep. * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, struct gpio_array *array_info, unsigned long *value_bitmap) { might_sleep(); if (!desc_array) return -EINVAL; return gpiod_set_array_value_complex(false, true, array_size, desc_array, array_info, value_bitmap); } EXPORT_SYMBOL_GPL(gpiod_set_array_value_cansleep); void gpiod_line_state_notify(struct gpio_desc *desc, unsigned long action) { atomic_notifier_call_chain(&desc->gdev->line_state_notifier, action, desc); } /** * gpiod_add_lookup_table() - register GPIO device consumers * @table: table of consumers to register */ void gpiod_add_lookup_table(struct gpiod_lookup_table *table) { gpiod_add_lookup_tables(&table, 1); } EXPORT_SYMBOL_GPL(gpiod_add_lookup_table); /** * gpiod_remove_lookup_table() - unregister GPIO device consumers * @table: table of consumers to unregister */ void gpiod_remove_lookup_table(struct gpiod_lookup_table *table) { /* Nothing to remove */ if (!table) return; mutex_lock(&gpio_lookup_lock); list_del(&table->list); mutex_unlock(&gpio_lookup_lock); } EXPORT_SYMBOL_GPL(gpiod_remove_lookup_table); /** * gpiod_add_hogs() - register a set of GPIO hogs from machine code * @hogs: table of gpio hog entries with a zeroed sentinel at the end */ void gpiod_add_hogs(struct gpiod_hog *hogs) { struct gpiod_hog *hog; mutex_lock(&gpio_machine_hogs_mutex); for (hog = &hogs[0]; hog->chip_label; hog++) { list_add_tail(&hog->list, &gpio_machine_hogs); /* * The chip may have been registered earlier, so check if it * exists and, if so, try to hog the line now. */ struct gpio_device *gdev __free(gpio_device_put) = gpio_device_find_by_label(hog->chip_label); if (gdev) gpiochip_machine_hog(gpio_device_get_chip(gdev), hog); } mutex_unlock(&gpio_machine_hogs_mutex); } EXPORT_SYMBOL_GPL(gpiod_add_hogs); void gpiod_remove_hogs(struct gpiod_hog *hogs) { struct gpiod_hog *hog; mutex_lock(&gpio_machine_hogs_mutex); for (hog = &hogs[0]; hog->chip_label; hog++) list_del(&hog->list); mutex_unlock(&gpio_machine_hogs_mutex); } EXPORT_SYMBOL_GPL(gpiod_remove_hogs); static struct gpiod_lookup_table *gpiod_find_lookup_table(struct device *dev) { const char *dev_id = dev ? dev_name(dev) : NULL; struct gpiod_lookup_table *table; list_for_each_entry(table, &gpio_lookup_list, list) { if (table->dev_id && dev_id) { /* * Valid strings on both ends, must be identical to have * a match */ if (!strcmp(table->dev_id, dev_id)) return table; } else { /* * One of the pointers is NULL, so both must be to have * a match */ if (dev_id == table->dev_id) return table; } } return NULL; } static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id, unsigned int idx, unsigned long *flags) { struct gpio_desc *desc = ERR_PTR(-ENOENT); struct gpiod_lookup_table *table; struct gpiod_lookup *p; struct gpio_chip *gc; guard(mutex)(&gpio_lookup_lock); table = gpiod_find_lookup_table(dev); if (!table) return desc; for (p = &table->table[0]; p->key; p++) { /* idx must always match exactly */ if (p->idx != idx) continue; /* If the lookup entry has a con_id, require exact match */ if (p->con_id && (!con_id || strcmp(p->con_id, con_id))) continue; if (p->chip_hwnum == U16_MAX) { desc = gpio_name_to_desc(p->key); if (desc) { *flags = p->flags; return desc; } dev_warn(dev, "cannot find GPIO line %s, deferring\n", p->key); return ERR_PTR(-EPROBE_DEFER); } struct gpio_device *gdev __free(gpio_device_put) = gpio_device_find_by_label(p->key); if (!gdev) { /* * As the lookup table indicates a chip with * p->key should exist, assume it may * still appear later and let the interested * consumer be probed again or let the Deferred * Probe infrastructure handle the error. */ dev_warn(dev, "cannot find GPIO chip %s, deferring\n", p->key); return ERR_PTR(-EPROBE_DEFER); } gc = gpio_device_get_chip(gdev); if (gc->ngpio <= p->chip_hwnum) { dev_err(dev, "requested GPIO %u (%u) is out of range [0..%u] for chip %s\n", idx, p->chip_hwnum, gc->ngpio - 1, gc->label); return ERR_PTR(-EINVAL); } desc = gpio_device_get_desc(gdev, p->chip_hwnum); *flags = p->flags; return desc; } return desc; } static int platform_gpio_count(struct device *dev, const char *con_id) { struct gpiod_lookup_table *table; struct gpiod_lookup *p; unsigned int count = 0; scoped_guard(mutex, &gpio_lookup_lock) { table = gpiod_find_lookup_table(dev); if (!table) return -ENOENT; for (p = &table->table[0]; p->key; p++) { if ((con_id && p->con_id && !strcmp(con_id, p->con_id)) || (!con_id && !p->con_id)) count++; } } if (!count) return -ENOENT; return count; } static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, struct device *consumer, const char *con_id, unsigned int idx, enum gpiod_flags *flags, unsigned long *lookupflags) { const char *name = function_name_or_default(con_id); struct gpio_desc *desc = ERR_PTR(-ENOENT); if (is_of_node(fwnode)) { dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags); } else if (is_acpi_node(fwnode)) { dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); } else if (is_software_node(fwnode)) { dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", fwnode, name); desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags); } return desc; } struct gpio_desc *gpiod_find_and_request(struct device *consumer, struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags flags, const char *label, bool platform_lookup_allowed) { unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; const char *name = function_name_or_default(con_id); /* * scoped_guard() is implemented as a for loop, meaning static * analyzers will complain about these two not being initialized. */ struct gpio_desc *desc = NULL; int ret = 0; scoped_guard(srcu, &gpio_devices_srcu) { desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx, &flags, &lookupflags); if (gpiod_not_found(desc) && platform_lookup_allowed) { /* * Either we are not using DT or ACPI, or their lookup * did not return a result. In that case, use platform * lookup as a fallback. */ dev_dbg(consumer, "using lookup tables for GPIO lookup\n"); desc = gpiod_find(consumer, con_id, idx, &lookupflags); } if (IS_ERR(desc)) { dev_dbg(consumer, "No GPIO consumer %s found\n", name); return desc; } /* * If a connection label was passed use that, else attempt to use * the device name as label */ ret = gpiod_request(desc, label); } if (ret) { if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) return ERR_PTR(ret); /* * This happens when there are several consumers for * the same GPIO line: we just return here without * further initialization. It is a bit of a hack. * This is necessary to support fixed regulators. * * FIXME: Make this more sane and safe. */ dev_info(consumer, "nonexclusive access to GPIO for %s\n", name); return desc; } ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (ret < 0) { gpiod_put(desc); dev_err(consumer, "setup of GPIO %s failed: %d\n", name, ret); return ERR_PTR(ret); } gpiod_line_state_notify(desc, GPIO_V2_LINE_CHANGED_REQUESTED); return desc; } /** * fwnode_gpiod_get_index - obtain a GPIO from firmware node * @fwnode: handle of the firmware node * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain for the consumer * @flags: GPIO initialization flags * @label: label to attach to the requested GPIO * * This function can be used for drivers that get their configuration * from opaque firmware. * * The function properly finds the corresponding GPIO using whatever is the * underlying firmware interface and then makes sure that the GPIO * descriptor is requested before it is returned to the caller. * * Returns: * On successful request the GPIO pin is configured in accordance with * provided @flags. * * In case of error an ERR_PTR() is returned. */ struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, const char *con_id, int index, enum gpiod_flags flags, const char *label) { return gpiod_find_and_request(NULL, fwnode, con_id, index, flags, label, false); } EXPORT_SYMBOL_GPL(fwnode_gpiod_get_index); /** * gpiod_count - return the number of GPIOs associated with a device / function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * * Returns: * The number of GPIOs associated with a device / function or -ENOENT if no * GPIO has been assigned to the requested function. */ int gpiod_count(struct device *dev, const char *con_id) { const struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; int count = -ENOENT; if (is_of_node(fwnode)) count = of_gpio_count(fwnode, con_id); else if (is_acpi_node(fwnode)) count = acpi_gpio_count(fwnode, con_id); else if (is_software_node(fwnode)) count = swnode_gpio_count(fwnode, con_id); if (count < 0) count = platform_gpio_count(dev, con_id); return count; } EXPORT_SYMBOL_GPL(gpiod_count); /** * gpiod_get - obtain a GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * Returns: * The GPIO descriptor corresponding to the function @con_id of device * dev, -ENOENT if no GPIO has been assigned to the requested function, or * another IS_ERR() code if an error occurred while trying to acquire the GPIO. */ struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id, enum gpiod_flags flags) { return gpiod_get_index(dev, con_id, 0, flags); } EXPORT_SYMBOL_GPL(gpiod_get); /** * gpiod_get_optional - obtain an optional GPIO for a given GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get(), except that when no GPIO was assigned to * the requested function it will return NULL. This is convenient for drivers * that need to handle optional GPIOs. * * Returns: * The GPIO descriptor corresponding to the function @con_id of device * dev, NULL if no GPIO has been assigned to the requested function, or * another IS_ERR() code if an error occurred while trying to acquire the GPIO. */ struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { return gpiod_get_index_optional(dev, con_id, 0, flags); } EXPORT_SYMBOL_GPL(gpiod_get_optional); /** * gpiod_configure_flags - helper function to configure a given GPIO * @desc: gpio whose value will be assigned * @con_id: function within the GPIO consumer * @lflags: bitmask of gpio_lookup_flags GPIO_* values - returned from * of_find_gpio() or of_get_gpio_hog() * @dflags: gpiod_flags - optional GPIO initialization flags * * Returns: * 0 on success, -ENOENT if no GPIO has been assigned to the * requested function and/or index, or another IS_ERR() code if an error * occurred while trying to acquire the GPIO. */ int gpiod_configure_flags(struct gpio_desc *desc, const char *con_id, unsigned long lflags, enum gpiod_flags dflags) { const char *name = function_name_or_default(con_id); int ret; if (lflags & GPIO_ACTIVE_LOW) set_bit(FLAG_ACTIVE_LOW, &desc->flags); if (lflags & GPIO_OPEN_DRAIN) set_bit(FLAG_OPEN_DRAIN, &desc->flags); else if (dflags & GPIOD_FLAGS_BIT_OPEN_DRAIN) { /* * This enforces open drain mode from the consumer side. * This is necessary for some busses like I2C, but the lookup * should *REALLY* have specified them as open drain in the * first place, so print a little warning here. */ set_bit(FLAG_OPEN_DRAIN, &desc->flags); gpiod_warn(desc, "enforced open drain please flag it properly in DT/ACPI DSDT/board file\n"); } if (lflags & GPIO_OPEN_SOURCE) set_bit(FLAG_OPEN_SOURCE, &desc->flags); if (((lflags & GPIO_PULL_UP) && (lflags & GPIO_PULL_DOWN)) || ((lflags & GPIO_PULL_UP) && (lflags & GPIO_PULL_DISABLE)) || ((lflags & GPIO_PULL_DOWN) && (lflags & GPIO_PULL_DISABLE))) { gpiod_err(desc, "multiple pull-up, pull-down or pull-disable enabled, invalid configuration\n"); return -EINVAL; } if (lflags & GPIO_PULL_UP) set_bit(FLAG_PULL_UP, &desc->flags); else if (lflags & GPIO_PULL_DOWN) set_bit(FLAG_PULL_DOWN, &desc->flags); else if (lflags & GPIO_PULL_DISABLE) set_bit(FLAG_BIAS_DISABLE, &desc->flags); ret = gpiod_set_transitory(desc, (lflags & GPIO_TRANSITORY)); if (ret < 0) return ret; /* No particular flag request, return here... */ if (!(dflags & GPIOD_FLAGS_BIT_DIR_SET)) { gpiod_dbg(desc, "no flags found for GPIO %s\n", name); return 0; } /* Process flags */ if (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ret = gpiod_direction_output_nonotify(desc, !!(dflags & GPIOD_FLAGS_BIT_DIR_VAL)); else ret = gpiod_direction_input_nonotify(desc); return ret; } /** * gpiod_get_index - obtain a GPIO from a multi-index GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @idx: index of the GPIO to obtain in the consumer * @flags: optional GPIO initialization flags * * This variant of gpiod_get() allows to access GPIOs other than the first * defined one for functions that define several GPIOs. * * Returns: * A valid GPIO descriptor, -ENOENT if no GPIO has been assigned to the * requested function and/or index, or another IS_ERR() code if an error * occurred while trying to acquire the GPIO. */ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, const char *con_id, unsigned int idx, enum gpiod_flags flags) { struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; const char *devname = dev ? dev_name(dev) : "?"; const char *label = con_id ?: devname; return gpiod_find_and_request(dev, fwnode, con_id, idx, flags, label, true); } EXPORT_SYMBOL_GPL(gpiod_get_index); /** * gpiod_get_index_optional - obtain an optional GPIO from a multi-index GPIO * function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @index: index of the GPIO to obtain in the consumer * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get_index(), except that when no GPIO with the * specified index was assigned to the requested function it will return NULL. * This is convenient for drivers that need to handle optional GPIOs. * * Returns: * A valid GPIO descriptor, NULL if no GPIO has been assigned to the * requested function and/or index, or another IS_ERR() code if an error * occurred while trying to acquire the GPIO. */ struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { struct gpio_desc *desc; desc = gpiod_get_index(dev, con_id, index, flags); if (gpiod_not_found(desc)) return NULL; return desc; } EXPORT_SYMBOL_GPL(gpiod_get_index_optional); /** * gpiod_hog - Hog the specified GPIO desc given the provided flags * @desc: gpio whose value will be assigned * @name: gpio line name * @lflags: bitmask of gpio_lookup_flags GPIO_* values - returned from * of_find_gpio() or of_get_gpio_hog() * @dflags: gpiod_flags - optional GPIO initialization flags * * Returns: * 0 on success, or negative errno on failure. */ int gpiod_hog(struct gpio_desc *desc, const char *name, unsigned long lflags, enum gpiod_flags dflags) { struct gpio_device *gdev = desc->gdev; struct gpio_desc *local_desc; int hwnum; int ret; CLASS(gpio_chip_guard, guard)(desc); if (!guard.gc) return -ENODEV; if (test_and_set_bit(FLAG_IS_HOGGED, &desc->flags)) return 0; hwnum = gpio_chip_hwgpio(desc); local_desc = gpiochip_request_own_desc(guard.gc, hwnum, name, lflags, dflags); if (IS_ERR(local_desc)) { clear_bit(FLAG_IS_HOGGED, &desc->flags); ret = PTR_ERR(local_desc); pr_err("requesting hog GPIO %s (chip %s, offset %d) failed, %d\n", name, gdev->label, hwnum, ret); return ret; } gpiod_dbg(desc, "hogged as %s%s\n", (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? "output" : "input", (dflags & GPIOD_FLAGS_BIT_DIR_OUT) ? (dflags & GPIOD_FLAGS_BIT_DIR_VAL) ? "/high" : "/low" : ""); return 0; } /** * gpiochip_free_hogs - Scan gpio-controller chip and release GPIO hog * @gc: gpio chip to act on */ static void gpiochip_free_hogs(struct gpio_chip *gc) { struct gpio_desc *desc; for_each_gpio_desc_with_flag(gc, desc, FLAG_IS_HOGGED) gpiochip_free_own_desc(desc); } /** * gpiod_get_array - obtain multiple GPIOs from a multi-index GPIO function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * This function acquires all the GPIOs defined under a given function. * * Returns: * The GPIO descriptors corresponding to the function @con_id of device * dev, -ENOENT if no GPIO has been assigned to the requested function, * or another IS_ERR() code if an error occurred while trying to acquire * the GPIOs. */ struct gpio_descs *__must_check gpiod_get_array(struct device *dev, const char *con_id, enum gpiod_flags flags) { struct gpio_desc *desc; struct gpio_descs *descs; struct gpio_array *array_info = NULL; struct gpio_chip *gc; int count, bitmap_size; size_t descs_size; count = gpiod_count(dev, con_id); if (count < 0) return ERR_PTR(count); descs_size = struct_size(descs, desc, count); descs = kzalloc(descs_size, GFP_KERNEL); if (!descs) return ERR_PTR(-ENOMEM); for (descs->ndescs = 0; descs->ndescs < count; descs->ndescs++) { desc = gpiod_get_index(dev, con_id, descs->ndescs, flags); if (IS_ERR(desc)) { gpiod_put_array(descs); return ERR_CAST(desc); } descs->desc[descs->ndescs] = desc; gc = gpiod_to_chip(desc); /* * If pin hardware number of array member 0 is also 0, select * its chip as a candidate for fast bitmap processing path. */ if (descs->ndescs == 0 && gpio_chip_hwgpio(desc) == 0) { struct gpio_descs *array; bitmap_size = BITS_TO_LONGS(gc->ngpio > count ? gc->ngpio : count); array = krealloc(descs, descs_size + struct_size(array_info, invert_mask, 3 * bitmap_size), GFP_KERNEL | __GFP_ZERO); if (!array) { gpiod_put_array(descs); return ERR_PTR(-ENOMEM); } descs = array; array_info = (void *)descs + descs_size; array_info->get_mask = array_info->invert_mask + bitmap_size; array_info->set_mask = array_info->get_mask + bitmap_size; array_info->desc = descs->desc; array_info->size = count; array_info->chip = gc; bitmap_set(array_info->get_mask, descs->ndescs, count - descs->ndescs); bitmap_set(array_info->set_mask, descs->ndescs, count - descs->ndescs); descs->info = array_info; } /* If there is no cache for fast bitmap processing path, continue */ if (!array_info) continue; /* Unmark array members which don't belong to the 'fast' chip */ if (array_info->chip != gc) { __clear_bit(descs->ndescs, array_info->get_mask); __clear_bit(descs->ndescs, array_info->set_mask); } /* * Detect array members which belong to the 'fast' chip * but their pins are not in hardware order. */ else if (gpio_chip_hwgpio(desc) != descs->ndescs) { /* * Don't use fast path if all array members processed so * far belong to the same chip as this one but its pin * hardware number is different from its array index. */ if (bitmap_full(array_info->get_mask, descs->ndescs)) { array_info = NULL; } else { __clear_bit(descs->ndescs, array_info->get_mask); __clear_bit(descs->ndescs, array_info->set_mask); } } else { /* Exclude open drain or open source from fast output */ if (gpiochip_line_is_open_drain(gc, descs->ndescs) || gpiochip_line_is_open_source(gc, descs->ndescs)) __clear_bit(descs->ndescs, array_info->set_mask); /* Identify 'fast' pins which require invertion */ if (gpiod_is_active_low(desc)) __set_bit(descs->ndescs, array_info->invert_mask); } } if (array_info) dev_dbg(dev, "GPIO array info: chip=%s, size=%d, get_mask=%lx, set_mask=%lx, invert_mask=%lx\n", array_info->chip->label, array_info->size, *array_info->get_mask, *array_info->set_mask, *array_info->invert_mask); return descs; } EXPORT_SYMBOL_GPL(gpiod_get_array); /** * gpiod_get_array_optional - obtain multiple GPIOs from a multi-index GPIO * function * @dev: GPIO consumer, can be NULL for system-global GPIOs * @con_id: function within the GPIO consumer * @flags: optional GPIO initialization flags * * This is equivalent to gpiod_get_array(), except that when no GPIO was * assigned to the requested function it will return NULL. * * Returns: * The GPIO descriptors corresponding to the function @con_id of device * dev, NULL if no GPIO has been assigned to the requested function, * or another IS_ERR() code if an error occurred while trying to acquire * the GPIOs. */ struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { struct gpio_descs *descs; descs = gpiod_get_array(dev, con_id, flags); if (gpiod_not_found(descs)) return NULL; return descs; } EXPORT_SYMBOL_GPL(gpiod_get_array_optional); /** * gpiod_put - dispose of a GPIO descriptor * @desc: GPIO descriptor to dispose of * * No descriptor can be used after gpiod_put() has been called on it. */ void gpiod_put(struct gpio_desc *desc) { if (desc) gpiod_free(desc); } EXPORT_SYMBOL_GPL(gpiod_put); /** * gpiod_put_array - dispose of multiple GPIO descriptors * @descs: struct gpio_descs containing an array of descriptors */ void gpiod_put_array(struct gpio_descs *descs) { unsigned int i; for (i = 0; i < descs->ndescs; i++) gpiod_put(descs->desc[i]); kfree(descs); } EXPORT_SYMBOL_GPL(gpiod_put_array); static int gpio_stub_drv_probe(struct device *dev) { /* * The DT node of some GPIO chips have a "compatible" property, but * never have a struct device added and probed by a driver to register * the GPIO chip with gpiolib. In such cases, fw_devlink=on will cause * the consumers of the GPIO chip to get probe deferred forever because * they will be waiting for a device associated with the GPIO chip * firmware node to get added and bound to a driver. * * To allow these consumers to probe, we associate the struct * gpio_device of the GPIO chip with the firmware node and then simply * bind it to this stub driver. */ return 0; } static struct device_driver gpio_stub_drv = { .name = "gpio_stub_drv", .bus = &gpio_bus_type, .probe = gpio_stub_drv_probe, }; static int __init gpiolib_dev_init(void) { int ret; /* Register GPIO sysfs bus */ ret = bus_register(&gpio_bus_type); if (ret < 0) { pr_err("gpiolib: could not register GPIO bus type\n"); return ret; } ret = driver_register(&gpio_stub_drv); if (ret < 0) { pr_err("gpiolib: could not register GPIO stub driver\n"); bus_unregister(&gpio_bus_type); return ret; } ret = alloc_chrdev_region(&gpio_devt, 0, GPIO_DEV_MAX, GPIOCHIP_NAME); if (ret < 0) { pr_err("gpiolib: failed to allocate char dev region\n"); driver_unregister(&gpio_stub_drv); bus_unregister(&gpio_bus_type); return ret; } gpiolib_initialized = true; gpiochip_setup_devs(); #if IS_ENABLED(CONFIG_OF_DYNAMIC) && IS_ENABLED(CONFIG_OF_GPIO) WARN_ON(of_reconfig_notifier_register(&gpio_of_notifier)); #endif /* CONFIG_OF_DYNAMIC && CONFIG_OF_GPIO */ return ret; } core_initcall(gpiolib_dev_init); #ifdef CONFIG_DEBUG_FS static void gpiolib_dbg_show(struct seq_file *s, struct gpio_device *gdev) { bool active_low, is_irq, is_out; unsigned int gpio = gdev->base; struct gpio_desc *desc; struct gpio_chip *gc; int value; guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) { seq_puts(s, "Underlying GPIO chip is gone\n"); return; } for_each_gpio_desc(gc, desc) { guard(srcu)(&desc->gdev->desc_srcu); is_irq = test_bit(FLAG_USED_AS_IRQ, &desc->flags); if (is_irq || test_bit(FLAG_REQUESTED, &desc->flags)) { gpiod_get_direction(desc); is_out = test_bit(FLAG_IS_OUT, &desc->flags); value = gpio_chip_get_value(gc, desc); active_low = test_bit(FLAG_ACTIVE_LOW, &desc->flags); seq_printf(s, " gpio-%-3u (%-20.20s|%-20.20s) %s %s %s%s\n", gpio, desc->name ?: "", gpiod_get_label(desc), is_out ? "out" : "in ", value >= 0 ? (value ? "hi" : "lo") : "? ", is_irq ? "IRQ " : "", active_low ? "ACTIVE LOW" : ""); } else if (desc->name) { seq_printf(s, " gpio-%-3u (%-20.20s)\n", gpio, desc->name); } gpio++; } } struct gpiolib_seq_priv { bool newline; int idx; }; static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos) { struct gpiolib_seq_priv *priv; struct gpio_device *gdev; loff_t index = *pos; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return NULL; s->private = priv; if (*pos > 0) priv->newline = true; priv->idx = srcu_read_lock(&gpio_devices_srcu); list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (index-- == 0) return gdev; } return NULL; } static void *gpiolib_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct gpiolib_seq_priv *priv = s->private; struct gpio_device *gdev = v, *next; next = list_entry_rcu(gdev->list.next, struct gpio_device, list); gdev = &next->list == &gpio_devices ? NULL : next; priv->newline = true; ++*pos; return gdev; } static void gpiolib_seq_stop(struct seq_file *s, void *v) { struct gpiolib_seq_priv *priv = s->private; srcu_read_unlock(&gpio_devices_srcu, priv->idx); kfree(priv); } static int gpiolib_seq_show(struct seq_file *s, void *v) { struct gpiolib_seq_priv *priv = s->private; struct gpio_device *gdev = v; struct gpio_chip *gc; struct device *parent; if (priv->newline) seq_putc(s, '\n'); guard(srcu)(&gdev->srcu); gc = srcu_dereference(gdev->chip, &gdev->srcu); if (!gc) { seq_printf(s, "%s: (dangling chip)\n", dev_name(&gdev->dev)); return 0; } seq_printf(s, "%s: GPIOs %u-%u", dev_name(&gdev->dev), gdev->base, gdev->base + gdev->ngpio - 1); parent = gc->parent; if (parent) seq_printf(s, ", parent: %s/%s", parent->bus ? parent->bus->name : "no-bus", dev_name(parent)); if (gc->label) seq_printf(s, ", %s", gc->label); if (gc->can_sleep) seq_printf(s, ", can sleep"); seq_printf(s, ":\n"); if (gc->dbg_show) gc->dbg_show(s, gc); else gpiolib_dbg_show(s, gdev); return 0; } static const struct seq_operations gpiolib_sops = { .start = gpiolib_seq_start, .next = gpiolib_seq_next, .stop = gpiolib_seq_stop, .show = gpiolib_seq_show, }; DEFINE_SEQ_ATTRIBUTE(gpiolib); static int __init gpiolib_debugfs_init(void) { /* /sys/kernel/debug/gpio */ debugfs_create_file("gpio", 0444, NULL, NULL, &gpiolib_fops); return 0; } subsys_initcall(gpiolib_debugfs_init); #endif /* DEBUG_FS */
7 7 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/seq_file.h> #include <net/ip.h> #include <net/mptcp.h> #include <net/snmp.h> #include <net/net_namespace.h> #include "mib.h" static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPCapableSYNRX", MPTCP_MIB_MPCAPABLEPASSIVE), SNMP_MIB_ITEM("MPCapableSYNTX", MPTCP_MIB_MPCAPABLEACTIVE), SNMP_MIB_ITEM("MPCapableSYNACKRX", MPTCP_MIB_MPCAPABLEACTIVEACK), SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK), SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK), SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK), SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP), SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED), SNMP_MIB_ITEM("MPCapableEndpAttempt", MPTCP_MIB_MPCAPABLEENDPATTEMPT), SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT), SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR), SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK), SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET), SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), SNMP_MIB_ITEM("DataCsumErr", MPTCP_MIB_DATACSUMERR), SNMP_MIB_ITEM("OFOQueueTail", MPTCP_MIB_OFOQUEUETAIL), SNMP_MIB_ITEM("OFOQueue", MPTCP_MIB_OFOQUEUE), SNMP_MIB_ITEM("OFOMerge", MPTCP_MIB_OFOMERGE), SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW), SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA), SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR), SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX), SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP), SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD), SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX), SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP), SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD), SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP), SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX), SNMP_MIB_ITEM("MPJoinPortSynAckRx", MPTCP_MIB_JOINPORTSYNACKRX), SNMP_MIB_ITEM("MPJoinPortAckRx", MPTCP_MIB_JOINPORTACKRX), SNMP_MIB_ITEM("MismatchPortSynRx", MPTCP_MIB_MISMATCHPORTSYNRX), SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX), SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR), SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP), SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX), SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP), SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW), SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX), SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX), SNMP_MIB_ITEM("MPFailTx", MPTCP_MIB_MPFAILTX), SNMP_MIB_ITEM("MPFailRx", MPTCP_MIB_MPFAILRX), SNMP_MIB_ITEM("MPFastcloseTx", MPTCP_MIB_MPFASTCLOSETX), SNMP_MIB_ITEM("MPFastcloseRx", MPTCP_MIB_MPFASTCLOSERX), SNMP_MIB_ITEM("MPRstTx", MPTCP_MIB_MPRSTTX), SNMP_MIB_ITEM("MPRstRx", MPTCP_MIB_MPRSTRX), SNMP_MIB_ITEM("RcvPruned", MPTCP_MIB_RCVPRUNED), SNMP_MIB_ITEM("SubflowStale", MPTCP_MIB_SUBFLOWSTALE), SNMP_MIB_ITEM("SubflowRecover", MPTCP_MIB_SUBFLOWRECOVER), SNMP_MIB_ITEM("SndWndShared", MPTCP_MIB_SNDWNDSHARED), SNMP_MIB_ITEM("RcvWndShared", MPTCP_MIB_RCVWNDSHARED), SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE), SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE), SNMP_MIB_SENTINEL }; /* mptcp_mib_alloc - allocate percpu mib counters * * These are allocated when the first mptcp socket is created so * we do not waste percpu memory if mptcp isn't in use. */ bool mptcp_mib_alloc(struct net *net) { struct mptcp_mib __percpu *mib = alloc_percpu(struct mptcp_mib); if (!mib) return false; if (cmpxchg(&net->mib.mptcp_statistics, NULL, mib)) free_percpu(mib); return true; } void mptcp_seq_show(struct seq_file *seq) { unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1]; struct net *net = seq->private; int i; seq_puts(seq, "MPTcpExt:"); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %s", mptcp_snmp_list[i].name); seq_puts(seq, "\nMPTcpExt:"); memset(sum, 0, sizeof(sum)); if (net->mib.mptcp_statistics) snmp_get_cpu_field_batch(sum, mptcp_snmp_list, net->mib.mptcp_statistics); for (i = 0; mptcp_snmp_list[i].name; i++) seq_printf(seq, " %lu", sum[i]); seq_putc(seq, '\n'); }
462 94 3 2513 2753 3777 3924 1622 1620 591 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 /* SPDX-License-Identifier: GPL-2.0 */ /* * include/linux/writeback.h */ #ifndef WRITEBACK_H #define WRITEBACK_H #include <linux/sched.h> #include <linux/workqueue.h> #include <linux/fs.h> #include <linux/flex_proportions.h> #include <linux/backing-dev-defs.h> #include <linux/blk_types.h> #include <linux/pagevec.h> struct bio; DECLARE_PER_CPU(int, dirty_throttle_leaks); /* * The global dirty threshold is normally equal to the global dirty limit, * except when the system suddenly allocates a lot of anonymous memory and * knocks down the global dirty threshold quickly, in which case the global * dirty limit will follow down slowly to prevent livelocking all dirtier tasks. */ #define DIRTY_SCOPE 8 struct backing_dev_info; /* * fs/fs-writeback.c */ enum writeback_sync_modes { WB_SYNC_NONE, /* Don't wait on anything */ WB_SYNC_ALL, /* Wait on every mapping */ }; /* * A control structure which tells the writeback code what to do. These are * always on the stack, and hence need no locking. They are always initialised * in a manner such that unspecified fields are set to zero. */ struct writeback_control { /* public fields that can be set and/or consumed by the caller: */ long nr_to_write; /* Write this many pages, and decrement this for each page written */ long pages_skipped; /* Pages which were not written */ /* * For a_ops->writepages(): if start or end are non-zero then this is * a hint that the filesystem need only write out the pages inside that * byterange. The byte at `end' is included in the writeout request. */ loff_t range_start; loff_t range_end; enum writeback_sync_modes sync_mode; unsigned for_kupdate:1; /* A kupdate writeback */ unsigned for_background:1; /* A background writeback */ unsigned tagged_writepages:1; /* tag-and-write to avoid livelock */ unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ unsigned unpinned_netfs_wb:1; /* Cleared I_PINNING_NETFS_WB */ /* * When writeback IOs are bounced through async layers, only the * initial synchronous phase should be accounted towards inode * cgroup ownership arbitration to avoid confusion. Later stages * can set the following flag to disable the accounting. */ unsigned no_cgroup_owner:1; /* To enable batching of swap writes to non-block-device backends, * "plug" can be set point to a 'struct swap_iocb *'. When all swap * writes have been submitted, if with swap_iocb is not NULL, * swap_write_unplug() should be called. */ struct swap_iocb **swap_plug; /* Target list for splitting a large folio */ struct list_head *list; /* internal fields used by the ->writepages implementation: */ struct folio_batch fbatch; pgoff_t index; int saved_err; #ifdef CONFIG_CGROUP_WRITEBACK struct bdi_writeback *wb; /* wb this writeback is issued under */ struct inode *inode; /* inode being written out */ /* foreign inode detection, see wbc_detach_inode() */ int wb_id; /* current wb id */ int wb_lcand_id; /* last foreign candidate wb id */ int wb_tcand_id; /* this foreign candidate wb id */ size_t wb_bytes; /* bytes written by current wb */ size_t wb_lcand_bytes; /* bytes written by last candidate */ size_t wb_tcand_bytes; /* bytes written by this candidate */ #endif }; static inline blk_opf_t wbc_to_write_flags(struct writeback_control *wbc) { blk_opf_t flags = 0; if (wbc->sync_mode == WB_SYNC_ALL) flags |= REQ_SYNC; else if (wbc->for_kupdate || wbc->for_background) flags |= REQ_BACKGROUND; return flags; } #ifdef CONFIG_CGROUP_WRITEBACK #define wbc_blkcg_css(wbc) \ ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css) #else #define wbc_blkcg_css(wbc) (blkcg_root_css) #endif /* CONFIG_CGROUP_WRITEBACK */ /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to * and are measured against each other in. There always is one global * domain, global_wb_domain, that every wb in the system is a member of. * This allows measuring the relative bandwidth of each wb to distribute * dirtyable memory accordingly. */ struct wb_domain { spinlock_t lock; /* * Scale the writeback cache size proportional to the relative * writeout speed. * * We do this by keeping a floating proportion between BDIs, based * on page writeback completions [end_page_writeback()]. Those * devices that write out pages fastest will get the larger share, * while the slower will get a smaller share. * * We use page writeout completions because we are interested in * getting rid of dirty pages. Having them written out is the * primary goal. * * We introduce a concept of time, a period over which we measure * these events, because demand can/will vary over time. The length * of this period itself is measured in page writeback completions. */ struct fprop_global completions; struct timer_list period_timer; /* timer for aging of completions */ unsigned long period_time; /* * The dirtyable memory and dirty threshold could be suddenly * knocked down by a large amount (eg. on the startup of KVM in a * swapless system). This may throw the system into deep dirty * exceeded state and throttle heavy/light dirtiers alike. To * retain good responsiveness, maintain global_dirty_limit for * tracking slowly down to the knocked down dirty threshold. * * Both fields are protected by ->lock. */ unsigned long dirty_limit_tstamp; unsigned long dirty_limit; }; /** * wb_domain_size_changed - memory available to a wb_domain has changed * @dom: wb_domain of interest * * This function should be called when the amount of memory available to * @dom has changed. It resets @dom's dirty limit parameters to prevent * the past values which don't match the current configuration from skewing * dirty throttling. Without this, when memory size of a wb_domain is * greatly reduced, the dirty throttling logic may allow too many pages to * be dirtied leading to consecutive unnecessary OOMs and may get stuck in * that situation. */ static inline void wb_domain_size_changed(struct wb_domain *dom) { spin_lock(&dom->lock); dom->dirty_limit_tstamp = jiffies; dom->dirty_limit = 0; spin_unlock(&dom->lock); } /* * fs/fs-writeback.c */ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(enum wb_reason reason); void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); void inode_io_list_del(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ static inline void wait_on_inode(struct inode *inode) { wait_var_event(inode_state_wait_address(inode, __I_NEW), !(READ_ONCE(inode->i_state) & I_NEW)); } #ifdef CONFIG_CGROUP_WRITEBACK #include <linux/cgroup.h> #include <linux/bio.h> void __inode_attach_wb(struct inode *inode, struct folio *folio); void wbc_detach_inode(struct writeback_control *wbc); void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio, size_t bytes); int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, enum wb_reason reason, struct wb_completion *done); void cgroup_writeback_umount(struct super_block *sb); bool cleanup_offline_cgwb(struct bdi_writeback *wb); /** * inode_attach_wb - associate an inode with its wb * @inode: inode of interest * @folio: folio being dirtied (may be NULL) * * If @inode doesn't have its wb, associate it with the wb matching the * memcg of @folio or, if @folio is NULL, %current. May be called w/ or w/o * @inode->i_lock. */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { if (!inode->i_wb) __inode_attach_wb(inode, folio); } /** * inode_detach_wb - disassociate an inode from its wb * @inode: inode of interest * * @inode is being freed. Detach from its wb. */ static inline void inode_detach_wb(struct inode *inode) { if (inode->i_wb) { WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); wb_put(inode->i_wb); inode->i_wb = NULL; } } void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode); /** * wbc_init_bio - writeback specific initializtion of bio * @wbc: writeback_control for the writeback in progress * @bio: bio to be initialized * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup * writeback context. Must be called after the bio has been associated with * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { /* * pageout() path doesn't attach @wbc to the inode being written * out. This is intentional as we don't want the function to block * behind a slow cgroup. Ultimately, we want pageout() to kick off * regular writeback instead of writing things out itself. */ if (wbc->wb) bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ static inline void inode_attach_wb(struct inode *inode, struct folio *folio) { } static inline void inode_detach_wb(struct inode *inode) { } static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, struct inode *inode) { } static inline void wbc_detach_inode(struct writeback_control *wbc) { } static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { } static inline void wbc_account_cgroup_owner(struct writeback_control *wbc, struct folio *folio, size_t bytes) { } static inline void cgroup_writeback_umount(struct super_block *sb) { } #endif /* CONFIG_CGROUP_WRITEBACK */ /* * mm/page-writeback.c */ void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_timer_fn(struct timer_list *t); bool node_dirty_ok(struct pglist_data *pgdat); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); #ifdef CONFIG_CGROUP_WRITEBACK void wb_domain_exit(struct wb_domain *dom); #endif extern struct wb_domain global_wb_domain; /* These are exported to sysctl. */ extern unsigned int dirty_writeback_interval; extern unsigned int dirty_expire_interval; extern unsigned int dirtytime_expire_interval; extern int laptop_mode; int dirtytime_interval_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); unsigned long cgwb_calc_thresh(struct bdi_writeback *wb); void wb_update_bandwidth(struct bdi_writeback *wb); /* Invoke balance dirty pages in async mode. */ #define BDP_ASYNC 0x0001 void balance_dirty_pages_ratelimited(struct address_space *mapping); int balance_dirty_pages_ratelimited_flags(struct address_space *mapping, unsigned int flags); bool wb_over_bg_thresh(struct bdi_writeback *wb); struct folio *writeback_iter(struct address_space *mapping, struct writeback_control *wbc, struct folio *folio, int *error); typedef int (*writepage_t)(struct folio *folio, struct writeback_control *wbc, void *data); int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); #endif /* WRITEBACK_H */
18 18 998 998 9 9 9 9 8 9 9 9 9 16 101 12 89 275 276 2151 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 // SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/fs.h> #include <linux/path.h> #include <linux/slab.h> #include <linux/fs_struct.h> #include "internal.h" /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. */ void set_fs_root(struct fs_struct *fs, const struct path *path) { struct path old_root; path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_root = fs->root; fs->root = *path; write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } /* * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. * It can block. */ void set_fs_pwd(struct fs_struct *fs, const struct path *path) { struct path old_pwd; path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); } static inline int replace_path(struct path *p, const struct path *old, const struct path *new) { if (likely(p->dentry != old->dentry || p->mnt != old->mnt)) return 0; *p = *new; return 1; } void chroot_fs_refs(const struct path *old_root, const struct path *new_root) { struct task_struct *g, *p; struct fs_struct *fs; int count = 0; read_lock(&tasklist_lock); for_each_process_thread(g, p) { task_lock(p); fs = p->fs; if (fs) { int hits = 0; spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); hits += replace_path(&fs->root, old_root, new_root); hits += replace_path(&fs->pwd, old_root, new_root); write_seqcount_end(&fs->seq); while (hits--) { count++; path_get(new_root); } spin_unlock(&fs->lock); } task_unlock(p); } read_unlock(&tasklist_lock); while (count--) path_put(old_root); } void free_fs_struct(struct fs_struct *fs) { path_put(&fs->root); path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } void exit_fs(struct task_struct *tsk) { struct fs_struct *fs = tsk->fs; if (fs) { int kill; task_lock(tsk); spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); } } struct fs_struct *copy_fs_struct(struct fs_struct *old) { struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); /* We don't need to lock fs - think why ;-) */ if (fs) { fs->users = 1; fs->in_exec = 0; spin_lock_init(&fs->lock); seqcount_spinlock_init(&fs->seq, &fs->lock); fs->umask = old->umask; spin_lock(&old->lock); fs->root = old->root; path_get(&fs->root); fs->pwd = old->pwd; path_get(&fs->pwd); spin_unlock(&old->lock); } return fs; } int unshare_fs_struct(void) { struct fs_struct *fs = current->fs; struct fs_struct *new_fs = copy_fs_struct(fs); int kill; if (!new_fs) return -ENOMEM; task_lock(current); spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; spin_unlock(&fs->lock); task_unlock(current); if (kill) free_fs_struct(fs); return 0; } EXPORT_SYMBOL_GPL(unshare_fs_struct); int current_umask(void) { return current->fs->umask; } EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .seq = SEQCNT_SPINLOCK_ZERO(init_fs.seq, &init_fs.lock), .umask = 0022, };
14 8 9 14 2 1 1 1 8 2 4 1 1 2 38 21 33 21 41 1 14 17 14 13 24 8 13 15 20 15 14 24 20 20 2 1 1 4 4 1 34 6 5 2 1 13 1 2 8 8 1 3 3 3 3 1 1 1 1 7 8 44 44 44 49 44 59 15 15 10 20 43 40 39 36 60 60 3 1 3 44 1 1 13 13 13 13 1 1 1 1 13 9 9 1 5 1 2 1 48 42 42 7 49 19 6 6 20 22 3 16 2 22 17 2 1 1 1 1 7 1 8 15 2 2 3 3 29 29 33 33 4 32 46 57 63 63 36 64 1 15 64 64 58 11 11 11 57 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ocfs2 #if !defined(_TRACE_OCFS2_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_OCFS2_H #include <linux/tracepoint.h> DECLARE_EVENT_CLASS(ocfs2__int, TP_PROTO(int num), TP_ARGS(num), TP_STRUCT__entry( __field(int, num) ), TP_fast_assign( __entry->num = num; ), TP_printk("%d", __entry->num) ); #define DEFINE_OCFS2_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__int, name, \ TP_PROTO(int num), \ TP_ARGS(num)) DECLARE_EVENT_CLASS(ocfs2__uint, TP_PROTO(unsigned int num), TP_ARGS(num), TP_STRUCT__entry( __field( unsigned int, num ) ), TP_fast_assign( __entry->num = num; ), TP_printk("%u", __entry->num) ); #define DEFINE_OCFS2_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint, name, \ TP_PROTO(unsigned int num), \ TP_ARGS(num)) DECLARE_EVENT_CLASS(ocfs2__ull, TP_PROTO(unsigned long long blkno), TP_ARGS(blkno), TP_STRUCT__entry( __field(unsigned long long, blkno) ), TP_fast_assign( __entry->blkno = blkno; ), TP_printk("%llu", __entry->blkno) ); #define DEFINE_OCFS2_ULL_EVENT(name) \ DEFINE_EVENT(ocfs2__ull, name, \ TP_PROTO(unsigned long long num), \ TP_ARGS(num)) DECLARE_EVENT_CLASS(ocfs2__pointer, TP_PROTO(void *pointer), TP_ARGS(pointer), TP_STRUCT__entry( __field(void *, pointer) ), TP_fast_assign( __entry->pointer = pointer; ), TP_printk("%p", __entry->pointer) ); #define DEFINE_OCFS2_POINTER_EVENT(name) \ DEFINE_EVENT(ocfs2__pointer, name, \ TP_PROTO(void *pointer), \ TP_ARGS(pointer)) DECLARE_EVENT_CLASS(ocfs2__string, TP_PROTO(const char *name), TP_ARGS(name), TP_STRUCT__entry( __string(name,name) ), TP_fast_assign( __assign_str(name); ), TP_printk("%s", __get_str(name)) ); #define DEFINE_OCFS2_STRING_EVENT(name) \ DEFINE_EVENT(ocfs2__string, name, \ TP_PROTO(const char *name), \ TP_ARGS(name)) DECLARE_EVENT_CLASS(ocfs2__int_int, TP_PROTO(int value1, int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(int, value1) __field(int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%d %d", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_INT_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__int_int, name, \ TP_PROTO(int val1, int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__uint_int, TP_PROTO(unsigned int value1, int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned int, value1) __field(int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%u %d", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_UINT_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint_int, name, \ TP_PROTO(unsigned int val1, int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__uint_uint, TP_PROTO(unsigned int value1, unsigned int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned int, value1) __field(unsigned int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%u %u", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint_uint, name, \ TP_PROTO(unsigned int val1, unsigned int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_uint, TP_PROTO(unsigned long long value1, unsigned int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %u", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_ULL_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_uint, name, \ TP_PROTO(unsigned long long val1, unsigned int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_int, TP_PROTO(unsigned long long value1, int value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned long long, value1) __field(int, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %d", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_ULL_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_int, name, \ TP_PROTO(unsigned long long val1, int val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_ull, TP_PROTO(unsigned long long value1, unsigned long long value2), TP_ARGS(value1, value2), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %llu", __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_ULL_ULL_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull, name, \ TP_PROTO(unsigned long long val1, unsigned long long val2), \ TP_ARGS(val1, val2)) DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint, TP_PROTO(unsigned long long value1, unsigned long long value2, unsigned int value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) __field(unsigned int, value3) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %llu %u", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_ULL_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull_uint, name, \ TP_PROTO(unsigned long long val1, \ unsigned long long val2, unsigned int val3), \ TP_ARGS(val1, val2, val3)) DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint, TP_PROTO(unsigned long long value1, unsigned int value2, unsigned int value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned int, value2) __field(unsigned int, value3) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %u %u", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_uint_uint, name, \ TP_PROTO(unsigned long long val1, \ unsigned int val2, unsigned int val3), \ TP_ARGS(val1, val2, val3)) DECLARE_EVENT_CLASS(ocfs2__uint_uint_uint, TP_PROTO(unsigned int value1, unsigned int value2, unsigned int value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field( unsigned int, value1 ) __field( unsigned int, value2 ) __field( unsigned int, value3 ) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%u %u %u", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_UINT_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__uint_uint_uint, name, \ TP_PROTO(unsigned int value1, unsigned int value2, \ unsigned int value3), \ TP_ARGS(value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_ull_ull, TP_PROTO(unsigned long long value1, unsigned long long value2, unsigned long long value3), TP_ARGS(value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) __field(unsigned long long, value3) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %llu %llu", __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_ULL_ULL_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull_ull, name, \ TP_PROTO(unsigned long long value1, unsigned long long value2, \ unsigned long long value3), \ TP_ARGS(value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_int_int_int, TP_PROTO(unsigned long long ull, int value1, int value2, int value3), TP_ARGS(ull, value1, value2, value3), TP_STRUCT__entry( __field( unsigned long long, ull ) __field( int, value1 ) __field( int, value2 ) __field( int, value3 ) ), TP_fast_assign( __entry->ull = ull; __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %d %d %d", __entry->ull, __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_int_int_int, name, \ TP_PROTO(unsigned long long ull, int value1, \ int value2, int value3), \ TP_ARGS(ull, value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_uint_uint_uint, TP_PROTO(unsigned long long ull, unsigned int value1, unsigned int value2, unsigned int value3), TP_ARGS(ull, value1, value2, value3), TP_STRUCT__entry( __field(unsigned long long, ull) __field(unsigned int, value1) __field(unsigned int, value2) __field(unsigned int, value3) ), TP_fast_assign( __entry->ull = ull; __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; ), TP_printk("%llu %u %u %u", __entry->ull, __entry->value1, __entry->value2, __entry->value3) ); #define DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_uint_uint_uint, name, \ TP_PROTO(unsigned long long ull, unsigned int value1, \ unsigned int value2, unsigned int value3), \ TP_ARGS(ull, value1, value2, value3)) DECLARE_EVENT_CLASS(ocfs2__ull_ull_uint_uint, TP_PROTO(unsigned long long value1, unsigned long long value2, unsigned int value3, unsigned int value4), TP_ARGS(value1, value2, value3, value4), TP_STRUCT__entry( __field(unsigned long long, value1) __field(unsigned long long, value2) __field(unsigned int, value3) __field(unsigned int, value4) ), TP_fast_assign( __entry->value1 = value1; __entry->value2 = value2; __entry->value3 = value3; __entry->value4 = value4; ), TP_printk("%llu %llu %u %u", __entry->value1, __entry->value2, __entry->value3, __entry->value4) ); #define DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(name) \ DEFINE_EVENT(ocfs2__ull_ull_uint_uint, name, \ TP_PROTO(unsigned long long ull, unsigned long long ull1, \ unsigned int value2, unsigned int value3), \ TP_ARGS(ull, ull1, value2, value3)) /* Trace events for fs/ocfs2/alloc.c. */ DECLARE_EVENT_CLASS(ocfs2__btree_ops, TP_PROTO(unsigned long long owner,\ unsigned int value1, unsigned int value2), TP_ARGS(owner, value1, value2), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned int, value1) __field(unsigned int, value2) ), TP_fast_assign( __entry->owner = owner; __entry->value1 = value1; __entry->value2 = value2; ), TP_printk("%llu %u %u", __entry->owner, __entry->value1, __entry->value2) ); #define DEFINE_OCFS2_BTREE_EVENT(name) \ DEFINE_EVENT(ocfs2__btree_ops, name, \ TP_PROTO(unsigned long long owner, \ unsigned int value1, unsigned int value2), \ TP_ARGS(owner, value1, value2)) DEFINE_OCFS2_BTREE_EVENT(ocfs2_adjust_rightmost_branch); DEFINE_OCFS2_BTREE_EVENT(ocfs2_rotate_tree_right); DEFINE_OCFS2_BTREE_EVENT(ocfs2_append_rec_to_path); DEFINE_OCFS2_BTREE_EVENT(ocfs2_insert_extent_start); DEFINE_OCFS2_BTREE_EVENT(ocfs2_add_clusters_in_btree); DEFINE_OCFS2_INT_EVENT(ocfs2_num_free_extents); DEFINE_OCFS2_INT_EVENT(ocfs2_complete_edge_insert); TRACE_EVENT(ocfs2_grow_tree, TP_PROTO(unsigned long long owner, int depth), TP_ARGS(owner, depth), TP_STRUCT__entry( __field(unsigned long long, owner) __field(int, depth) ), TP_fast_assign( __entry->owner = owner; __entry->depth = depth; ), TP_printk("%llu %d", __entry->owner, __entry->depth) ); TRACE_EVENT(ocfs2_rotate_subtree, TP_PROTO(int subtree_root, unsigned long long blkno, int depth), TP_ARGS(subtree_root, blkno, depth), TP_STRUCT__entry( __field(int, subtree_root) __field(unsigned long long, blkno) __field(int, depth) ), TP_fast_assign( __entry->subtree_root = subtree_root; __entry->blkno = blkno; __entry->depth = depth; ), TP_printk("%d %llu %d", __entry->subtree_root, __entry->blkno, __entry->depth) ); TRACE_EVENT(ocfs2_insert_extent, TP_PROTO(unsigned int ins_appending, unsigned int ins_contig, int ins_contig_index, int free_records, int ins_tree_depth), TP_ARGS(ins_appending, ins_contig, ins_contig_index, free_records, ins_tree_depth), TP_STRUCT__entry( __field(unsigned int, ins_appending) __field(unsigned int, ins_contig) __field(int, ins_contig_index) __field(int, free_records) __field(int, ins_tree_depth) ), TP_fast_assign( __entry->ins_appending = ins_appending; __entry->ins_contig = ins_contig; __entry->ins_contig_index = ins_contig_index; __entry->free_records = free_records; __entry->ins_tree_depth = ins_tree_depth; ), TP_printk("%u %u %d %d %d", __entry->ins_appending, __entry->ins_contig, __entry->ins_contig_index, __entry->free_records, __entry->ins_tree_depth) ); TRACE_EVENT(ocfs2_split_extent, TP_PROTO(int split_index, unsigned int c_contig_type, unsigned int c_has_empty_extent, unsigned int c_split_covers_rec), TP_ARGS(split_index, c_contig_type, c_has_empty_extent, c_split_covers_rec), TP_STRUCT__entry( __field(int, split_index) __field(unsigned int, c_contig_type) __field(unsigned int, c_has_empty_extent) __field(unsigned int, c_split_covers_rec) ), TP_fast_assign( __entry->split_index = split_index; __entry->c_contig_type = c_contig_type; __entry->c_has_empty_extent = c_has_empty_extent; __entry->c_split_covers_rec = c_split_covers_rec; ), TP_printk("%d %u %u %u", __entry->split_index, __entry->c_contig_type, __entry->c_has_empty_extent, __entry->c_split_covers_rec) ); TRACE_EVENT(ocfs2_remove_extent, TP_PROTO(unsigned long long owner, unsigned int cpos, unsigned int len, int index, unsigned int e_cpos, unsigned int clusters), TP_ARGS(owner, cpos, len, index, e_cpos, clusters), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned int, cpos) __field(unsigned int, len) __field(int, index) __field(unsigned int, e_cpos) __field(unsigned int, clusters) ), TP_fast_assign( __entry->owner = owner; __entry->cpos = cpos; __entry->len = len; __entry->index = index; __entry->e_cpos = e_cpos; __entry->clusters = clusters; ), TP_printk("%llu %u %u %d %u %u", __entry->owner, __entry->cpos, __entry->len, __entry->index, __entry->e_cpos, __entry->clusters) ); TRACE_EVENT(ocfs2_commit_truncate, TP_PROTO(unsigned long long ino, unsigned int new_cpos, unsigned int clusters, unsigned int depth), TP_ARGS(ino, new_cpos, clusters, depth), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, new_cpos) __field(unsigned int, clusters) __field(unsigned int, depth) ), TP_fast_assign( __entry->ino = ino; __entry->new_cpos = new_cpos; __entry->clusters = clusters; __entry->depth = depth; ), TP_printk("%llu %u %u %u", __entry->ino, __entry->new_cpos, __entry->clusters, __entry->depth) ); TRACE_EVENT(ocfs2_validate_extent_block, TP_PROTO(unsigned long long blkno), TP_ARGS(blkno), TP_STRUCT__entry( __field(unsigned long long, blkno) ), TP_fast_assign( __entry->blkno = blkno; ), TP_printk("%llu ", __entry->blkno) ); TRACE_EVENT(ocfs2_rotate_leaf, TP_PROTO(unsigned int insert_cpos, int insert_index, int has_empty, int next_free, unsigned int l_count), TP_ARGS(insert_cpos, insert_index, has_empty, next_free, l_count), TP_STRUCT__entry( __field(unsigned int, insert_cpos) __field(int, insert_index) __field(int, has_empty) __field(int, next_free) __field(unsigned int, l_count) ), TP_fast_assign( __entry->insert_cpos = insert_cpos; __entry->insert_index = insert_index; __entry->has_empty = has_empty; __entry->next_free = next_free; __entry->l_count = l_count; ), TP_printk("%u %d %d %d %u", __entry->insert_cpos, __entry->insert_index, __entry->has_empty, __entry->next_free, __entry->l_count) ); TRACE_EVENT(ocfs2_add_clusters_in_btree_ret, TP_PROTO(int status, int reason, int err), TP_ARGS(status, reason, err), TP_STRUCT__entry( __field(int, status) __field(int, reason) __field(int, err) ), TP_fast_assign( __entry->status = status; __entry->reason = reason; __entry->err = err; ), TP_printk("%d %d %d", __entry->status, __entry->reason, __entry->err) ); TRACE_EVENT(ocfs2_mark_extent_written, TP_PROTO(unsigned long long owner, unsigned int cpos, unsigned int len, unsigned int phys), TP_ARGS(owner, cpos, len, phys), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned int, cpos) __field(unsigned int, len) __field(unsigned int, phys) ), TP_fast_assign( __entry->owner = owner; __entry->cpos = cpos; __entry->len = len; __entry->phys = phys; ), TP_printk("%llu %u %u %u", __entry->owner, __entry->cpos, __entry->len, __entry->phys) ); DECLARE_EVENT_CLASS(ocfs2__truncate_log_ops, TP_PROTO(unsigned long long blkno, int index, unsigned int start, unsigned int num), TP_ARGS(blkno, index, start, num), TP_STRUCT__entry( __field(unsigned long long, blkno) __field(int, index) __field(unsigned int, start) __field(unsigned int, num) ), TP_fast_assign( __entry->blkno = blkno; __entry->index = index; __entry->start = start; __entry->num = num; ), TP_printk("%llu %d %u %u", __entry->blkno, __entry->index, __entry->start, __entry->num) ); #define DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(name) \ DEFINE_EVENT(ocfs2__truncate_log_ops, name, \ TP_PROTO(unsigned long long blkno, int index, \ unsigned int start, unsigned int num), \ TP_ARGS(blkno, index, start, num)) DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_truncate_log_append); DEFINE_OCFS2_TRUNCATE_LOG_OPS_EVENT(ocfs2_replay_truncate_records); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_flush_truncate_log); DEFINE_OCFS2_INT_EVENT(ocfs2_begin_truncate_log_recovery); DEFINE_OCFS2_INT_EVENT(ocfs2_truncate_log_recovery_num); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_complete_truncate_log_recovery); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_free_cached_blocks); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_cache_cluster_dealloc); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_run_deallocs); TRACE_EVENT(ocfs2_cache_block_dealloc, TP_PROTO(int type, int slot, unsigned long long suballoc, unsigned long long blkno, unsigned int bit), TP_ARGS(type, slot, suballoc, blkno, bit), TP_STRUCT__entry( __field(int, type) __field(int, slot) __field(unsigned long long, suballoc) __field(unsigned long long, blkno) __field(unsigned int, bit) ), TP_fast_assign( __entry->type = type; __entry->slot = slot; __entry->suballoc = suballoc; __entry->blkno = blkno; __entry->bit = bit; ), TP_printk("%d %d %llu %llu %u", __entry->type, __entry->slot, __entry->suballoc, __entry->blkno, __entry->bit) ); TRACE_EVENT(ocfs2_trim_extent, TP_PROTO(struct super_block *sb, unsigned long long blk, unsigned long long count), TP_ARGS(sb, blk, count), TP_STRUCT__entry( __field(int, dev_major) __field(int, dev_minor) __field(unsigned long long, blk) __field(__u64, count) ), TP_fast_assign( __entry->dev_major = MAJOR(sb->s_dev); __entry->dev_minor = MINOR(sb->s_dev); __entry->blk = blk; __entry->count = count; ), TP_printk("%d %d %llu %llu", __entry->dev_major, __entry->dev_minor, __entry->blk, __entry->count) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_trim_group); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_mainbm); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_trim_fs); /* End of trace events for fs/ocfs2/alloc.c. */ /* Trace events for fs/ocfs2/localalloc.c. */ DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_la_set_sizes); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_alloc_should_use_local); DEFINE_OCFS2_INT_EVENT(ocfs2_load_local_alloc); DEFINE_OCFS2_INT_EVENT(ocfs2_begin_local_alloc_recovery); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_reserve_local_alloc_bits); DEFINE_OCFS2_UINT_EVENT(ocfs2_local_alloc_count_bits); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits_search_bitmap); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_local_alloc_find_clear_bits); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_sync_local_to_main); TRACE_EVENT(ocfs2_sync_local_to_main_free, TP_PROTO(int count, int bit, unsigned long long start_blk, unsigned long long blkno), TP_ARGS(count, bit, start_blk, blkno), TP_STRUCT__entry( __field(int, count) __field(int, bit) __field(unsigned long long, start_blk) __field(unsigned long long, blkno) ), TP_fast_assign( __entry->count = count; __entry->bit = bit; __entry->start_blk = start_blk; __entry->blkno = blkno; ), TP_printk("%d %d %llu %llu", __entry->count, __entry->bit, __entry->start_blk, __entry->blkno) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_local_alloc_new_window); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_local_alloc_new_window_result); /* End of trace events for fs/ocfs2/localalloc.c. */ /* Trace events for fs/ocfs2/resize.c. */ DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_update_last_group_and_inode); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_group_extend); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_group_add); /* End of trace events for fs/ocfs2/resize.c. */ /* Trace events for fs/ocfs2/suballoc.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_group_descriptor); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_contig); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_block_group_alloc_discontig); DEFINE_OCFS2_ULL_EVENT(ocfs2_block_group_alloc); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_nospc); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_reserve_suballoc_bits_no_new_group); DEFINE_OCFS2_ULL_EVENT(ocfs2_reserve_new_inode_new_group); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_set_bits); TRACE_EVENT(ocfs2_relink_block_group, TP_PROTO(unsigned long long i_blkno, unsigned int chain, unsigned long long bg_blkno, unsigned long long prev_blkno), TP_ARGS(i_blkno, chain, bg_blkno, prev_blkno), TP_STRUCT__entry( __field(unsigned long long, i_blkno) __field(unsigned int, chain) __field(unsigned long long, bg_blkno) __field(unsigned long long, prev_blkno) ), TP_fast_assign( __entry->i_blkno = i_blkno; __entry->chain = chain; __entry->bg_blkno = bg_blkno; __entry->prev_blkno = prev_blkno; ), TP_printk("%llu %u %llu %llu", __entry->i_blkno, __entry->chain, __entry->bg_blkno, __entry->prev_blkno) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_cluster_group_search_wrong_max_bits); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cluster_group_search_max_block); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_block_group_search_max_block); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_search_chain_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_succ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_search_chain_end); DEFINE_OCFS2_UINT_EVENT(ocfs2_claim_suballoc_bits); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_claim_new_inode_at_loc); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_block_group_clear_bits); TRACE_EVENT(ocfs2_free_suballoc_bits, TP_PROTO(unsigned long long inode, unsigned long long group, unsigned int start_bit, unsigned int count), TP_ARGS(inode, group, start_bit, count), TP_STRUCT__entry( __field(unsigned long long, inode) __field(unsigned long long, group) __field(unsigned int, start_bit) __field(unsigned int, count) ), TP_fast_assign( __entry->inode = inode; __entry->group = group; __entry->start_bit = start_bit; __entry->count = count; ), TP_printk("%llu %llu %u %u", __entry->inode, __entry->group, __entry->start_bit, __entry->count) ); TRACE_EVENT(ocfs2_free_clusters, TP_PROTO(unsigned long long bg_blkno, unsigned long long start_blk, unsigned int start_bit, unsigned int count), TP_ARGS(bg_blkno, start_blk, start_bit, count), TP_STRUCT__entry( __field(unsigned long long, bg_blkno) __field(unsigned long long, start_blk) __field(unsigned int, start_bit) __field(unsigned int, count) ), TP_fast_assign( __entry->bg_blkno = bg_blkno; __entry->start_blk = start_blk; __entry->start_bit = start_bit; __entry->count = count; ), TP_printk("%llu %llu %u %u", __entry->bg_blkno, __entry->start_blk, __entry->start_bit, __entry->count) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_get_suballoc_slot_bit); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_test_suballoc_bit); DEFINE_OCFS2_ULL_EVENT(ocfs2_test_inode_bit); /* End of trace events for fs/ocfs2/suballoc.c. */ /* Trace events for fs/ocfs2/refcounttree.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_refcount_block); DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_refcount_trees); DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree); DEFINE_OCFS2_ULL_EVENT(ocfs2_create_refcount_tree_blkno); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_change_refcount_rec); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_expand_inline_ref_root); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_divide_leaf_refcount_block); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_new_leaf_refcount_block); DECLARE_EVENT_CLASS(ocfs2__refcount_tree_ops, TP_PROTO(unsigned long long blkno, int index, unsigned long long cpos, unsigned int clusters, unsigned int refcount), TP_ARGS(blkno, index, cpos, clusters, refcount), TP_STRUCT__entry( __field(unsigned long long, blkno) __field(int, index) __field(unsigned long long, cpos) __field(unsigned int, clusters) __field(unsigned int, refcount) ), TP_fast_assign( __entry->blkno = blkno; __entry->index = index; __entry->cpos = cpos; __entry->clusters = clusters; __entry->refcount = refcount; ), TP_printk("%llu %d %llu %u %u", __entry->blkno, __entry->index, __entry->cpos, __entry->clusters, __entry->refcount) ); #define DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(name) \ DEFINE_EVENT(ocfs2__refcount_tree_ops, name, \ TP_PROTO(unsigned long long blkno, int index, \ unsigned long long cpos, \ unsigned int count, unsigned int refcount), \ TP_ARGS(blkno, index, cpos, count, refcount)) DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_insert_refcount_rec); TRACE_EVENT(ocfs2_split_refcount_rec, TP_PROTO(unsigned long long cpos, unsigned int clusters, unsigned int refcount, unsigned long long split_cpos, unsigned int split_clusters, unsigned int split_refcount), TP_ARGS(cpos, clusters, refcount, split_cpos, split_clusters, split_refcount), TP_STRUCT__entry( __field(unsigned long long, cpos) __field(unsigned int, clusters) __field(unsigned int, refcount) __field(unsigned long long, split_cpos) __field(unsigned int, split_clusters) __field(unsigned int, split_refcount) ), TP_fast_assign( __entry->cpos = cpos; __entry->clusters = clusters; __entry->refcount = refcount; __entry->split_cpos = split_cpos; __entry->split_clusters = split_clusters; __entry->split_refcount = split_refcount; ), TP_printk("%llu %u %u %llu %u %u", __entry->cpos, __entry->clusters, __entry->refcount, __entry->split_cpos, __entry->split_clusters, __entry->split_refcount) ); DEFINE_OCFS2_REFCOUNT_TREE_OPS_EVENT(ocfs2_split_refcount_rec_insert); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_increase_refcount_begin); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_change); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_increase_refcount_insert); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_increase_refcount_split); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_remove_refcount_extent); DEFINE_OCFS2_ULL_EVENT(ocfs2_restore_refcount_block); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_decrease_refcount_rec); TRACE_EVENT(ocfs2_decrease_refcount, TP_PROTO(unsigned long long owner, unsigned long long cpos, unsigned int len, int delete), TP_ARGS(owner, cpos, len, delete), TP_STRUCT__entry( __field(unsigned long long, owner) __field(unsigned long long, cpos) __field(unsigned int, len) __field(int, delete) ), TP_fast_assign( __entry->owner = owner; __entry->cpos = cpos; __entry->len = len; __entry->delete = delete; ), TP_printk("%llu %llu %u %d", __entry->owner, __entry->cpos, __entry->len, __entry->delete) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_mark_extent_refcounted); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_calc_refcount_meta_credits); TRACE_EVENT(ocfs2_calc_refcount_meta_credits_iterate, TP_PROTO(int recs_add, unsigned long long cpos, unsigned int clusters, unsigned long long r_cpos, unsigned int r_clusters, unsigned int refcount, int index), TP_ARGS(recs_add, cpos, clusters, r_cpos, r_clusters, refcount, index), TP_STRUCT__entry( __field(int, recs_add) __field(unsigned long long, cpos) __field(unsigned int, clusters) __field(unsigned long long, r_cpos) __field(unsigned int, r_clusters) __field(unsigned int, refcount) __field(int, index) ), TP_fast_assign( __entry->recs_add = recs_add; __entry->cpos = cpos; __entry->clusters = clusters; __entry->r_cpos = r_cpos; __entry->r_clusters = r_clusters; __entry->refcount = refcount; __entry->index = index; ), TP_printk("%d %llu %u %llu %u %u %d", __entry->recs_add, __entry->cpos, __entry->clusters, __entry->r_cpos, __entry->r_clusters, __entry->refcount, __entry->index) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_add_refcount_flag); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_prepare_refcount_change_for_del); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_lock_refcount_allocators); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_page); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_duplicate_clusters_by_jbd); TRACE_EVENT(ocfs2_clear_ext_refcount, TP_PROTO(unsigned long long ino, unsigned int cpos, unsigned int len, unsigned int p_cluster, unsigned int ext_flags), TP_ARGS(ino, cpos, len, p_cluster, ext_flags), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, cpos) __field(unsigned int, len) __field(unsigned int, p_cluster) __field(unsigned int, ext_flags) ), TP_fast_assign( __entry->ino = ino; __entry->cpos = cpos; __entry->len = len; __entry->p_cluster = p_cluster; __entry->ext_flags = ext_flags; ), TP_printk("%llu %u %u %u %u", __entry->ino, __entry->cpos, __entry->len, __entry->p_cluster, __entry->ext_flags) ); TRACE_EVENT(ocfs2_replace_clusters, TP_PROTO(unsigned long long ino, unsigned int cpos, unsigned int old, unsigned int new, unsigned int len, unsigned int ext_flags), TP_ARGS(ino, cpos, old, new, len, ext_flags), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, cpos) __field(unsigned int, old) __field(unsigned int, new) __field(unsigned int, len) __field(unsigned int, ext_flags) ), TP_fast_assign( __entry->ino = ino; __entry->cpos = cpos; __entry->old = old; __entry->new = new; __entry->len = len; __entry->ext_flags = ext_flags; ), TP_printk("%llu %u %u %u %u %u", __entry->ino, __entry->cpos, __entry->old, __entry->new, __entry->len, __entry->ext_flags) ); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_make_clusters_writable); TRACE_EVENT(ocfs2_refcount_cow_hunk, TP_PROTO(unsigned long long ino, unsigned int cpos, unsigned int write_len, unsigned int max_cpos, unsigned int cow_start, unsigned int cow_len), TP_ARGS(ino, cpos, write_len, max_cpos, cow_start, cow_len), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, cpos) __field(unsigned int, write_len) __field(unsigned int, max_cpos) __field(unsigned int, cow_start) __field(unsigned int, cow_len) ), TP_fast_assign( __entry->ino = ino; __entry->cpos = cpos; __entry->write_len = write_len; __entry->max_cpos = max_cpos; __entry->cow_start = cow_start; __entry->cow_len = cow_len; ), TP_printk("%llu %u %u %u %u %u", __entry->ino, __entry->cpos, __entry->write_len, __entry->max_cpos, __entry->cow_start, __entry->cow_len) ); /* End of trace events for fs/ocfs2/refcounttree.c. */ /* Trace events for fs/ocfs2/aops.c. */ DECLARE_EVENT_CLASS(ocfs2__get_block, TP_PROTO(unsigned long long ino, unsigned long long iblock, void *bh_result, int create), TP_ARGS(ino, iblock, bh_result, create), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, iblock) __field(void *, bh_result) __field(int, create) ), TP_fast_assign( __entry->ino = ino; __entry->iblock = iblock; __entry->bh_result = bh_result; __entry->create = create; ), TP_printk("%llu %llu %p %d", __entry->ino, __entry->iblock, __entry->bh_result, __entry->create) ); #define DEFINE_OCFS2_GET_BLOCK_EVENT(name) \ DEFINE_EVENT(ocfs2__get_block, name, \ TP_PROTO(unsigned long long ino, unsigned long long iblock, \ void *bh_result, int create), \ TP_ARGS(ino, iblock, bh_result, create)) DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_symlink_get_block); DEFINE_OCFS2_GET_BLOCK_EVENT(ocfs2_get_block); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_get_block_end); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_readpage); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_bmap); TRACE_EVENT(ocfs2_try_to_write_inline_data, TP_PROTO(unsigned long long ino, unsigned int len, unsigned long long pos, unsigned int flags), TP_ARGS(ino, len, pos, flags), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, len) __field(unsigned long long, pos) __field(unsigned int, flags) ), TP_fast_assign( __entry->ino = ino; __entry->len = len; __entry->pos = pos; __entry->flags = flags; ), TP_printk("%llu %u %llu 0x%x", __entry->ino, __entry->len, __entry->pos, __entry->flags) ); TRACE_EVENT(ocfs2_write_begin_nolock, TP_PROTO(unsigned long long ino, long long i_size, unsigned int i_clusters, unsigned long long pos, unsigned int len, unsigned int flags, void *page, unsigned int clusters, unsigned int extents_to_split), TP_ARGS(ino, i_size, i_clusters, pos, len, flags, page, clusters, extents_to_split), TP_STRUCT__entry( __field(unsigned long long, ino) __field(long long, i_size) __field(unsigned int, i_clusters) __field(unsigned long long, pos) __field(unsigned int, len) __field(unsigned int, flags) __field(void *, page) __field(unsigned int, clusters) __field(unsigned int, extents_to_split) ), TP_fast_assign( __entry->ino = ino; __entry->i_size = i_size; __entry->i_clusters = i_clusters; __entry->pos = pos; __entry->len = len; __entry->flags = flags; __entry->page = page; __entry->clusters = clusters; __entry->extents_to_split = extents_to_split; ), TP_printk("%llu %lld %u %llu %u %u %p %u %u", __entry->ino, __entry->i_size, __entry->i_clusters, __entry->pos, __entry->len, __entry->flags, __entry->page, __entry->clusters, __entry->extents_to_split) ); TRACE_EVENT(ocfs2_write_end_inline, TP_PROTO(unsigned long long ino, unsigned long long pos, unsigned int copied, unsigned int id_count, unsigned int features), TP_ARGS(ino, pos, copied, id_count, features), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, pos) __field(unsigned int, copied) __field(unsigned int, id_count) __field(unsigned int, features) ), TP_fast_assign( __entry->ino = ino; __entry->pos = pos; __entry->copied = copied; __entry->id_count = id_count; __entry->features = features; ), TP_printk("%llu %llu %u %u %u", __entry->ino, __entry->pos, __entry->copied, __entry->id_count, __entry->features) ); /* End of trace events for fs/ocfs2/aops.c. */ /* Trace events for fs/ocfs2/mmap.c. */ TRACE_EVENT(ocfs2_fault, TP_PROTO(unsigned long long ino, void *area, void *page, unsigned long pgoff), TP_ARGS(ino, area, page, pgoff), TP_STRUCT__entry( __field(unsigned long long, ino) __field(void *, area) __field(void *, page) __field(unsigned long, pgoff) ), TP_fast_assign( __entry->ino = ino; __entry->area = area; __entry->page = page; __entry->pgoff = pgoff; ), TP_printk("%llu %p %p %lu", __entry->ino, __entry->area, __entry->page, __entry->pgoff) ); /* End of trace events for fs/ocfs2/mmap.c. */ /* Trace events for fs/ocfs2/file.c. */ DECLARE_EVENT_CLASS(ocfs2__file_ops, TP_PROTO(void *inode, void *file, void *dentry, unsigned long long ino, unsigned int d_len, const unsigned char *d_name, unsigned long long para), TP_ARGS(inode, file, dentry, ino, d_len, d_name, para), TP_STRUCT__entry( __field(void *, inode) __field(void *, file) __field(void *, dentry) __field(unsigned long long, ino) __field(unsigned int, d_len) __string(d_name, d_name) __field(unsigned long long, para) ), TP_fast_assign( __entry->inode = inode; __entry->file = file; __entry->dentry = dentry; __entry->ino = ino; __entry->d_len = d_len; __assign_str(d_name); __entry->para = para; ), TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file, __entry->dentry, __entry->ino, __entry->para, __entry->d_len, __get_str(d_name)) ); #define DEFINE_OCFS2_FILE_OPS(name) \ DEFINE_EVENT(ocfs2__file_ops, name, \ TP_PROTO(void *inode, void *file, void *dentry, \ unsigned long long ino, \ unsigned int d_len, const unsigned char *d_name, \ unsigned long long mode), \ TP_ARGS(inode, file, dentry, ino, d_len, d_name, mode)) DEFINE_OCFS2_FILE_OPS(ocfs2_file_open); DEFINE_OCFS2_FILE_OPS(ocfs2_file_release); DEFINE_OCFS2_FILE_OPS(ocfs2_sync_file); DEFINE_OCFS2_FILE_OPS(ocfs2_file_write_iter); DEFINE_OCFS2_FILE_OPS(ocfs2_file_read_iter); DEFINE_OCFS2_FILE_OPS(ocfs2_file_splice_read); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_truncate_file); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_truncate_file_error); TRACE_EVENT(ocfs2_extend_allocation, TP_PROTO(unsigned long long ip_blkno, unsigned long long size, unsigned int clusters, unsigned int clusters_to_add, int why, int restart_func), TP_ARGS(ip_blkno, size, clusters, clusters_to_add, why, restart_func), TP_STRUCT__entry( __field(unsigned long long, ip_blkno) __field(unsigned long long, size) __field(unsigned int, clusters) __field(unsigned int, clusters_to_add) __field(int, why) __field(int, restart_func) ), TP_fast_assign( __entry->ip_blkno = ip_blkno; __entry->size = size; __entry->clusters = clusters; __entry->clusters_to_add = clusters_to_add; __entry->why = why; __entry->restart_func = restart_func; ), TP_printk("%llu %llu %u %u %d %d", __entry->ip_blkno, __entry->size, __entry->clusters, __entry->clusters_to_add, __entry->why, __entry->restart_func) ); TRACE_EVENT(ocfs2_extend_allocation_end, TP_PROTO(unsigned long long ino, unsigned int di_clusters, unsigned long long di_size, unsigned int ip_clusters, unsigned long long i_size), TP_ARGS(ino, di_clusters, di_size, ip_clusters, i_size), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, di_clusters) __field(unsigned long long, di_size) __field(unsigned int, ip_clusters) __field(unsigned long long, i_size) ), TP_fast_assign( __entry->ino = ino; __entry->di_clusters = di_clusters; __entry->di_size = di_size; __entry->ip_clusters = ip_clusters; __entry->i_size = i_size; ), TP_printk("%llu %u %llu %u %llu", __entry->ino, __entry->di_clusters, __entry->di_size, __entry->ip_clusters, __entry->i_size) ); TRACE_EVENT(ocfs2_write_zero_page, TP_PROTO(unsigned long long ino, unsigned long long abs_from, unsigned long long abs_to, unsigned long index, unsigned int zero_from, unsigned int zero_to), TP_ARGS(ino, abs_from, abs_to, index, zero_from, zero_to), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, abs_from) __field(unsigned long long, abs_to) __field(unsigned long, index) __field(unsigned int, zero_from) __field(unsigned int, zero_to) ), TP_fast_assign( __entry->ino = ino; __entry->abs_from = abs_from; __entry->abs_to = abs_to; __entry->index = index; __entry->zero_from = zero_from; __entry->zero_to = zero_to; ), TP_printk("%llu %llu %llu %lu %u %u", __entry->ino, __entry->abs_from, __entry->abs_to, __entry->index, __entry->zero_from, __entry->zero_to) ); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend_range); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_extend); TRACE_EVENT(ocfs2_setattr, TP_PROTO(void *inode, void *dentry, unsigned long long ino, unsigned int d_len, const unsigned char *d_name, unsigned int ia_valid, unsigned int ia_mode, unsigned int ia_uid, unsigned int ia_gid), TP_ARGS(inode, dentry, ino, d_len, d_name, ia_valid, ia_mode, ia_uid, ia_gid), TP_STRUCT__entry( __field(void *, inode) __field(void *, dentry) __field(unsigned long long, ino) __field(unsigned int, d_len) __string(d_name, d_name) __field(unsigned int, ia_valid) __field(unsigned int, ia_mode) __field(unsigned int, ia_uid) __field(unsigned int, ia_gid) ), TP_fast_assign( __entry->inode = inode; __entry->dentry = dentry; __entry->ino = ino; __entry->d_len = d_len; __assign_str(d_name); __entry->ia_valid = ia_valid; __entry->ia_mode = ia_mode; __entry->ia_uid = ia_uid; __entry->ia_gid = ia_gid; ), TP_printk("%p %p %llu %.*s %u %u %u %u", __entry->inode, __entry->dentry, __entry->ino, __entry->d_len, __get_str(d_name), __entry->ia_valid, __entry->ia_mode, __entry->ia_uid, __entry->ia_gid) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_write_remove_suid); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_zero_partial_clusters); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range1); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_zero_partial_clusters_range2); DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range); TRACE_EVENT(ocfs2_prepare_inode_for_write, TP_PROTO(unsigned long long ino, unsigned long long saved_pos, unsigned long count, int wait), TP_ARGS(ino, saved_pos, count, wait), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned long long, saved_pos) __field(unsigned long, count) __field(int, wait) ), TP_fast_assign( __entry->ino = ino; __entry->saved_pos = saved_pos; __entry->count = count; __entry->wait = wait; ), TP_printk("%llu %llu %lu %d", __entry->ino, __entry->saved_pos, __entry->count, __entry->wait) ); DEFINE_OCFS2_INT_EVENT(generic_file_read_iter_ret); DEFINE_OCFS2_INT_EVENT(filemap_splice_read_ret); /* End of trace events for fs/ocfs2/file.c. */ /* Trace events for fs/ocfs2/inode.c. */ TRACE_EVENT(ocfs2_iget_begin, TP_PROTO(unsigned long long ino, unsigned int flags, int sysfile_type), TP_ARGS(ino, flags, sysfile_type), TP_STRUCT__entry( __field(unsigned long long, ino) __field(unsigned int, flags) __field(int, sysfile_type) ), TP_fast_assign( __entry->ino = ino; __entry->flags = flags; __entry->sysfile_type = sysfile_type; ), TP_printk("%llu %u %d", __entry->ino, __entry->flags, __entry->sysfile_type) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_iget5_locked); TRACE_EVENT(ocfs2_iget_end, TP_PROTO(void *inode, unsigned long long ino), TP_ARGS(inode, ino), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, ino) ), TP_fast_assign( __entry->inode = inode; __entry->ino = ino; ), TP_printk("%p %llu", __entry->inode, __entry->ino) ); TRACE_EVENT(ocfs2_find_actor, TP_PROTO(void *inode, unsigned long long ino, void *args, unsigned long long fi_blkno), TP_ARGS(inode, ino, args, fi_blkno), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, ino) __field(void *, args) __field(unsigned long long, fi_blkno) ), TP_fast_assign( __entry->inode = inode; __entry->ino = ino; __entry->args = args; __entry->fi_blkno = fi_blkno; ), TP_printk("%p %llu %p %llu", __entry->inode, __entry->ino, __entry->args, __entry->fi_blkno) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_populate_inode); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_read_locked_inode); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state); DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block); DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_validate_inode_block); DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_repair_inode_block); TRACE_EVENT(ocfs2_inode_is_valid_to_delete, TP_PROTO(void *task, void *dc_task, unsigned long long ino, unsigned int flags), TP_ARGS(task, dc_task, ino, flags), TP_STRUCT__entry( __field(void *, task) __field(void *, dc_task) __field(unsigned long long, ino) __field(unsigned int, flags) ), TP_fast_assign( __entry->task = task; __entry->dc_task = dc_task; __entry->ino = ino; __entry->flags = flags; ), TP_printk("%p %p %llu %u", __entry->task, __entry->dc_task, __entry->ino, __entry->flags) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_query_inode_wipe_begin); DEFINE_OCFS2_UINT_EVENT(ocfs2_query_inode_wipe_succ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_query_inode_wipe_end); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_cleanup_delete_inode); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_delete_inode); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_clear_inode); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_drop_inode); TRACE_EVENT(ocfs2_inode_revalidate, TP_PROTO(void *inode, unsigned long long ino, unsigned int flags), TP_ARGS(inode, ino, flags), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, ino) __field(unsigned int, flags) ), TP_fast_assign( __entry->inode = inode; __entry->ino = ino; __entry->flags = flags; ), TP_printk("%p %llu %u", __entry->inode, __entry->ino, __entry->flags) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_mark_inode_dirty); /* End of trace events for fs/ocfs2/inode.c. */ /* Trace events for fs/ocfs2/extent_map.c. */ TRACE_EVENT(ocfs2_read_virt_blocks, TP_PROTO(void *inode, unsigned long long vblock, int nr, void *bhs, unsigned int flags, void *validate), TP_ARGS(inode, vblock, nr, bhs, flags, validate), TP_STRUCT__entry( __field(void *, inode) __field(unsigned long long, vblock) __field(int, nr) __field(void *, bhs) __field(unsigned int, flags) __field(void *, validate) ), TP_fast_assign( __entry->inode = inode; __entry->vblock = vblock; __entry->nr = nr; __entry->bhs = bhs; __entry->flags = flags; __entry->validate = validate; ), TP_printk("%p %llu %d %p %x %p", __entry->inode, __entry->vblock, __entry->nr, __entry->bhs, __entry->flags, __entry->validate) ); /* End of trace events for fs/ocfs2/extent_map.c. */ /* Trace events for fs/ocfs2/slot_map.c. */ DEFINE_OCFS2_UINT_EVENT(ocfs2_refresh_slot_info); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_map_slot_buffers_block); DEFINE_OCFS2_INT_EVENT(ocfs2_find_slot); /* End of trace events for fs/ocfs2/slot_map.c. */ /* Trace events for fs/ocfs2/heartbeat.c. */ DEFINE_OCFS2_INT_EVENT(ocfs2_do_node_down); /* End of trace events for fs/ocfs2/heartbeat.c. */ /* Trace events for fs/ocfs2/super.c. */ TRACE_EVENT(ocfs2_remount, TP_PROTO(unsigned long s_flags, unsigned long osb_flags, int flags), TP_ARGS(s_flags, osb_flags, flags), TP_STRUCT__entry( __field(unsigned long, s_flags) __field(unsigned long, osb_flags) __field(int, flags) ), TP_fast_assign( __entry->s_flags = s_flags; __entry->osb_flags = osb_flags; __entry->flags = flags; ), TP_printk("%lu %lu %d", __entry->s_flags, __entry->osb_flags, __entry->flags) ); TRACE_EVENT(ocfs2_fill_super, TP_PROTO(void *sb, void *data, int silent), TP_ARGS(sb, data, silent), TP_STRUCT__entry( __field(void *, sb) __field(void *, data) __field(int, silent) ), TP_fast_assign( __entry->sb = sb; __entry->data = data; __entry->silent = silent; ), TP_printk("%p %p %d", __entry->sb, __entry->data, __entry->silent) ); TRACE_EVENT(ocfs2_parse_options, TP_PROTO(int is_remount, char *options), TP_ARGS(is_remount, options), TP_STRUCT__entry( __field(int, is_remount) __string(options, options) ), TP_fast_assign( __entry->is_remount = is_remount; __assign_str(options); ), TP_printk("%d %s", __entry->is_remount, __get_str(options)) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super); TRACE_EVENT(ocfs2_statfs, TP_PROTO(void *sb, void *buf), TP_ARGS(sb, buf), TP_STRUCT__entry( __field(void *, sb) __field(void *, buf) ), TP_fast_assign( __entry->sb = sb; __entry->buf = buf; ), TP_printk("%p %p", __entry->sb, __entry->buf) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_dismount_volume); TRACE_EVENT(ocfs2_initialize_super, TP_PROTO(char *label, char *uuid_str, unsigned long long root_dir, unsigned long long system_dir, int cluster_bits), TP_ARGS(label, uuid_str, root_dir, system_dir, cluster_bits), TP_STRUCT__entry( __string(label, label) __string(uuid_str, uuid_str) __field(unsigned long long, root_dir) __field(unsigned long long, system_dir) __field(int, cluster_bits) ), TP_fast_assign( __assign_str(label); __assign_str(uuid_str); __entry->root_dir = root_dir; __entry->system_dir = system_dir; __entry->cluster_bits = cluster_bits; ), TP_printk("%s %s %llu %llu %d", __get_str(label), __get_str(uuid_str), __entry->root_dir, __entry->system_dir, __entry->cluster_bits) ); /* End of trace events for fs/ocfs2/super.c. */ /* Trace events for fs/ocfs2/xattr.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_xattr_block); DEFINE_OCFS2_UINT_EVENT(ocfs2_xattr_extend_allocation); TRACE_EVENT(ocfs2_init_xattr_set_ctxt, TP_PROTO(const char *name, int meta, int clusters, int credits), TP_ARGS(name, meta, clusters, credits), TP_STRUCT__entry( __string(name, name) __field(int, meta) __field(int, clusters) __field(int, credits) ), TP_fast_assign( __assign_str(name); __entry->meta = meta; __entry->clusters = clusters; __entry->credits = credits; ), TP_printk("%s %d %d %d", __get_str(name), __entry->meta, __entry->clusters, __entry->credits) ); DECLARE_EVENT_CLASS(ocfs2__xattr_find, TP_PROTO(unsigned long long ino, const char *name, int name_index, unsigned int hash, unsigned long long location, int xe_index), TP_ARGS(ino, name, name_index, hash, location, xe_index), TP_STRUCT__entry( __field(unsigned long long, ino) __string(name, name) __field(int, name_index) __field(unsigned int, hash) __field(unsigned long long, location) __field(int, xe_index) ), TP_fast_assign( __entry->ino = ino; __assign_str(name); __entry->name_index = name_index; __entry->hash = hash; __entry->location = location; __entry->xe_index = xe_index; ), TP_printk("%llu %s %d %u %llu %d", __entry->ino, __get_str(name), __entry->name_index, __entry->hash, __entry->location, __entry->xe_index) ); #define DEFINE_OCFS2_XATTR_FIND_EVENT(name) \ DEFINE_EVENT(ocfs2__xattr_find, name, \ TP_PROTO(unsigned long long ino, const char *name, int name_index, \ unsigned int hash, unsigned long long bucket, \ int xe_index), \ TP_ARGS(ino, name, name_index, hash, bucket, xe_index)) DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_bucket_find); DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find); DEFINE_OCFS2_XATTR_FIND_EVENT(ocfs2_xattr_index_block_find_rec); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_iterate_xattr_buckets); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_iterate_xattr_bucket); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_cp_xattr_block_to_bucket_begin); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cp_xattr_block_to_bucket_end); DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block_begin); DEFINE_OCFS2_ULL_EVENT(ocfs2_xattr_create_index_block); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_defrag_xattr_bucket); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_bucket_cross_cluster); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_divide_xattr_bucket_begin); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_divide_xattr_bucket_move); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_cp_xattr_bucket); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_mv_xattr_buckets); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_adjust_xattr_cross_cluster); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_add_new_xattr_cluster); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_add_new_xattr_cluster_insert); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_extend_xattr_bucket); DEFINE_OCFS2_ULL_EVENT(ocfs2_add_new_xattr_bucket); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_xattr_bucket_value_truncate); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_rm_xattr_cluster); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_header); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_create_empty_xattr_block); DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_bucket); DEFINE_OCFS2_STRING_EVENT(ocfs2_xattr_set_entry_index_block); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_xattr_bucket_value_refcount); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_reflink_xattr_buckets); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_reflink_xattr_rec); /* End of trace events for fs/ocfs2/xattr.c. */ /* Trace events for fs/ocfs2/reservations.c. */ DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_insert); DEFINE_OCFS2_ULL_UINT_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_begin); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_find_free_bits_end); TRACE_EVENT(ocfs2_resv_find_window_begin, TP_PROTO(unsigned int r_start, unsigned int r_end, unsigned int goal, unsigned int wanted, int empty_root), TP_ARGS(r_start, r_end, goal, wanted, empty_root), TP_STRUCT__entry( __field(unsigned int, r_start) __field(unsigned int, r_end) __field(unsigned int, goal) __field(unsigned int, wanted) __field(int, empty_root) ), TP_fast_assign( __entry->r_start = r_start; __entry->r_end = r_end; __entry->goal = goal; __entry->wanted = wanted; __entry->empty_root = empty_root; ), TP_printk("%u %u %u %u %d", __entry->r_start, __entry->r_end, __entry->goal, __entry->wanted, __entry->empty_root) ); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resv_find_window_prev); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_resv_find_window_next); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_cannibalize_resv_begin); TRACE_EVENT(ocfs2_cannibalize_resv_end, TP_PROTO(unsigned int start, unsigned int end, unsigned int len, unsigned int last_start, unsigned int last_len), TP_ARGS(start, end, len, last_start, last_len), TP_STRUCT__entry( __field(unsigned int, start) __field(unsigned int, end) __field(unsigned int, len) __field(unsigned int, last_start) __field(unsigned int, last_len) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->len = len; __entry->last_start = last_start; __entry->last_len = last_len; ), TP_printk("%u %u %u %u %u", __entry->start, __entry->end, __entry->len, __entry->last_start, __entry->last_len) ); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_resmap_resv_bits); TRACE_EVENT(ocfs2_resmap_claimed_bits_begin, TP_PROTO(unsigned int cstart, unsigned int cend, unsigned int clen, unsigned int r_start, unsigned int r_end, unsigned int r_len, unsigned int last_start, unsigned int last_len), TP_ARGS(cstart, cend, clen, r_start, r_end, r_len, last_start, last_len), TP_STRUCT__entry( __field(unsigned int, cstart) __field(unsigned int, cend) __field(unsigned int, clen) __field(unsigned int, r_start) __field(unsigned int, r_end) __field(unsigned int, r_len) __field(unsigned int, last_start) __field(unsigned int, last_len) ), TP_fast_assign( __entry->cstart = cstart; __entry->cend = cend; __entry->clen = clen; __entry->r_start = r_start; __entry->r_end = r_end; __entry->r_len = r_len; __entry->last_start = last_start; __entry->last_len = last_len; ), TP_printk("%u %u %u %u %u %u %u %u", __entry->cstart, __entry->cend, __entry->clen, __entry->r_start, __entry->r_end, __entry->r_len, __entry->last_start, __entry->last_len) ); TRACE_EVENT(ocfs2_resmap_claimed_bits_end, TP_PROTO(unsigned int start, unsigned int end, unsigned int len, unsigned int last_start, unsigned int last_len), TP_ARGS(start, end, len, last_start, last_len), TP_STRUCT__entry( __field(unsigned int, start) __field(unsigned int, end) __field(unsigned int, len) __field(unsigned int, last_start) __field(unsigned int, last_len) ), TP_fast_assign( __entry->start = start; __entry->end = end; __entry->len = len; __entry->last_start = last_start; __entry->last_len = last_len; ), TP_printk("%u %u %u %u %u", __entry->start, __entry->end, __entry->len, __entry->last_start, __entry->last_len) ); /* End of trace events for fs/ocfs2/reservations.c. */ /* Trace events for fs/ocfs2/quota_local.c. */ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_recover_local_quota_file); DEFINE_OCFS2_INT_EVENT(ocfs2_finish_quota_recovery); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(olq_set_dquot); /* End of trace events for fs/ocfs2/quota_local.c. */ /* Trace events for fs/ocfs2/quota_global.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_quota_block); TRACE_EVENT(ocfs2_sync_dquot, TP_PROTO(unsigned int dq_id, long long dqb_curspace, long long spacechange, long long curinodes, long long inodechange), TP_ARGS(dq_id, dqb_curspace, spacechange, curinodes, inodechange), TP_STRUCT__entry( __field(unsigned int, dq_id) __field(long long, dqb_curspace) __field(long long, spacechange) __field(long long, curinodes) __field(long long, inodechange) ), TP_fast_assign( __entry->dq_id = dq_id; __entry->dqb_curspace = dqb_curspace; __entry->spacechange = spacechange; __entry->curinodes = curinodes; __entry->inodechange = inodechange; ), TP_printk("%u %lld %lld %lld %lld", __entry->dq_id, __entry->dqb_curspace, __entry->spacechange, __entry->curinodes, __entry->inodechange) ); TRACE_EVENT(ocfs2_sync_dquot_helper, TP_PROTO(unsigned int dq_id, unsigned int dq_type, unsigned long type, const char *s_id), TP_ARGS(dq_id, dq_type, type, s_id), TP_STRUCT__entry( __field(unsigned int, dq_id) __field(unsigned int, dq_type) __field(unsigned long, type) __string(s_id, s_id) ), TP_fast_assign( __entry->dq_id = dq_id; __entry->dq_type = dq_type; __entry->type = type; __assign_str(s_id); ), TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type, __entry->type, __get_str(s_id)) ); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_write_dquot); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_release_dquot); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_acquire_dquot); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_get_next_id); DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_mark_dquot_dirty); /* End of trace events for fs/ocfs2/quota_global.c. */ /* Trace events for fs/ocfs2/dir.c. */ DEFINE_OCFS2_INT_EVENT(ocfs2_search_dirblock); DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_dir_block); DEFINE_OCFS2_POINTER_EVENT(ocfs2_find_entry_el); TRACE_EVENT(ocfs2_dx_dir_search, TP_PROTO(unsigned long long ino, int namelen, const char *name, unsigned int major_hash, unsigned int minor_hash, unsigned long long blkno), TP_ARGS(ino, namelen, name, major_hash, minor_hash, blkno), TP_STRUCT__entry( __field(unsigned long long, ino) __field(int, namelen) __string(name, name) __field(unsigned int, major_hash) __field(unsigned int,minor_hash) __field(unsigned long long, blkno) ), TP_fast_assign( __entry->ino = ino; __entry->namelen = namelen; __assign_str(name); __entry->major_hash = major_hash; __entry->minor_hash = minor_hash; __entry->blkno = blkno; ), TP_printk("%llu %.*s %u %u %llu", __entry->ino, __entry->namelen, __get_str(name), __entry->major_hash, __entry->minor_hash, __entry->blkno) ); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_dx_dir_search_leaf_info); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_delete_entry_dx); DEFINE_OCFS2_ULL_EVENT(ocfs2_readdir); TRACE_EVENT(ocfs2_find_files_on_disk, TP_PROTO(int namelen, const char *name, void *blkno, unsigned long long dir), TP_ARGS(namelen, name, blkno, dir), TP_STRUCT__entry( __field(int, namelen) __string(name, name) __field(void *, blkno) __field(unsigned long long, dir) ), TP_fast_assign( __entry->namelen = namelen; __assign_str(name); __entry->blkno = blkno; __entry->dir = dir; ), TP_printk("%.*s %p %llu", __entry->namelen, __get_str(name), __entry->blkno, __entry->dir) ); TRACE_EVENT(ocfs2_check_dir_for_entry, TP_PROTO(unsigned long long dir, int namelen, const char *name), TP_ARGS(dir, namelen, name), TP_STRUCT__entry( __field(unsigned long long, dir) __field(int, namelen) __string(name, name) ), TP_fast_assign( __entry->dir = dir; __entry->namelen = namelen; __assign_str(name); ), TP_printk("%llu %.*s", __entry->dir, __entry->namelen, __get_str(name)) ); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_dx_dir_attach_index); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_format_cluster); TRACE_EVENT(ocfs2_dx_dir_index_root_block, TP_PROTO(unsigned long long dir, unsigned int major_hash, unsigned int minor_hash, int namelen, const char *name, unsigned int num_used), TP_ARGS(dir, major_hash, minor_hash, namelen, name, num_used), TP_STRUCT__entry( __field(unsigned long long, dir) __field(unsigned int, major_hash) __field(unsigned int, minor_hash) __field(int, namelen) __string(name, name) __field(unsigned int, num_used) ), TP_fast_assign( __entry->dir = dir; __entry->major_hash = major_hash; __entry->minor_hash = minor_hash; __entry->namelen = namelen; __assign_str(name); __entry->num_used = num_used; ), TP_printk("%llu %x %x %.*s %u", __entry->dir, __entry->major_hash, __entry->minor_hash, __entry->namelen, __get_str(name), __entry->num_used) ); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_extend_dir); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_dx_dir_rebalance); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_dx_dir_rebalance_split); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_prepare_dir_for_insert); /* End of trace events for fs/ocfs2/dir.c. */ /* Trace events for fs/ocfs2/namei.c. */ DECLARE_EVENT_CLASS(ocfs2__dentry_ops, TP_PROTO(void *dir, void *dentry, int name_len, const char *name, unsigned long long dir_blkno, unsigned long long extra), TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra), TP_STRUCT__entry( __field(void *, dir) __field(void *, dentry) __field(int, name_len) __string(name, name) __field(unsigned long long, dir_blkno) __field(unsigned long long, extra) ), TP_fast_assign( __entry->dir = dir; __entry->dentry = dentry; __entry->name_len = name_len; __assign_str(name); __entry->dir_blkno = dir_blkno; __entry->extra = extra; ), TP_printk("%p %p %.*s %llu %llu", __entry->dir, __entry->dentry, __entry->name_len, __get_str(name), __entry->dir_blkno, __entry->extra) ); #define DEFINE_OCFS2_DENTRY_OPS(name) \ DEFINE_EVENT(ocfs2__dentry_ops, name, \ TP_PROTO(void *dir, void *dentry, int name_len, const char *name, \ unsigned long long dir_blkno, unsigned long long extra), \ TP_ARGS(dir, dentry, name_len, name, dir_blkno, extra)) DEFINE_OCFS2_DENTRY_OPS(ocfs2_lookup); DEFINE_OCFS2_DENTRY_OPS(ocfs2_mkdir); DEFINE_OCFS2_DENTRY_OPS(ocfs2_create); DEFINE_OCFS2_DENTRY_OPS(ocfs2_unlink); DEFINE_OCFS2_DENTRY_OPS(ocfs2_symlink_create); DEFINE_OCFS2_DENTRY_OPS(ocfs2_mv_orphaned_inode_to_new); DEFINE_OCFS2_POINTER_EVENT(ocfs2_lookup_ret); TRACE_EVENT(ocfs2_mknod, TP_PROTO(void *dir, void *dentry, int name_len, const char *name, unsigned long long dir_blkno, unsigned long dev, int mode), TP_ARGS(dir, dentry, name_len, name, dir_blkno, dev, mode), TP_STRUCT__entry( __field(void *, dir) __field(void *, dentry) __field(int, name_len) __string(name, name) __field(unsigned long long, dir_blkno) __field(unsigned long, dev) __field(int, mode) ), TP_fast_assign( __entry->dir = dir; __entry->dentry = dentry; __entry->name_len = name_len; __assign_str(name); __entry->dir_blkno = dir_blkno; __entry->dev = dev; __entry->mode = mode; ), TP_printk("%p %p %.*s %llu %lu %d", __entry->dir, __entry->dentry, __entry->name_len, __get_str(name), __entry->dir_blkno, __entry->dev, __entry->mode) ); TRACE_EVENT(ocfs2_link, TP_PROTO(unsigned long long ino, int old_len, const char *old_name, int name_len, const char *name), TP_ARGS(ino, old_len, old_name, name_len, name), TP_STRUCT__entry( __field(unsigned long long, ino) __field(int, old_len) __string(old_name, old_name) __field(int, name_len) __string(name, name) ), TP_fast_assign( __entry->ino = ino; __entry->old_len = old_len; __assign_str(old_name); __entry->name_len = name_len; __assign_str(name); ), TP_printk("%llu %.*s %.*s", __entry->ino, __entry->old_len, __get_str(old_name), __entry->name_len, __get_str(name)) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_unlink_noent); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_double_lock_end); TRACE_EVENT(ocfs2_rename, TP_PROTO(void *old_dir, void *old_dentry, void *new_dir, void *new_dentry, int old_len, const char *old_name, int new_len, const char *new_name), TP_ARGS(old_dir, old_dentry, new_dir, new_dentry, old_len, old_name, new_len, new_name), TP_STRUCT__entry( __field(void *, old_dir) __field(void *, old_dentry) __field(void *, new_dir) __field(void *, new_dentry) __field(int, old_len) __string(old_name, old_name) __field(int, new_len) __string(new_name, new_name) ), TP_fast_assign( __entry->old_dir = old_dir; __entry->old_dentry = old_dentry; __entry->new_dir = new_dir; __entry->new_dentry = new_dentry; __entry->old_len = old_len; __assign_str(old_name); __entry->new_len = new_len; __assign_str(new_name); ), TP_printk("%p %p %p %p %.*s %.*s", __entry->old_dir, __entry->old_dentry, __entry->new_dir, __entry->new_dentry, __entry->old_len, __get_str(old_name), __entry->new_len, __get_str(new_name)) ); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted); TRACE_EVENT(ocfs2_rename_target_exists, TP_PROTO(int new_len, const char *new_name), TP_ARGS(new_len, new_name), TP_STRUCT__entry( __field(int, new_len) __string(new_name, new_name) ), TP_fast_assign( __entry->new_len = new_len; __assign_str(new_name); ), TP_printk("%.*s", __entry->new_len, __get_str(new_name)) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_rename_disagree); TRACE_EVENT(ocfs2_rename_over_existing, TP_PROTO(unsigned long long new_blkno, void *new_bh, unsigned long long newdi_blkno), TP_ARGS(new_blkno, new_bh, newdi_blkno), TP_STRUCT__entry( __field(unsigned long long, new_blkno) __field(void *, new_bh) __field(unsigned long long, newdi_blkno) ), TP_fast_assign( __entry->new_blkno = new_blkno; __entry->new_bh = new_bh; __entry->newdi_blkno = newdi_blkno; ), TP_printk("%llu %p %llu", __entry->new_blkno, __entry->new_bh, __entry->newdi_blkno) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_create_symlink_data); TRACE_EVENT(ocfs2_symlink_begin, TP_PROTO(void *dir, void *dentry, const char *symname, int len, const char *name), TP_ARGS(dir, dentry, symname, len, name), TP_STRUCT__entry( __field(void *, dir) __field(void *, dentry) __field(const char *, symname) __field(int, len) __string(name, name) ), TP_fast_assign( __entry->dir = dir; __entry->dentry = dentry; __entry->symname = symname; __entry->len = len; __assign_str(name); ), TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry, __entry->symname, __entry->len, __get_str(name)) ); TRACE_EVENT(ocfs2_blkno_stringify, TP_PROTO(unsigned long long blkno, const char *name, int namelen), TP_ARGS(blkno, name, namelen), TP_STRUCT__entry( __field(unsigned long long, blkno) __string(name, name) __field(int, namelen) ), TP_fast_assign( __entry->blkno = blkno; __assign_str(name); __entry->namelen = namelen; ), TP_printk("%llu %s %d", __entry->blkno, __get_str(name), __entry->namelen) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_add_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_orphan_add_end); TRACE_EVENT(ocfs2_orphan_del, TP_PROTO(unsigned long long dir, const char *name, int namelen), TP_ARGS(dir, name, namelen), TP_STRUCT__entry( __field(unsigned long long, dir) __string(name, name) __field(int, namelen) ), TP_fast_assign( __entry->dir = dir; __assign_str(name); __entry->namelen = namelen; ), TP_printk("%llu %s %d", __entry->dir, __get_str(name), __entry->namelen) ); /* End of trace events for fs/ocfs2/namei.c. */ /* Trace events for fs/ocfs2/dcache.c. */ TRACE_EVENT(ocfs2_dentry_revalidate, TP_PROTO(void *dentry, int len, const char *name), TP_ARGS(dentry, len, name), TP_STRUCT__entry( __field(void *, dentry) __field(int, len) __string(name, name) ), TP_fast_assign( __entry->dentry = dentry; __entry->len = len; __assign_str(name); ), TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name)) ); TRACE_EVENT(ocfs2_dentry_revalidate_negative, TP_PROTO(int len, const char *name, unsigned long pgen, unsigned long gen), TP_ARGS(len, name, pgen, gen), TP_STRUCT__entry( __field(int, len) __string(name, name) __field(unsigned long, pgen) __field(unsigned long, gen) ), TP_fast_assign( __entry->len = len; __assign_str(name); __entry->pgen = pgen; __entry->gen = gen; ), TP_printk("%.*s %lu %lu", __entry->len, __get_str(name), __entry->pgen, __entry->gen) ); DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_delete); DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_dentry_revalidate_orphaned); DEFINE_OCFS2_ULL_EVENT(ocfs2_dentry_revalidate_nofsdata); DEFINE_OCFS2_INT_EVENT(ocfs2_dentry_revalidate_ret); TRACE_EVENT(ocfs2_find_local_alias, TP_PROTO(int len, const char *name), TP_ARGS(len, name), TP_STRUCT__entry( __field(int, len) __string(name, name) ), TP_fast_assign( __entry->len = len; __assign_str(name); ), TP_printk("%.*s", __entry->len, __get_str(name)) ); TRACE_EVENT(ocfs2_dentry_attach_lock, TP_PROTO(int len, const char *name, unsigned long long parent, void *fsdata), TP_ARGS(len, name, parent, fsdata), TP_STRUCT__entry( __field(int, len) __string(name, name) __field(unsigned long long, parent) __field(void *, fsdata) ), TP_fast_assign( __entry->len = len; __assign_str(name); __entry->parent = parent; __entry->fsdata = fsdata; ), TP_printk("%.*s %llu %p", __entry->len, __get_str(name), __entry->parent, __entry->fsdata) ); TRACE_EVENT(ocfs2_dentry_attach_lock_found, TP_PROTO(const char *name, unsigned long long parent, unsigned long long ino), TP_ARGS(name, parent, ino), TP_STRUCT__entry( __string(name, name) __field(unsigned long long, parent) __field(unsigned long long, ino) ), TP_fast_assign( __assign_str(name); __entry->parent = parent; __entry->ino = ino; ), TP_printk("%s %llu %llu", __get_str(name), __entry->parent, __entry->ino) ); /* End of trace events for fs/ocfs2/dcache.c. */ /* Trace events for fs/ocfs2/export.c. */ TRACE_EVENT(ocfs2_get_dentry_begin, TP_PROTO(void *sb, void *handle, unsigned long long blkno), TP_ARGS(sb, handle, blkno), TP_STRUCT__entry( __field(void *, sb) __field(void *, handle) __field(unsigned long long, blkno) ), TP_fast_assign( __entry->sb = sb; __entry->handle = handle; __entry->blkno = blkno; ), TP_printk("%p %p %llu", __entry->sb, __entry->handle, __entry->blkno) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_get_dentry_test_bit); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_get_dentry_stale); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_get_dentry_generation); DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_dentry_end); TRACE_EVENT(ocfs2_get_parent, TP_PROTO(void *child, int len, const char *name, unsigned long long ino), TP_ARGS(child, len, name, ino), TP_STRUCT__entry( __field(void *, child) __field(int, len) __string(name, name) __field(unsigned long long, ino) ), TP_fast_assign( __entry->child = child; __entry->len = len; __assign_str(name); __entry->ino = ino; ), TP_printk("%p %.*s %llu", __entry->child, __entry->len, __get_str(name), __entry->ino) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_get_parent_end); TRACE_EVENT(ocfs2_encode_fh_begin, TP_PROTO(void *dentry, int name_len, const char *name, void *fh, int len, int connectable), TP_ARGS(dentry, name_len, name, fh, len, connectable), TP_STRUCT__entry( __field(void *, dentry) __field(int, name_len) __string(name, name) __field(void *, fh) __field(int, len) __field(int, connectable) ), TP_fast_assign( __entry->dentry = dentry; __entry->name_len = name_len; __assign_str(name); __entry->fh = fh; __entry->len = len; __entry->connectable = connectable; ), TP_printk("%p %.*s %p %d %d", __entry->dentry, __entry->name_len, __get_str(name), __entry->fh, __entry->len, __entry->connectable) ); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_self); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_encode_fh_parent); DEFINE_OCFS2_INT_EVENT(ocfs2_encode_fh_type); /* End of trace events for fs/ocfs2/export.c. */ /* Trace events for fs/ocfs2/journal.c. */ DEFINE_OCFS2_UINT_EVENT(ocfs2_commit_cache_begin); DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_commit_cache_end); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans); DEFINE_OCFS2_INT_EVENT(ocfs2_assure_trans_credits); DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access); DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_journal_init); DEFINE_OCFS2_UINT_EVENT(ocfs2_journal_init_maxlen); DEFINE_OCFS2_INT_EVENT(ocfs2_journal_shutdown); DEFINE_OCFS2_POINTER_EVENT(ocfs2_journal_shutdown_wait); DEFINE_OCFS2_ULL_EVENT(ocfs2_complete_recovery); DEFINE_OCFS2_INT_EVENT(ocfs2_complete_recovery_end); TRACE_EVENT(ocfs2_complete_recovery_slot, TP_PROTO(int slot, unsigned long long la_ino, unsigned long long tl_ino, void *qrec), TP_ARGS(slot, la_ino, tl_ino, qrec), TP_STRUCT__entry( __field(int, slot) __field(unsigned long long, la_ino) __field(unsigned long long, tl_ino) __field(void *, qrec) ), TP_fast_assign( __entry->slot = slot; __entry->la_ino = la_ino; __entry->tl_ino = tl_ino; __entry->qrec = qrec; ), TP_printk("%d %llu %llu %p", __entry->slot, __entry->la_ino, __entry->tl_ino, __entry->qrec) ); DEFINE_OCFS2_INT_INT_EVENT(ocfs2_recovery_thread_node); DEFINE_OCFS2_INT_EVENT(ocfs2_recovery_thread_end); TRACE_EVENT(ocfs2_recovery_thread, TP_PROTO(int node_num, int osb_node_num, int disable, void *recovery_thread, int map_set), TP_ARGS(node_num, osb_node_num, disable, recovery_thread, map_set), TP_STRUCT__entry( __field(int, node_num) __field(int, osb_node_num) __field(int,disable) __field(void *, recovery_thread) __field(int,map_set) ), TP_fast_assign( __entry->node_num = node_num; __entry->osb_node_num = osb_node_num; __entry->disable = disable; __entry->recovery_thread = recovery_thread; __entry->map_set = map_set; ), TP_printk("%d %d %d %p %d", __entry->node_num, __entry->osb_node_num, __entry->disable, __entry->recovery_thread, __entry->map_set) ); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_replay_journal_recovered); DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_lock_err); DEFINE_OCFS2_INT_EVENT(ocfs2_replay_journal_skip); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_recover_node); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_recover_node_skip); DEFINE_OCFS2_UINT_UINT_EVENT(ocfs2_mark_dead_nodes); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_begin); DEFINE_OCFS2_UINT_UINT_UINT_EVENT(ocfs2_queue_orphan_scan_end); DEFINE_OCFS2_ULL_EVENT(ocfs2_orphan_filldir); DEFINE_OCFS2_INT_EVENT(ocfs2_recover_orphans); DEFINE_OCFS2_ULL_EVENT(ocfs2_recover_orphans_iput); DEFINE_OCFS2_INT_EVENT(ocfs2_wait_on_mount); /* End of trace events for fs/ocfs2/journal.c. */ /* Trace events for fs/ocfs2/buffer_head_io.c. */ DEFINE_OCFS2_ULL_UINT_EVENT(ocfs2_read_blocks_sync); DEFINE_OCFS2_ULL_EVENT(ocfs2_read_blocks_sync_jbd); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_read_blocks_from_disk); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_bh); DEFINE_OCFS2_ULL_INT_INT_INT_EVENT(ocfs2_read_blocks_end); TRACE_EVENT(ocfs2_write_block, TP_PROTO(unsigned long long block, void *ci), TP_ARGS(block, ci), TP_STRUCT__entry( __field(unsigned long long, block) __field(void *, ci) ), TP_fast_assign( __entry->block = block; __entry->ci = ci; ), TP_printk("%llu %p", __entry->block, __entry->ci) ); TRACE_EVENT(ocfs2_read_blocks_begin, TP_PROTO(void *ci, unsigned long long block, unsigned int nr, int flags), TP_ARGS(ci, block, nr, flags), TP_STRUCT__entry( __field(void *, ci) __field(unsigned long long, block) __field(unsigned int, nr) __field(int, flags) ), TP_fast_assign( __entry->ci = ci; __entry->block = block; __entry->nr = nr; __entry->flags = flags; ), TP_printk("%p %llu %u %d", __entry->ci, __entry->block, __entry->nr, __entry->flags) ); /* End of trace events for fs/ocfs2/buffer_head_io.c. */ /* Trace events for fs/ocfs2/uptodate.c. */ DEFINE_OCFS2_ULL_EVENT(ocfs2_purge_copied_metadata_tree); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_metadata_cache_purge); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_buffer_cached_begin); TRACE_EVENT(ocfs2_buffer_cached_end, TP_PROTO(int index, void *item), TP_ARGS(index, item), TP_STRUCT__entry( __field(int, index) __field(void *, item) ), TP_fast_assign( __entry->index = index; __entry->item = item; ), TP_printk("%d %p", __entry->index, __entry->item) ); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_append_cache_array); DEFINE_OCFS2_ULL_ULL_UINT_EVENT(ocfs2_insert_cache_tree); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_expand_cache); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_set_buffer_uptodate); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_set_buffer_uptodate_begin); DEFINE_OCFS2_ULL_UINT_UINT_EVENT(ocfs2_remove_metadata_array); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_remove_metadata_tree); DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_remove_block_from_cache); /* End of trace events for fs/ocfs2/uptodate.c. */ #endif /* _TRACE_OCFS2_H */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #define TRACE_INCLUDE_FILE ocfs2_trace #include <trace/define_trace.h>
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 // SPDX-License-Identifier: GPL-2.0+ /* * HID driver for Topre REALFORCE Keyboards * * Copyright (c) 2022 Harry Stern <harry@harrystern.net> * * Based on the hid-macally driver */ #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" MODULE_AUTHOR("Harry Stern <harry@harrystern.net>"); MODULE_DESCRIPTION("REALFORCE R2 Keyboard driver"); MODULE_LICENSE("GPL"); /* * Fix the REALFORCE R2's non-boot interface's report descriptor to match the * events it's actually sending. It claims to send array events but is instead * sending variable events. */ static const __u8 *topre_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { if (*rsize >= 119 && rdesc[69] == 0x29 && rdesc[70] == 0xe7 && rdesc[71] == 0x81 && rdesc[72] == 0x00) { hid_info(hdev, "fixing up Topre REALFORCE keyboard report descriptor\n"); rdesc[72] = 0x02; } return rdesc; } static const struct hid_device_id topre_id_table[] = { { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, USB_DEVICE_ID_TOPRE_REALFORCE_R2_108) }, { HID_USB_DEVICE(USB_VENDOR_ID_TOPRE, USB_DEVICE_ID_TOPRE_REALFORCE_R2_87) }, { } }; MODULE_DEVICE_TABLE(hid, topre_id_table); static struct hid_driver topre_driver = { .name = "topre", .id_table = topre_id_table, .report_fixup = topre_report_fixup, }; module_hid_driver(topre_driver);
371 26 294 13 27 241 47 66 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SEQ_FILE_H #define _LINUX_SEQ_FILE_H #include <linux/types.h> #include <linux/string.h> #include <linux/string_helpers.h> #include <linux/bug.h> #include <linux/mutex.h> #include <linux/nodemask.h> #include <linux/fs.h> #include <linux/cred.h> struct seq_operations; struct seq_file { char *buf; size_t size; size_t from; size_t count; size_t pad_until; loff_t index; loff_t read_pos; struct mutex lock; const struct seq_operations *op; int poll_event; const struct file *file; void *private; }; struct seq_operations { void * (*start) (struct seq_file *m, loff_t *pos); void (*stop) (struct seq_file *m, void *v); void * (*next) (struct seq_file *m, void *v, loff_t *pos); int (*show) (struct seq_file *m, void *v); }; #define SEQ_SKIP 1 /** * seq_has_overflowed - check if the buffer has overflowed * @m: the seq_file handle * * seq_files have a buffer which may overflow. When this happens a larger * buffer is reallocated and all the data will be printed again. * The overflow state is true when m->count == m->size. * * Returns true if the buffer received more than it can hold. */ static inline bool seq_has_overflowed(struct seq_file *m) { return m->count == m->size; } /** * seq_get_buf - get buffer to write arbitrary data to * @m: the seq_file handle * @bufp: the beginning of the buffer is stored here * * Return the number of bytes available in the buffer, or zero if * there's no space. */ static inline size_t seq_get_buf(struct seq_file *m, char **bufp) { BUG_ON(m->count > m->size); if (m->count < m->size) *bufp = m->buf + m->count; else *bufp = NULL; return m->size - m->count; } /** * seq_commit - commit data to the buffer * @m: the seq_file handle * @num: the number of bytes to commit * * Commit @num bytes of data written to a buffer previously acquired * by seq_buf_get. To signal an error condition, or that the data * didn't fit in the available space, pass a negative @num value. */ static inline void seq_commit(struct seq_file *m, int num) { if (num < 0) { m->count = m->size; } else { BUG_ON(m->count + num > m->size); m->count += num; } } /** * seq_setwidth - set padding width * @m: the seq_file handle * @size: the max number of bytes to pad. * * Call seq_setwidth() for setting max width, then call seq_printf() etc. and * finally call seq_pad() to pad the remaining bytes. */ static inline void seq_setwidth(struct seq_file *m, size_t size) { m->pad_until = m->count + size; } void seq_pad(struct seq_file *m, char c); char *mangle_path(char *s, const char *p, const char *esc); int seq_open(struct file *, const struct seq_operations *); ssize_t seq_read(struct file *, char __user *, size_t, loff_t *); ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter); loff_t seq_lseek(struct file *, loff_t, int); int seq_release(struct inode *, struct file *); int seq_write(struct seq_file *seq, const void *data, size_t len); __printf(2, 0) void seq_vprintf(struct seq_file *m, const char *fmt, va_list args); __printf(2, 3) void seq_printf(struct seq_file *m, const char *fmt, ...); void seq_putc(struct seq_file *m, char c); void __seq_puts(struct seq_file *m, const char *s); static __always_inline void seq_puts(struct seq_file *m, const char *s) { if (!__builtin_constant_p(*s)) __seq_puts(m, s); else if (s[0] && !s[1]) seq_putc(m, s[0]); else seq_write(m, s, __builtin_strlen(s)); } void seq_put_decimal_ull_width(struct seq_file *m, const char *delimiter, unsigned long long num, unsigned int width); void seq_put_decimal_ull(struct seq_file *m, const char *delimiter, unsigned long long num); void seq_put_decimal_ll(struct seq_file *m, const char *delimiter, long long num); void seq_put_hex_ll(struct seq_file *m, const char *delimiter, unsigned long long v, unsigned int width); void seq_escape_mem(struct seq_file *m, const char *src, size_t len, unsigned int flags, const char *esc); static inline void seq_escape_str(struct seq_file *m, const char *src, unsigned int flags, const char *esc) { seq_escape_mem(m, src, strlen(src), flags, esc); } /** * seq_escape - print string into buffer, escaping some characters * @m: target buffer * @s: NULL-terminated string * @esc: set of characters that need escaping * * Puts string into buffer, replacing each occurrence of character from * @esc with usual octal escape. * * Use seq_has_overflowed() to check for errors. */ static inline void seq_escape(struct seq_file *m, const char *s, const char *esc) { seq_escape_str(m, s, ESCAPE_OCTAL, esc); } void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); int seq_path(struct seq_file *, const struct path *, const char *); int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); void *single_start(struct seq_file *, loff_t *); int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); int single_release(struct inode *, struct file *); void *__seq_open_private(struct file *, const struct seq_operations *, int); int seq_open_private(struct file *, const struct seq_operations *, int); int seq_release_private(struct inode *, struct file *); #ifdef CONFIG_BINARY_PRINTF void seq_bprintf(struct seq_file *m, const char *f, const u32 *binary); #endif #define DEFINE_SEQ_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ int ret = seq_open(file, &__name ## _sops); \ if (!ret && inode->i_private) { \ struct seq_file *seq_f = file->private_data; \ seq_f->private = inode->i_private; \ } \ return ret; \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .llseek = seq_lseek, \ .release = seq_release, \ } #define DEFINE_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, inode->i_private); \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .llseek = seq_lseek, \ .release = single_release, \ } #define DEFINE_SHOW_STORE_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, inode->i_private); \ } \ \ static const struct file_operations __name ## _fops = { \ .owner = THIS_MODULE, \ .open = __name ## _open, \ .read = seq_read, \ .write = __name ## _write, \ .llseek = seq_lseek, \ .release = single_release, \ } #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ return single_open(file, __name ## _show, pde_data(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ .proc_open = __name ## _open, \ .proc_read = seq_read, \ .proc_lseek = seq_lseek, \ .proc_release = single_release, \ } static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; #endif } /** * seq_show_options - display mount options with appropriate escapes. * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, can be NULL */ static inline void seq_show_option(struct seq_file *m, const char *name, const char *value) { seq_putc(m, ','); seq_escape(m, name, ",= \t\n\\"); if (value) { seq_putc(m, '='); seq_escape(m, value, ", \t\n\\"); } } /** * seq_show_option_n - display mount options with appropriate escapes * where @value must be a specific length (i.e. * not NUL-terminated). * @m: the seq_file handle * @name: the mount option name * @value: the mount option name's value, cannot be NULL * @length: the exact length of @value to display, must be constant expression * * This is a macro since this uses "length" to define the size of the * stack buffer. */ #define seq_show_option_n(m, name, value, length) { \ char val_buf[length + 1]; \ memcpy(val_buf, value, length); \ val_buf[length] = '\0'; \ seq_show_option(m, name, val_buf); \ } #define SEQ_START_TOKEN ((void *)1) /* * Helpers for iteration over list_head-s in seq_files */ extern struct list_head *seq_list_start(struct list_head *head, loff_t pos); extern struct list_head *seq_list_start_head(struct list_head *head, loff_t pos); extern struct list_head *seq_list_next(void *v, struct list_head *head, loff_t *ppos); extern struct list_head *seq_list_start_rcu(struct list_head *head, loff_t pos); extern struct list_head *seq_list_start_head_rcu(struct list_head *head, loff_t pos); extern struct list_head *seq_list_next_rcu(void *v, struct list_head *head, loff_t *ppos); /* * Helpers for iteration over hlist_head-s in seq_files */ extern struct hlist_node *seq_hlist_start(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_start_head(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_next(void *v, struct hlist_head *head, loff_t *ppos); extern struct hlist_node *seq_hlist_start_rcu(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head, loff_t pos); extern struct hlist_node *seq_hlist_next_rcu(void *v, struct hlist_head *head, loff_t *ppos); /* Helpers for iterating over per-cpu hlist_head-s in seq_files */ extern struct hlist_node *seq_hlist_start_percpu(struct hlist_head __percpu *head, int *cpu, loff_t pos); extern struct hlist_node *seq_hlist_next_percpu(void *v, struct hlist_head __percpu *head, int *cpu, loff_t *pos); void seq_file_init(void); #endif
2503 216 214 215 210 88 211 88 214 215 62 201 22 22 22 22 14 22 14 22 22 218 61 203 209 210 210 88 86 88 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 // SPDX-License-Identifier: GPL-2.0-only /* * Power Management Quality of Service (PM QoS) support base. * * Copyright (C) 2020 Intel Corporation * * Authors: * Mark Gross <mgross@linux.intel.com> * Rafael J. Wysocki <rafael.j.wysocki@intel.com> * * Provided here is an interface for specifying PM QoS dependencies. It allows * entities depending on QoS constraints to register their requests which are * aggregated as appropriate to produce effective constraints (target values) * that can be monitored by entities needing to respect them, either by polling * or through a built-in notification mechanism. * * In addition to the basic functionality, more specific interfaces for managing * global CPU latency QoS requests and frequency QoS requests are provided. */ /*#define DEBUG*/ #include <linux/pm_qos.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/time.h> #include <linux/fs.h> #include <linux/device.h> #include <linux/miscdevice.h> #include <linux/string.h> #include <linux/platform_device.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include <linux/export.h> #include <trace/events/power.h> /* * locking rule: all changes to constraints or notifiers lists * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock * held, taken with _irqsave. One lock to rule them all */ static DEFINE_SPINLOCK(pm_qos_lock); /** * pm_qos_read_value - Return the current effective constraint value. * @c: List of PM QoS constraint requests. */ s32 pm_qos_read_value(struct pm_qos_constraints *c) { return READ_ONCE(c->target_value); } static int pm_qos_get_value(struct pm_qos_constraints *c) { if (plist_head_empty(&c->list)) return c->no_constraint_value; switch (c->type) { case PM_QOS_MIN: return plist_first(&c->list)->prio; case PM_QOS_MAX: return plist_last(&c->list)->prio; default: WARN(1, "Unknown PM QoS type in %s\n", __func__); return PM_QOS_DEFAULT_VALUE; } } static void pm_qos_set_value(struct pm_qos_constraints *c, s32 value) { WRITE_ONCE(c->target_value, value); } /** * pm_qos_update_target - Update a list of PM QoS constraint requests. * @c: List of PM QoS requests. * @node: Target list entry. * @action: Action to carry out (add, update or remove). * @value: New request value for the target list entry. * * Update the given list of PM QoS constraint requests, @c, by carrying an * @action involving the @node list entry and @value on it. * * The recognized values of @action are PM_QOS_ADD_REQ (store @value in @node * and add it to the list), PM_QOS_UPDATE_REQ (remove @node from the list, store * @value in it and add it to the list again), and PM_QOS_REMOVE_REQ (remove * @node from the list, ignore @value). * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value) { int prev_value, curr_value, new_value; unsigned long flags; spin_lock_irqsave(&pm_qos_lock, flags); prev_value = pm_qos_get_value(c); if (value == PM_QOS_DEFAULT_VALUE) new_value = c->default_value; else new_value = value; switch (action) { case PM_QOS_REMOVE_REQ: plist_del(node, &c->list); break; case PM_QOS_UPDATE_REQ: /* * To change the list, atomically remove, reinit with new value * and add, then see if the aggregate has changed. */ plist_del(node, &c->list); fallthrough; case PM_QOS_ADD_REQ: plist_node_init(node, new_value); plist_add(node, &c->list); break; default: /* no action */ ; } curr_value = pm_qos_get_value(c); pm_qos_set_value(c, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); trace_pm_qos_update_target(action, prev_value, curr_value); if (prev_value == curr_value) return 0; if (c->notifiers) blocking_notifier_call_chain(c->notifiers, curr_value, NULL); return 1; } /** * pm_qos_flags_remove_req - Remove device PM QoS flags request. * @pqf: Device PM QoS flags set to remove the request from. * @req: Request to remove from the set. */ static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req) { s32 val = 0; list_del(&req->node); list_for_each_entry(req, &pqf->list, node) val |= req->flags; pqf->effective_flags = val; } /** * pm_qos_update_flags - Update a set of PM QoS flags. * @pqf: Set of PM QoS flags to update. * @req: Request to add to the set, to modify, or to remove from the set. * @action: Action to take on the set. * @val: Value of the request to add or modify. * * Return: 1 if the aggregate constraint value has changed, 0 otherwise. */ bool pm_qos_update_flags(struct pm_qos_flags *pqf, struct pm_qos_flags_request *req, enum pm_qos_req_action action, s32 val) { unsigned long irqflags; s32 prev_value, curr_value; spin_lock_irqsave(&pm_qos_lock, irqflags); prev_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; switch (action) { case PM_QOS_REMOVE_REQ: pm_qos_flags_remove_req(pqf, req); break; case PM_QOS_UPDATE_REQ: pm_qos_flags_remove_req(pqf, req); fallthrough; case PM_QOS_ADD_REQ: req->flags = val; INIT_LIST_HEAD(&req->node); list_add_tail(&req->node, &pqf->list); pqf->effective_flags |= val; break; default: /* no action */ ; } curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; spin_unlock_irqrestore(&pm_qos_lock, irqflags); trace_pm_qos_update_flags(action, prev_value, curr_value); return prev_value != curr_value; } #ifdef CONFIG_CPU_IDLE /* Definitions related to the CPU latency QoS. */ static struct pm_qos_constraints cpu_latency_constraints = { .list = PLIST_HEAD_INIT(cpu_latency_constraints.list), .target_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .default_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .no_constraint_value = PM_QOS_CPU_LATENCY_DEFAULT_VALUE, .type = PM_QOS_MIN, }; static inline bool cpu_latency_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit. */ s32 cpu_latency_qos_limit(void) { return pm_qos_read_value(&cpu_latency_constraints); } /** * cpu_latency_qos_request_active - Check the given PM QoS request. * @req: PM QoS request to check. * * Return: 'true' if @req has been added to the CPU latency QoS list, 'false' * otherwise. */ bool cpu_latency_qos_request_active(struct pm_qos_request *req) { return req->qos == &cpu_latency_constraints; } EXPORT_SYMBOL_GPL(cpu_latency_qos_request_active); static void cpu_latency_qos_apply(struct pm_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret = pm_qos_update_target(req->qos, &req->node, action, value); if (ret > 0) wake_up_all_idle_cpus(); } /** * cpu_latency_qos_add_request - Add new CPU latency QoS request. * @req: Pointer to a preallocated handle. * @value: Requested constraint value. * * Use @value to initialize the request handle pointed to by @req, insert it as * a new entry to the CPU latency QoS list and recompute the effective QoS * constraint for that list. * * Callers need to save the handle for later use in updates and removal of the * QoS request represented by it. */ void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value) { if (!req || cpu_latency_qos_value_invalid(value)) return; if (cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for already added request\n", __func__); return; } trace_pm_qos_add_request(value); req->qos = &cpu_latency_constraints; cpu_latency_qos_apply(req, PM_QOS_ADD_REQ, value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request); /** * cpu_latency_qos_update_request - Modify existing CPU latency QoS request. * @req : QoS request to update. * @new_value: New requested constraint value. * * Use @new_value to update the QoS request represented by @req in the CPU * latency QoS list along with updating the effective constraint value for that * list. */ void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value) { if (!req || cpu_latency_qos_value_invalid(new_value)) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_update_request(new_value); if (new_value == req->node.prio) return; cpu_latency_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(cpu_latency_qos_update_request); /** * cpu_latency_qos_remove_request - Remove existing CPU latency QoS request. * @req: QoS request to remove. * * Remove the CPU latency QoS request represented by @req from the CPU latency * QoS list along with updating the effective constraint value for that list. */ void cpu_latency_qos_remove_request(struct pm_qos_request *req) { if (!req) return; if (!cpu_latency_qos_request_active(req)) { WARN(1, KERN_ERR "%s called for unknown object\n", __func__); return; } trace_pm_qos_remove_request(PM_QOS_DEFAULT_VALUE); cpu_latency_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); memset(req, 0, sizeof(*req)); } EXPORT_SYMBOL_GPL(cpu_latency_qos_remove_request); /* User space interface to the CPU latency QoS via misc device. */ static int cpu_latency_qos_open(struct inode *inode, struct file *filp) { struct pm_qos_request *req; req = kzalloc(sizeof(*req), GFP_KERNEL); if (!req) return -ENOMEM; cpu_latency_qos_add_request(req, PM_QOS_DEFAULT_VALUE); filp->private_data = req; return 0; } static int cpu_latency_qos_release(struct inode *inode, struct file *filp) { struct pm_qos_request *req = filp->private_data; filp->private_data = NULL; cpu_latency_qos_remove_request(req); kfree(req); return 0; } static ssize_t cpu_latency_qos_read(struct file *filp, char __user *buf, size_t count, loff_t *f_pos) { struct pm_qos_request *req = filp->private_data; unsigned long flags; s32 value; if (!req || !cpu_latency_qos_request_active(req)) return -EINVAL; spin_lock_irqsave(&pm_qos_lock, flags); value = pm_qos_get_value(&cpu_latency_constraints); spin_unlock_irqrestore(&pm_qos_lock, flags); return simple_read_from_buffer(buf, count, f_pos, &value, sizeof(s32)); } static ssize_t cpu_latency_qos_write(struct file *filp, const char __user *buf, size_t count, loff_t *f_pos) { s32 value; if (count == sizeof(s32)) { if (copy_from_user(&value, buf, sizeof(s32))) return -EFAULT; } else { int ret; ret = kstrtos32_from_user(buf, count, 16, &value); if (ret) return ret; } cpu_latency_qos_update_request(filp->private_data, value); return count; } static const struct file_operations cpu_latency_qos_fops = { .write = cpu_latency_qos_write, .read = cpu_latency_qos_read, .open = cpu_latency_qos_open, .release = cpu_latency_qos_release, .llseek = noop_llseek, }; static struct miscdevice cpu_latency_qos_miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "cpu_dma_latency", .fops = &cpu_latency_qos_fops, }; static int __init cpu_latency_qos_init(void) { int ret; ret = misc_register(&cpu_latency_qos_miscdev); if (ret < 0) pr_err("%s: %s setup failed\n", __func__, cpu_latency_qos_miscdev.name); return ret; } late_initcall(cpu_latency_qos_init); #endif /* CONFIG_CPU_IDLE */ /* Definitions related to the frequency QoS below. */ static inline bool freq_qos_value_invalid(s32 value) { return value < 0 && value != PM_QOS_DEFAULT_VALUE; } /** * freq_constraints_init - Initialize frequency QoS constraints. * @qos: Frequency QoS constraints to initialize. */ void freq_constraints_init(struct freq_constraints *qos) { struct pm_qos_constraints *c; c = &qos->min_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->default_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MIN_DEFAULT_VALUE; c->type = PM_QOS_MAX; c->notifiers = &qos->min_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); c = &qos->max_freq; plist_head_init(&c->list); c->target_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->default_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->no_constraint_value = FREQ_QOS_MAX_DEFAULT_VALUE; c->type = PM_QOS_MIN; c->notifiers = &qos->max_freq_notifiers; BLOCKING_INIT_NOTIFIER_HEAD(c->notifiers); } /** * freq_qos_read_value - Get frequency QoS constraint for a given list. * @qos: Constraints to evaluate. * @type: QoS request type. */ s32 freq_qos_read_value(struct freq_constraints *qos, enum freq_qos_req_type type) { s32 ret; switch (type) { case FREQ_QOS_MIN: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MIN_DEFAULT_VALUE : pm_qos_read_value(&qos->min_freq); break; case FREQ_QOS_MAX: ret = IS_ERR_OR_NULL(qos) ? FREQ_QOS_MAX_DEFAULT_VALUE : pm_qos_read_value(&qos->max_freq); break; default: WARN_ON(1); ret = 0; } return ret; } /** * freq_qos_apply - Add/modify/remove frequency QoS request. * @req: Constraint request to apply. * @action: Action to perform (add/update/remove). * @value: Value to assign to the QoS request. * * This is only meant to be called from inside pm_qos, not drivers. */ int freq_qos_apply(struct freq_qos_request *req, enum pm_qos_req_action action, s32 value) { int ret; switch(req->type) { case FREQ_QOS_MIN: ret = pm_qos_update_target(&req->qos->min_freq, &req->pnode, action, value); break; case FREQ_QOS_MAX: ret = pm_qos_update_target(&req->qos->max_freq, &req->pnode, action, value); break; default: ret = -EINVAL; } return ret; } /** * freq_qos_add_request - Insert new frequency QoS request into a given list. * @qos: Constraints to update. * @req: Preallocated request object. * @type: Request type. * @value: Request value. * * Insert a new entry into the @qos list of requests, recompute the effective * QoS constraint value for that list and initialize the @req object. The * caller needs to save that object for later use in updates and removal. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_add_request(struct freq_constraints *qos, struct freq_qos_request *req, enum freq_qos_req_type type, s32 value) { int ret; if (IS_ERR_OR_NULL(qos) || !req || freq_qos_value_invalid(value)) return -EINVAL; if (WARN(freq_qos_request_active(req), "%s() called for active request\n", __func__)) return -EINVAL; req->qos = qos; req->type = type; ret = freq_qos_apply(req, PM_QOS_ADD_REQ, value); if (ret < 0) { req->qos = NULL; req->type = 0; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_request); /** * freq_qos_update_request - Modify existing frequency QoS request. * @req: Request to modify. * @new_value: New request value. * * Update an existing frequency QoS request along with the effective constraint * value for the list of requests it belongs to. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_update_request(struct freq_qos_request *req, s32 new_value) { if (!req || freq_qos_value_invalid(new_value)) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; if (req->pnode.prio == new_value) return 0; return freq_qos_apply(req, PM_QOS_UPDATE_REQ, new_value); } EXPORT_SYMBOL_GPL(freq_qos_update_request); /** * freq_qos_remove_request - Remove frequency QoS request from its list. * @req: Request to remove. * * Remove the given frequency QoS request from the list of constraints it * belongs to and recompute the effective constraint value for that list. * * Return 1 if the effective constraint value has changed, 0 if the effective * constraint value has not changed, or a negative error code on failures. */ int freq_qos_remove_request(struct freq_qos_request *req) { int ret; if (!req) return -EINVAL; if (WARN(!freq_qos_request_active(req), "%s() called for unknown object\n", __func__)) return -EINVAL; ret = freq_qos_apply(req, PM_QOS_REMOVE_REQ, PM_QOS_DEFAULT_VALUE); req->qos = NULL; req->type = 0; return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_request); /** * freq_qos_add_notifier - Add frequency QoS change notifier. * @qos: List of requests to add the notifier to. * @type: Request type. * @notifier: Notifier block to add. */ int freq_qos_add_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_register(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_register(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_add_notifier); /** * freq_qos_remove_notifier - Remove frequency QoS change notifier. * @qos: List of requests to remove the notifier from. * @type: Request type. * @notifier: Notifier block to remove. */ int freq_qos_remove_notifier(struct freq_constraints *qos, enum freq_qos_req_type type, struct notifier_block *notifier) { int ret; if (IS_ERR_OR_NULL(qos) || !notifier) return -EINVAL; switch (type) { case FREQ_QOS_MIN: ret = blocking_notifier_chain_unregister(qos->min_freq.notifiers, notifier); break; case FREQ_QOS_MAX: ret = blocking_notifier_chain_unregister(qos->max_freq.notifiers, notifier); break; default: WARN_ON(1); ret = -EINVAL; } return ret; } EXPORT_SYMBOL_GPL(freq_qos_remove_notifier);
36 12 24 20 9 10 40 18 22 1 19 17 17 5 2 2 2 4 2 2 4 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ #include <linux/bitmap.h> #include <linux/bpf.h> #include <linux/btf.h> #include <linux/err.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/btf_ids.h> #define BLOOM_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK) struct bpf_bloom_filter { struct bpf_map map; u32 bitset_mask; u32 hash_seed; u32 nr_hash_funcs; unsigned long bitset[]; }; static u32 hash(struct bpf_bloom_filter *bloom, void *value, u32 value_size, u32 index) { u32 h; if (likely(value_size % 4 == 0)) h = jhash2(value, value_size / 4, bloom->hash_seed + index); else h = jhash(value, value_size, bloom->hash_seed + index); return h & bloom->bitset_mask; } static long bloom_map_peek_elem(struct bpf_map *map, void *value) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); u32 i, h; for (i = 0; i < bloom->nr_hash_funcs; i++) { h = hash(bloom, value, map->value_size, i); if (!test_bit(h, bloom->bitset)) return -ENOENT; } return 0; } static long bloom_map_push_elem(struct bpf_map *map, void *value, u64 flags) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); u32 i, h; if (flags != BPF_ANY) return -EINVAL; for (i = 0; i < bloom->nr_hash_funcs; i++) { h = hash(bloom, value, map->value_size, i); set_bit(h, bloom->bitset); } return 0; } static long bloom_map_pop_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } static long bloom_map_delete_elem(struct bpf_map *map, void *value) { return -EOPNOTSUPP; } static int bloom_map_get_next_key(struct bpf_map *map, void *key, void *next_key) { return -EOPNOTSUPP; } /* Called from syscall */ static int bloom_map_alloc_check(union bpf_attr *attr) { if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. */ return -E2BIG; return 0; } static struct bpf_map *bloom_map_alloc(union bpf_attr *attr) { u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits; int numa_node = bpf_map_attr_numa_node(attr); struct bpf_bloom_filter *bloom; if (attr->key_size != 0 || attr->value_size == 0 || attr->max_entries == 0 || attr->map_flags & ~BLOOM_CREATE_FLAG_MASK || !bpf_map_flags_access_ok(attr->map_flags) || /* The lower 4 bits of map_extra (0xF) specify the number * of hash functions */ (attr->map_extra & ~0xF)) return ERR_PTR(-EINVAL); nr_hash_funcs = attr->map_extra; if (nr_hash_funcs == 0) /* Default to using 5 hash functions if unspecified */ nr_hash_funcs = 5; /* For the bloom filter, the optimal bit array size that minimizes the * false positive probability is n * k / ln(2) where n is the number of * expected entries in the bloom filter and k is the number of hash * functions. We use 7 / 5 to approximate 1 / ln(2). * * We round this up to the nearest power of two to enable more efficient * hashing using bitmasks. The bitmask will be the bit array size - 1. * * If this overflows a u32, the bit array size will have 2^32 (4 * GB) bits. */ if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) || check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) || nr_bits > (1UL << 31)) { /* The bit array size is 2^32 bits but to avoid overflowing the * u32, we use U32_MAX, which will round up to the equivalent * number of bytes */ bitset_bytes = BITS_TO_BYTES(U32_MAX); bitset_mask = U32_MAX; } else { if (nr_bits <= BITS_PER_LONG) nr_bits = BITS_PER_LONG; else nr_bits = roundup_pow_of_two(nr_bits); bitset_bytes = BITS_TO_BYTES(nr_bits); bitset_mask = nr_bits - 1; } bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node); if (!bloom) return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&bloom->map, attr); bloom->nr_hash_funcs = nr_hash_funcs; bloom->bitset_mask = bitset_mask; if (!(attr->map_flags & BPF_F_ZERO_SEED)) bloom->hash_seed = get_random_u32(); return &bloom->map; } static void bloom_map_free(struct bpf_map *map) { struct bpf_bloom_filter *bloom = container_of(map, struct bpf_bloom_filter, map); bpf_map_area_free(bloom); } static void *bloom_map_lookup_elem(struct bpf_map *map, void *key) { /* The eBPF program should use map_peek_elem instead */ return ERR_PTR(-EINVAL); } static long bloom_map_update_elem(struct bpf_map *map, void *key, void *value, u64 flags) { /* The eBPF program should use map_push_elem instead */ return -EINVAL; } static int bloom_map_check_btf(const struct bpf_map *map, const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type) { /* Bloom filter maps are keyless */ return btf_type_is_void(key_type) ? 0 : -EINVAL; } static u64 bloom_map_mem_usage(const struct bpf_map *map) { struct bpf_bloom_filter *bloom; u64 bitset_bytes; bloom = container_of(map, struct bpf_bloom_filter, map); bitset_bytes = BITS_TO_BYTES((u64)bloom->bitset_mask + 1); bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long)); return sizeof(*bloom) + bitset_bytes; } BTF_ID_LIST_SINGLE(bpf_bloom_map_btf_ids, struct, bpf_bloom_filter) const struct bpf_map_ops bloom_filter_map_ops = { .map_meta_equal = bpf_map_meta_equal, .map_alloc_check = bloom_map_alloc_check, .map_alloc = bloom_map_alloc, .map_free = bloom_map_free, .map_get_next_key = bloom_map_get_next_key, .map_push_elem = bloom_map_push_elem, .map_peek_elem = bloom_map_peek_elem, .map_pop_elem = bloom_map_pop_elem, .map_lookup_elem = bloom_map_lookup_elem, .map_update_elem = bloom_map_update_elem, .map_delete_elem = bloom_map_delete_elem, .map_check_btf = bloom_map_check_btf, .map_mem_usage = bloom_map_mem_usage, .map_btf_id = &bpf_bloom_map_btf_ids[0], };
29 4 1 24 142 3 1 2 135 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 // SPDX-License-Identifier: GPL-2.0-or-later /* * Squashfs - a compressed read only filesystem for Linux * * Copyright (c) 2010 * Phillip Lougher <phillip@squashfs.org.uk> * * xattr_id.c */ /* * This file implements code to map the 32-bit xattr id stored in the inode * into the on disk location of the xattr data. */ #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" #include "squashfs.h" #include "xattr.h" /* * Map xattr id using the xattr id look up table */ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, int *count, unsigned int *size, unsigned long long *xattr) { struct squashfs_sb_info *msblk = sb->s_fs_info; int block = SQUASHFS_XATTR_BLOCK(index); int offset = SQUASHFS_XATTR_BLOCK_OFFSET(index); u64 start_block; struct squashfs_xattr_id id; int err; if (index >= msblk->xattr_ids) return -EINVAL; start_block = le64_to_cpu(msblk->xattr_id_table[block]); err = squashfs_read_metadata(sb, &id, &start_block, &offset, sizeof(id)); if (err < 0) return err; *xattr = le64_to_cpu(id.xattr); *size = le32_to_cpu(id.size); *count = le32_to_cpu(id.count); return 0; } /* * Read uncompressed xattr id lookup table indexes from disk into memory */ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_sb_info *msblk = sb->s_fs_info; unsigned int len, indexes; struct squashfs_xattr_id_table *id_table; __le64 *table; u64 start, end; int n; id_table = squashfs_read_table(sb, table_start, sizeof(*id_table)); if (IS_ERR(id_table)) return (__le64 *) id_table; *xattr_table_start = le64_to_cpu(id_table->xattr_table_start); *xattr_ids = le32_to_cpu(id_table->xattr_ids); kfree(id_table); /* Sanity check values */ /* there is always at least one xattr id */ if (*xattr_ids == 0) return ERR_PTR(-EINVAL); len = SQUASHFS_XATTR_BLOCK_BYTES(*xattr_ids); indexes = SQUASHFS_XATTR_BLOCKS(*xattr_ids); /* * The computed size of the index table (len bytes) should exactly * match the table start and end points */ start = table_start + sizeof(*id_table); end = msblk->bytes_used; if (len != (end - start)) return ERR_PTR(-EINVAL); table = squashfs_read_table(sb, start, len); if (IS_ERR(table)) return table; /* table[0], table[1], ... table[indexes - 1] store the locations * of the compressed xattr id blocks. Each entry should be less than * the next (i.e. table[0] < table[1]), and the difference between them * should be SQUASHFS_METADATA_SIZE or less. table[indexes - 1] * should be less than table_start, and again the difference * shouls be SQUASHFS_METADATA_SIZE or less. * * Finally xattr_table_start should be less than table[0]. */ for (n = 0; n < (indexes - 1); n++) { start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); if (start >= end || (end - start) > (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); if (start >= table_start || (table_start - start) > (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } if (*xattr_table_start >= le64_to_cpu(table[0])) { kfree(table); return ERR_PTR(-EINVAL); } return table; }
17 17 3 1 2 6 3 3 3 3 3 2 9 3 16 165 165 48 37 5 9 1 1 9 4 11 1 11 9 1 3 9 9 18 9 9 9 18 1 1 1 1 18 2 20 10 10 20 5 1 1 1 2 5 1 1 4 1 1 1 1 18 1 1 1 15 2 4 2 8 9 36 25 25 25 11 11 6 5 1 1 1 8 1 2 5 2 1 2 2 3 3 3 3 6 1 5 9 1 2 1 3 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 // SPDX-License-Identifier: GPL-2.0 /* * Filesystem-level keyring for fscrypt * * Copyright 2019 Google LLC */ /* * This file implements management of fscrypt master keys in the * filesystem-level keyring, including the ioctls: * * - FS_IOC_ADD_ENCRYPTION_KEY * - FS_IOC_REMOVE_ENCRYPTION_KEY * - FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS * - FS_IOC_GET_ENCRYPTION_KEY_STATUS * * See the "User API" section of Documentation/filesystems/fscrypt.rst for more * information about these ioctls. */ #include <linux/unaligned.h> #include <crypto/skcipher.h> #include <linux/key-type.h> #include <linux/random.h> #include <linux/once.h> #include <linux/seq_file.h> #include "fscrypt_private.h" /* The master encryption keys for a filesystem (->s_master_keys) */ struct fscrypt_keyring { /* * Lock that protects ->key_hashtable. It does *not* protect the * fscrypt_master_key structs themselves. */ spinlock_t lock; /* Hash table that maps fscrypt_key_specifier to fscrypt_master_key */ struct hlist_head key_hashtable[128]; }; static void wipe_master_key_secret(struct fscrypt_master_key_secret *secret) { fscrypt_destroy_hkdf(&secret->hkdf); memzero_explicit(secret, sizeof(*secret)); } static void move_master_key_secret(struct fscrypt_master_key_secret *dst, struct fscrypt_master_key_secret *src) { memcpy(dst, src, sizeof(*dst)); memzero_explicit(src, sizeof(*src)); } static void fscrypt_free_master_key(struct rcu_head *head) { struct fscrypt_master_key *mk = container_of(head, struct fscrypt_master_key, mk_rcu_head); /* * The master key secret and any embedded subkeys should have already * been wiped when the last active reference to the fscrypt_master_key * struct was dropped; doing it here would be unnecessarily late. * Nevertheless, use kfree_sensitive() in case anything was missed. */ kfree_sensitive(mk); } void fscrypt_put_master_key(struct fscrypt_master_key *mk) { if (!refcount_dec_and_test(&mk->mk_struct_refs)) return; /* * No structural references left, so free ->mk_users, and also free the * fscrypt_master_key struct itself after an RCU grace period ensures * that concurrent keyring lookups can no longer find it. */ WARN_ON_ONCE(refcount_read(&mk->mk_active_refs) != 0); if (mk->mk_users) { /* Clear the keyring so the quota gets released right away. */ keyring_clear(mk->mk_users); key_put(mk->mk_users); mk->mk_users = NULL; } call_rcu(&mk->mk_rcu_head, fscrypt_free_master_key); } void fscrypt_put_master_key_activeref(struct super_block *sb, struct fscrypt_master_key *mk) { size_t i; if (!refcount_dec_and_test(&mk->mk_active_refs)) return; /* * No active references left, so complete the full removal of this * fscrypt_master_key struct by removing it from the keyring and * destroying any subkeys embedded in it. */ if (WARN_ON_ONCE(!sb->s_master_keys)) return; spin_lock(&sb->s_master_keys->lock); hlist_del_rcu(&mk->mk_node); spin_unlock(&sb->s_master_keys->lock); /* * ->mk_active_refs == 0 implies that ->mk_present is false and * ->mk_decrypted_inodes is empty. */ WARN_ON_ONCE(mk->mk_present); WARN_ON_ONCE(!list_empty(&mk->mk_decrypted_inodes)); for (i = 0; i <= FSCRYPT_MODE_MAX; i++) { fscrypt_destroy_prepared_key( sb, &mk->mk_direct_keys[i]); fscrypt_destroy_prepared_key( sb, &mk->mk_iv_ino_lblk_64_keys[i]); fscrypt_destroy_prepared_key( sb, &mk->mk_iv_ino_lblk_32_keys[i]); } memzero_explicit(&mk->mk_ino_hash_key, sizeof(mk->mk_ino_hash_key)); mk->mk_ino_hash_key_initialized = false; /* Drop the structural ref associated with the active refs. */ fscrypt_put_master_key(mk); } /* * This transitions the key state from present to incompletely removed, and then * potentially to absent (depending on whether inodes remain). */ static void fscrypt_initiate_key_removal(struct super_block *sb, struct fscrypt_master_key *mk) { WRITE_ONCE(mk->mk_present, false); wipe_master_key_secret(&mk->mk_secret); fscrypt_put_master_key_activeref(sb, mk); } static inline bool valid_key_spec(const struct fscrypt_key_specifier *spec) { if (spec->__reserved) return false; return master_key_spec_len(spec) != 0; } static int fscrypt_user_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { /* * We just charge FSCRYPT_MAX_KEY_SIZE bytes to the user's key quota for * each key, regardless of the exact key size. The amount of memory * actually used is greater than the size of the raw key anyway. */ return key_payload_reserve(key, FSCRYPT_MAX_KEY_SIZE); } static void fscrypt_user_key_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); } /* * Type of key in ->mk_users. Each key of this type represents a particular * user who has added a particular master key. * * Note that the name of this key type really should be something like * ".fscrypt-user" instead of simply ".fscrypt". But the shorter name is chosen * mainly for simplicity of presentation in /proc/keys when read by a non-root * user. And it is expected to be rare that a key is actually added by multiple * users, since users should keep their encryption keys confidential. */ static struct key_type key_type_fscrypt_user = { .name = ".fscrypt", .instantiate = fscrypt_user_key_instantiate, .describe = fscrypt_user_key_describe, }; #define FSCRYPT_MK_USERS_DESCRIPTION_SIZE \ (CONST_STRLEN("fscrypt-") + 2 * FSCRYPT_KEY_IDENTIFIER_SIZE + \ CONST_STRLEN("-users") + 1) #define FSCRYPT_MK_USER_DESCRIPTION_SIZE \ (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + CONST_STRLEN(".uid.") + 10 + 1) static void format_mk_users_keyring_description( char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE], const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { sprintf(description, "fscrypt-%*phN-users", FSCRYPT_KEY_IDENTIFIER_SIZE, mk_identifier); } static void format_mk_user_description( char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE], const u8 mk_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { sprintf(description, "%*phN.uid.%u", FSCRYPT_KEY_IDENTIFIER_SIZE, mk_identifier, __kuid_val(current_fsuid())); } /* Create ->s_master_keys if needed. Synchronized by fscrypt_add_key_mutex. */ static int allocate_filesystem_keyring(struct super_block *sb) { struct fscrypt_keyring *keyring; if (sb->s_master_keys) return 0; keyring = kzalloc(sizeof(*keyring), GFP_KERNEL); if (!keyring) return -ENOMEM; spin_lock_init(&keyring->lock); /* * Pairs with the smp_load_acquire() in fscrypt_find_master_key(). * I.e., here we publish ->s_master_keys with a RELEASE barrier so that * concurrent tasks can ACQUIRE it. */ smp_store_release(&sb->s_master_keys, keyring); return 0; } /* * Release all encryption keys that have been added to the filesystem, along * with the keyring that contains them. * * This is called at unmount time, after all potentially-encrypted inodes have * been evicted. The filesystem's underlying block device(s) are still * available at this time; this is important because after user file accesses * have been allowed, this function may need to evict keys from the keyslots of * an inline crypto engine, which requires the block device(s). */ void fscrypt_destroy_keyring(struct super_block *sb) { struct fscrypt_keyring *keyring = sb->s_master_keys; size_t i; if (!keyring) return; for (i = 0; i < ARRAY_SIZE(keyring->key_hashtable); i++) { struct hlist_head *bucket = &keyring->key_hashtable[i]; struct fscrypt_master_key *mk; struct hlist_node *tmp; hlist_for_each_entry_safe(mk, tmp, bucket, mk_node) { /* * Since all potentially-encrypted inodes were already * evicted, every key remaining in the keyring should * have an empty inode list, and should only still be in * the keyring due to the single active ref associated * with ->mk_present. There should be no structural * refs beyond the one associated with the active ref. */ WARN_ON_ONCE(refcount_read(&mk->mk_active_refs) != 1); WARN_ON_ONCE(refcount_read(&mk->mk_struct_refs) != 1); WARN_ON_ONCE(!mk->mk_present); fscrypt_initiate_key_removal(sb, mk); } } kfree_sensitive(keyring); sb->s_master_keys = NULL; } static struct hlist_head * fscrypt_mk_hash_bucket(struct fscrypt_keyring *keyring, const struct fscrypt_key_specifier *mk_spec) { /* * Since key specifiers should be "random" values, it is sufficient to * use a trivial hash function that just takes the first several bits of * the key specifier. */ unsigned long i = get_unaligned((unsigned long *)&mk_spec->u); return &keyring->key_hashtable[i % ARRAY_SIZE(keyring->key_hashtable)]; } /* * Find the specified master key struct in ->s_master_keys and take a structural * ref to it. The structural ref guarantees that the key struct continues to * exist, but it does *not* guarantee that ->s_master_keys continues to contain * the key struct. The structural ref needs to be dropped by * fscrypt_put_master_key(). Returns NULL if the key struct is not found. */ struct fscrypt_master_key * fscrypt_find_master_key(struct super_block *sb, const struct fscrypt_key_specifier *mk_spec) { struct fscrypt_keyring *keyring; struct hlist_head *bucket; struct fscrypt_master_key *mk; /* * Pairs with the smp_store_release() in allocate_filesystem_keyring(). * I.e., another task can publish ->s_master_keys concurrently, * executing a RELEASE barrier. We need to use smp_load_acquire() here * to safely ACQUIRE the memory the other task published. */ keyring = smp_load_acquire(&sb->s_master_keys); if (keyring == NULL) return NULL; /* No keyring yet, so no keys yet. */ bucket = fscrypt_mk_hash_bucket(keyring, mk_spec); rcu_read_lock(); switch (mk_spec->type) { case FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR: hlist_for_each_entry_rcu(mk, bucket, mk_node) { if (mk->mk_spec.type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && memcmp(mk->mk_spec.u.descriptor, mk_spec->u.descriptor, FSCRYPT_KEY_DESCRIPTOR_SIZE) == 0 && refcount_inc_not_zero(&mk->mk_struct_refs)) goto out; } break; case FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER: hlist_for_each_entry_rcu(mk, bucket, mk_node) { if (mk->mk_spec.type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER && memcmp(mk->mk_spec.u.identifier, mk_spec->u.identifier, FSCRYPT_KEY_IDENTIFIER_SIZE) == 0 && refcount_inc_not_zero(&mk->mk_struct_refs)) goto out; } break; } mk = NULL; out: rcu_read_unlock(); return mk; } static int allocate_master_key_users_keyring(struct fscrypt_master_key *mk) { char description[FSCRYPT_MK_USERS_DESCRIPTION_SIZE]; struct key *keyring; format_mk_users_keyring_description(description, mk->mk_spec.u.identifier); keyring = keyring_alloc(description, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(), KEY_POS_SEARCH | KEY_USR_SEARCH | KEY_USR_READ | KEY_USR_VIEW, KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); mk->mk_users = keyring; return 0; } /* * Find the current user's "key" in the master key's ->mk_users. * Returns ERR_PTR(-ENOKEY) if not found. */ static struct key *find_master_key_user(struct fscrypt_master_key *mk) { char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE]; key_ref_t keyref; format_mk_user_description(description, mk->mk_spec.u.identifier); /* * We need to mark the keyring reference as "possessed" so that we * acquire permission to search it, via the KEY_POS_SEARCH permission. */ keyref = keyring_search(make_key_ref(mk->mk_users, true /*possessed*/), &key_type_fscrypt_user, description, false); if (IS_ERR(keyref)) { if (PTR_ERR(keyref) == -EAGAIN || /* not found */ PTR_ERR(keyref) == -EKEYREVOKED) /* recently invalidated */ keyref = ERR_PTR(-ENOKEY); return ERR_CAST(keyref); } return key_ref_to_ptr(keyref); } /* * Give the current user a "key" in ->mk_users. This charges the user's quota * and marks the master key as added by the current user, so that it cannot be * removed by another user with the key. Either ->mk_sem must be held for * write, or the master key must be still undergoing initialization. */ static int add_master_key_user(struct fscrypt_master_key *mk) { char description[FSCRYPT_MK_USER_DESCRIPTION_SIZE]; struct key *mk_user; int err; format_mk_user_description(description, mk->mk_spec.u.identifier); mk_user = key_alloc(&key_type_fscrypt_user, description, current_fsuid(), current_gid(), current_cred(), KEY_POS_SEARCH | KEY_USR_VIEW, 0, NULL); if (IS_ERR(mk_user)) return PTR_ERR(mk_user); err = key_instantiate_and_link(mk_user, NULL, 0, mk->mk_users, NULL); key_put(mk_user); return err; } /* * Remove the current user's "key" from ->mk_users. * ->mk_sem must be held for write. * * Returns 0 if removed, -ENOKEY if not found, or another -errno code. */ static int remove_master_key_user(struct fscrypt_master_key *mk) { struct key *mk_user; int err; mk_user = find_master_key_user(mk); if (IS_ERR(mk_user)) return PTR_ERR(mk_user); err = key_unlink(mk->mk_users, mk_user); key_put(mk_user); return err; } /* * Allocate a new fscrypt_master_key, transfer the given secret over to it, and * insert it into sb->s_master_keys. */ static int add_new_master_key(struct super_block *sb, struct fscrypt_master_key_secret *secret, const struct fscrypt_key_specifier *mk_spec) { struct fscrypt_keyring *keyring = sb->s_master_keys; struct fscrypt_master_key *mk; int err; mk = kzalloc(sizeof(*mk), GFP_KERNEL); if (!mk) return -ENOMEM; init_rwsem(&mk->mk_sem); refcount_set(&mk->mk_struct_refs, 1); mk->mk_spec = *mk_spec; INIT_LIST_HEAD(&mk->mk_decrypted_inodes); spin_lock_init(&mk->mk_decrypted_inodes_lock); if (mk_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) { err = allocate_master_key_users_keyring(mk); if (err) goto out_put; err = add_master_key_user(mk); if (err) goto out_put; } move_master_key_secret(&mk->mk_secret, secret); mk->mk_present = true; refcount_set(&mk->mk_active_refs, 1); /* ->mk_present is true */ spin_lock(&keyring->lock); hlist_add_head_rcu(&mk->mk_node, fscrypt_mk_hash_bucket(keyring, mk_spec)); spin_unlock(&keyring->lock); return 0; out_put: fscrypt_put_master_key(mk); return err; } #define KEY_DEAD 1 static int add_existing_master_key(struct fscrypt_master_key *mk, struct fscrypt_master_key_secret *secret) { int err; /* * If the current user is already in ->mk_users, then there's nothing to * do. Otherwise, we need to add the user to ->mk_users. (Neither is * applicable for v1 policy keys, which have NULL ->mk_users.) */ if (mk->mk_users) { struct key *mk_user = find_master_key_user(mk); if (mk_user != ERR_PTR(-ENOKEY)) { if (IS_ERR(mk_user)) return PTR_ERR(mk_user); key_put(mk_user); return 0; } err = add_master_key_user(mk); if (err) return err; } /* If the key is incompletely removed, make it present again. */ if (!mk->mk_present) { if (!refcount_inc_not_zero(&mk->mk_active_refs)) { /* * Raced with the last active ref being dropped, so the * key has become, or is about to become, "absent". * Therefore, we need to allocate a new key struct. */ return KEY_DEAD; } move_master_key_secret(&mk->mk_secret, secret); WRITE_ONCE(mk->mk_present, true); } return 0; } static int do_add_master_key(struct super_block *sb, struct fscrypt_master_key_secret *secret, const struct fscrypt_key_specifier *mk_spec) { static DEFINE_MUTEX(fscrypt_add_key_mutex); struct fscrypt_master_key *mk; int err; mutex_lock(&fscrypt_add_key_mutex); /* serialize find + link */ mk = fscrypt_find_master_key(sb, mk_spec); if (!mk) { /* Didn't find the key in ->s_master_keys. Add it. */ err = allocate_filesystem_keyring(sb); if (!err) err = add_new_master_key(sb, secret, mk_spec); } else { /* * Found the key in ->s_master_keys. Add the user to ->mk_users * if needed, and make the key "present" again if possible. */ down_write(&mk->mk_sem); err = add_existing_master_key(mk, secret); up_write(&mk->mk_sem); if (err == KEY_DEAD) { /* * We found a key struct, but it's already been fully * removed. Ignore the old struct and add a new one. * fscrypt_add_key_mutex means we don't need to worry * about concurrent adds. */ err = add_new_master_key(sb, secret, mk_spec); } fscrypt_put_master_key(mk); } mutex_unlock(&fscrypt_add_key_mutex); return err; } static int add_master_key(struct super_block *sb, struct fscrypt_master_key_secret *secret, struct fscrypt_key_specifier *key_spec) { int err; if (key_spec->type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) { err = fscrypt_init_hkdf(&secret->hkdf, secret->raw, secret->size); if (err) return err; /* * Now that the HKDF context is initialized, the raw key is no * longer needed. */ memzero_explicit(secret->raw, secret->size); /* Calculate the key identifier */ err = fscrypt_hkdf_expand(&secret->hkdf, HKDF_CONTEXT_KEY_IDENTIFIER, NULL, 0, key_spec->u.identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); if (err) return err; } return do_add_master_key(sb, secret, key_spec); } static int fscrypt_provisioning_key_preparse(struct key_preparsed_payload *prep) { const struct fscrypt_provisioning_key_payload *payload = prep->data; if (prep->datalen < sizeof(*payload) + FSCRYPT_MIN_KEY_SIZE || prep->datalen > sizeof(*payload) + FSCRYPT_MAX_KEY_SIZE) return -EINVAL; if (payload->type != FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && payload->type != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER) return -EINVAL; if (payload->__reserved) return -EINVAL; prep->payload.data[0] = kmemdup(payload, prep->datalen, GFP_KERNEL); if (!prep->payload.data[0]) return -ENOMEM; prep->quotalen = prep->datalen; return 0; } static void fscrypt_provisioning_key_free_preparse( struct key_preparsed_payload *prep) { kfree_sensitive(prep->payload.data[0]); } static void fscrypt_provisioning_key_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); if (key_is_positive(key)) { const struct fscrypt_provisioning_key_payload *payload = key->payload.data[0]; seq_printf(m, ": %u [%u]", key->datalen, payload->type); } } static void fscrypt_provisioning_key_destroy(struct key *key) { kfree_sensitive(key->payload.data[0]); } static struct key_type key_type_fscrypt_provisioning = { .name = "fscrypt-provisioning", .preparse = fscrypt_provisioning_key_preparse, .free_preparse = fscrypt_provisioning_key_free_preparse, .instantiate = generic_key_instantiate, .describe = fscrypt_provisioning_key_describe, .destroy = fscrypt_provisioning_key_destroy, }; /* * Retrieve the raw key from the Linux keyring key specified by 'key_id', and * store it into 'secret'. * * The key must be of type "fscrypt-provisioning" and must have the field * fscrypt_provisioning_key_payload::type set to 'type', indicating that it's * only usable with fscrypt with the particular KDF version identified by * 'type'. We don't use the "logon" key type because there's no way to * completely restrict the use of such keys; they can be used by any kernel API * that accepts "logon" keys and doesn't require a specific service prefix. * * The ability to specify the key via Linux keyring key is intended for cases * where userspace needs to re-add keys after the filesystem is unmounted and * re-mounted. Most users should just provide the raw key directly instead. */ static int get_keyring_key(u32 key_id, u32 type, struct fscrypt_master_key_secret *secret) { key_ref_t ref; struct key *key; const struct fscrypt_provisioning_key_payload *payload; int err; ref = lookup_user_key(key_id, 0, KEY_NEED_SEARCH); if (IS_ERR(ref)) return PTR_ERR(ref); key = key_ref_to_ptr(ref); if (key->type != &key_type_fscrypt_provisioning) goto bad_key; payload = key->payload.data[0]; /* Don't allow fscrypt v1 keys to be used as v2 keys and vice versa. */ if (payload->type != type) goto bad_key; secret->size = key->datalen - sizeof(*payload); memcpy(secret->raw, payload->raw, secret->size); err = 0; goto out_put; bad_key: err = -EKEYREJECTED; out_put: key_ref_put(ref); return err; } /* * Add a master encryption key to the filesystem, causing all files which were * encrypted with it to appear "unlocked" (decrypted) when accessed. * * When adding a key for use by v1 encryption policies, this ioctl is * privileged, and userspace must provide the 'key_descriptor'. * * When adding a key for use by v2+ encryption policies, this ioctl is * unprivileged. This is needed, in general, to allow non-root users to use * encryption without encountering the visibility problems of process-subscribed * keyrings and the inability to properly remove keys. This works by having * each key identified by its cryptographically secure hash --- the * 'key_identifier'. The cryptographic hash ensures that a malicious user * cannot add the wrong key for a given identifier. Furthermore, each added key * is charged to the appropriate user's quota for the keyrings service, which * prevents a malicious user from adding too many keys. Finally, we forbid a * user from removing a key while other users have added it too, which prevents * a user who knows another user's key from causing a denial-of-service by * removing it at an inopportune time. (We tolerate that a user who knows a key * can prevent other users from removing it.) * * For more details, see the "FS_IOC_ADD_ENCRYPTION_KEY" section of * Documentation/filesystems/fscrypt.rst. */ int fscrypt_ioctl_add_key(struct file *filp, void __user *_uarg) { struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_add_key_arg __user *uarg = _uarg; struct fscrypt_add_key_arg arg; struct fscrypt_master_key_secret secret; int err; if (copy_from_user(&arg, uarg, sizeof(arg))) return -EFAULT; if (!valid_key_spec(&arg.key_spec)) return -EINVAL; if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; /* * Only root can add keys that are identified by an arbitrary descriptor * rather than by a cryptographic hash --- since otherwise a malicious * user could add the wrong key. */ if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && !capable(CAP_SYS_ADMIN)) return -EACCES; memset(&secret, 0, sizeof(secret)); if (arg.key_id) { if (arg.raw_size != 0) return -EINVAL; err = get_keyring_key(arg.key_id, arg.key_spec.type, &secret); if (err) goto out_wipe_secret; } else { if (arg.raw_size < FSCRYPT_MIN_KEY_SIZE || arg.raw_size > FSCRYPT_MAX_KEY_SIZE) return -EINVAL; secret.size = arg.raw_size; err = -EFAULT; if (copy_from_user(secret.raw, uarg->raw, secret.size)) goto out_wipe_secret; } err = add_master_key(sb, &secret, &arg.key_spec); if (err) goto out_wipe_secret; /* Return the key identifier to userspace, if applicable */ err = -EFAULT; if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER && copy_to_user(uarg->key_spec.u.identifier, arg.key_spec.u.identifier, FSCRYPT_KEY_IDENTIFIER_SIZE)) goto out_wipe_secret; err = 0; out_wipe_secret: wipe_master_key_secret(&secret); return err; } EXPORT_SYMBOL_GPL(fscrypt_ioctl_add_key); static void fscrypt_get_test_dummy_secret(struct fscrypt_master_key_secret *secret) { static u8 test_key[FSCRYPT_MAX_KEY_SIZE]; get_random_once(test_key, FSCRYPT_MAX_KEY_SIZE); memset(secret, 0, sizeof(*secret)); secret->size = FSCRYPT_MAX_KEY_SIZE; memcpy(secret->raw, test_key, FSCRYPT_MAX_KEY_SIZE); } int fscrypt_get_test_dummy_key_identifier( u8 key_identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { struct fscrypt_master_key_secret secret; int err; fscrypt_get_test_dummy_secret(&secret); err = fscrypt_init_hkdf(&secret.hkdf, secret.raw, secret.size); if (err) goto out; err = fscrypt_hkdf_expand(&secret.hkdf, HKDF_CONTEXT_KEY_IDENTIFIER, NULL, 0, key_identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); out: wipe_master_key_secret(&secret); return err; } /** * fscrypt_add_test_dummy_key() - add the test dummy encryption key * @sb: the filesystem instance to add the key to * @key_spec: the key specifier of the test dummy encryption key * * Add the key for the test_dummy_encryption mount option to the filesystem. To * prevent misuse of this mount option, a per-boot random key is used instead of * a hardcoded one. This makes it so that any encrypted files created using * this option won't be accessible after a reboot. * * Return: 0 on success, -errno on failure */ int fscrypt_add_test_dummy_key(struct super_block *sb, struct fscrypt_key_specifier *key_spec) { struct fscrypt_master_key_secret secret; int err; fscrypt_get_test_dummy_secret(&secret); err = add_master_key(sb, &secret, key_spec); wipe_master_key_secret(&secret); return err; } /* * Verify that the current user has added a master key with the given identifier * (returns -ENOKEY if not). This is needed to prevent a user from encrypting * their files using some other user's key which they don't actually know. * Cryptographically this isn't much of a problem, but the semantics of this * would be a bit weird, so it's best to just forbid it. * * The system administrator (CAP_FOWNER) can override this, which should be * enough for any use cases where encryption policies are being set using keys * that were chosen ahead of time but aren't available at the moment. * * Note that the key may have already removed by the time this returns, but * that's okay; we just care whether the key was there at some point. * * Return: 0 if the key is added, -ENOKEY if it isn't, or another -errno code */ int fscrypt_verify_key_added(struct super_block *sb, const u8 identifier[FSCRYPT_KEY_IDENTIFIER_SIZE]) { struct fscrypt_key_specifier mk_spec; struct fscrypt_master_key *mk; struct key *mk_user; int err; mk_spec.type = FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER; memcpy(mk_spec.u.identifier, identifier, FSCRYPT_KEY_IDENTIFIER_SIZE); mk = fscrypt_find_master_key(sb, &mk_spec); if (!mk) { err = -ENOKEY; goto out; } down_read(&mk->mk_sem); mk_user = find_master_key_user(mk); if (IS_ERR(mk_user)) { err = PTR_ERR(mk_user); } else { key_put(mk_user); err = 0; } up_read(&mk->mk_sem); fscrypt_put_master_key(mk); out: if (err == -ENOKEY && capable(CAP_FOWNER)) err = 0; return err; } /* * Try to evict the inode's dentries from the dentry cache. If the inode is a * directory, then it can have at most one dentry; however, that dentry may be * pinned by child dentries, so first try to evict the children too. */ static void shrink_dcache_inode(struct inode *inode) { struct dentry *dentry; if (S_ISDIR(inode->i_mode)) { dentry = d_find_any_alias(inode); if (dentry) { shrink_dcache_parent(dentry); dput(dentry); } } d_prune_aliases(inode); } static void evict_dentries_for_decrypted_inodes(struct fscrypt_master_key *mk) { struct fscrypt_inode_info *ci; struct inode *inode; struct inode *toput_inode = NULL; spin_lock(&mk->mk_decrypted_inodes_lock); list_for_each_entry(ci, &mk->mk_decrypted_inodes, ci_master_key_link) { inode = ci->ci_inode; spin_lock(&inode->i_lock); if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) { spin_unlock(&inode->i_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); spin_unlock(&mk->mk_decrypted_inodes_lock); shrink_dcache_inode(inode); iput(toput_inode); toput_inode = inode; spin_lock(&mk->mk_decrypted_inodes_lock); } spin_unlock(&mk->mk_decrypted_inodes_lock); iput(toput_inode); } static int check_for_busy_inodes(struct super_block *sb, struct fscrypt_master_key *mk) { struct list_head *pos; size_t busy_count = 0; unsigned long ino; char ino_str[50] = ""; spin_lock(&mk->mk_decrypted_inodes_lock); list_for_each(pos, &mk->mk_decrypted_inodes) busy_count++; if (busy_count == 0) { spin_unlock(&mk->mk_decrypted_inodes_lock); return 0; } { /* select an example file to show for debugging purposes */ struct inode *inode = list_first_entry(&mk->mk_decrypted_inodes, struct fscrypt_inode_info, ci_master_key_link)->ci_inode; ino = inode->i_ino; } spin_unlock(&mk->mk_decrypted_inodes_lock); /* If the inode is currently being created, ino may still be 0. */ if (ino) snprintf(ino_str, sizeof(ino_str), ", including ino %lu", ino); fscrypt_warn(NULL, "%s: %zu inode(s) still busy after removing key with %s %*phN%s", sb->s_id, busy_count, master_key_spec_type(&mk->mk_spec), master_key_spec_len(&mk->mk_spec), (u8 *)&mk->mk_spec.u, ino_str); return -EBUSY; } static int try_to_lock_encrypted_files(struct super_block *sb, struct fscrypt_master_key *mk) { int err1; int err2; /* * An inode can't be evicted while it is dirty or has dirty pages. * Thus, we first have to clean the inodes in ->mk_decrypted_inodes. * * Just do it the easy way: call sync_filesystem(). It's overkill, but * it works, and it's more important to minimize the amount of caches we * drop than the amount of data we sync. Also, unprivileged users can * already call sync_filesystem() via sys_syncfs() or sys_sync(). */ down_read(&sb->s_umount); err1 = sync_filesystem(sb); up_read(&sb->s_umount); /* If a sync error occurs, still try to evict as much as possible. */ /* * Inodes are pinned by their dentries, so we have to evict their * dentries. shrink_dcache_sb() would suffice, but would be overkill * and inappropriate for use by unprivileged users. So instead go * through the inodes' alias lists and try to evict each dentry. */ evict_dentries_for_decrypted_inodes(mk); /* * evict_dentries_for_decrypted_inodes() already iput() each inode in * the list; any inodes for which that dropped the last reference will * have been evicted due to fscrypt_drop_inode() detecting the key * removal and telling the VFS to evict the inode. So to finish, we * just need to check whether any inodes couldn't be evicted. */ err2 = check_for_busy_inodes(sb, mk); return err1 ?: err2; } /* * Try to remove an fscrypt master encryption key. * * FS_IOC_REMOVE_ENCRYPTION_KEY (all_users=false) removes the current user's * claim to the key, then removes the key itself if no other users have claims. * FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS (all_users=true) always removes the * key itself. * * To "remove the key itself", first we transition the key to the "incompletely * removed" state, so that no more inodes can be unlocked with it. Then we try * to evict all cached inodes that had been unlocked with the key. * * If all inodes were evicted, then we unlink the fscrypt_master_key from the * keyring. Otherwise it remains in the keyring in the "incompletely removed" * state where it tracks the list of remaining inodes. Userspace can execute * the ioctl again later to retry eviction, or alternatively can re-add the key. * * For more details, see the "Removing keys" section of * Documentation/filesystems/fscrypt.rst. */ static int do_remove_key(struct file *filp, void __user *_uarg, bool all_users) { struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_remove_key_arg __user *uarg = _uarg; struct fscrypt_remove_key_arg arg; struct fscrypt_master_key *mk; u32 status_flags = 0; int err; bool inodes_remain; if (copy_from_user(&arg, uarg, sizeof(arg))) return -EFAULT; if (!valid_key_spec(&arg.key_spec)) return -EINVAL; if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; /* * Only root can add and remove keys that are identified by an arbitrary * descriptor rather than by a cryptographic hash. */ if (arg.key_spec.type == FSCRYPT_KEY_SPEC_TYPE_DESCRIPTOR && !capable(CAP_SYS_ADMIN)) return -EACCES; /* Find the key being removed. */ mk = fscrypt_find_master_key(sb, &arg.key_spec); if (!mk) return -ENOKEY; down_write(&mk->mk_sem); /* If relevant, remove current user's (or all users) claim to the key */ if (mk->mk_users && mk->mk_users->keys.nr_leaves_on_tree != 0) { if (all_users) err = keyring_clear(mk->mk_users); else err = remove_master_key_user(mk); if (err) { up_write(&mk->mk_sem); goto out_put_key; } if (mk->mk_users->keys.nr_leaves_on_tree != 0) { /* * Other users have still added the key too. We removed * the current user's claim to the key, but we still * can't remove the key itself. */ status_flags |= FSCRYPT_KEY_REMOVAL_STATUS_FLAG_OTHER_USERS; err = 0; up_write(&mk->mk_sem); goto out_put_key; } } /* No user claims remaining. Initiate removal of the key. */ err = -ENOKEY; if (mk->mk_present) { fscrypt_initiate_key_removal(sb, mk); err = 0; } inodes_remain = refcount_read(&mk->mk_active_refs) > 0; up_write(&mk->mk_sem); if (inodes_remain) { /* Some inodes still reference this key; try to evict them. */ err = try_to_lock_encrypted_files(sb, mk); if (err == -EBUSY) { status_flags |= FSCRYPT_KEY_REMOVAL_STATUS_FLAG_FILES_BUSY; err = 0; } } /* * We return 0 if we successfully did something: removed a claim to the * key, initiated removal of the key, or tried locking the files again. * Users need to check the informational status flags if they care * whether the key has been fully removed including all files locked. */ out_put_key: fscrypt_put_master_key(mk); if (err == 0) err = put_user(status_flags, &uarg->removal_status_flags); return err; } int fscrypt_ioctl_remove_key(struct file *filp, void __user *uarg) { return do_remove_key(filp, uarg, false); } EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key); int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *uarg) { if (!capable(CAP_SYS_ADMIN)) return -EACCES; return do_remove_key(filp, uarg, true); } EXPORT_SYMBOL_GPL(fscrypt_ioctl_remove_key_all_users); /* * Retrieve the status of an fscrypt master encryption key. * * We set ->status to indicate whether the key is absent, present, or * incompletely removed. (For an explanation of what these statuses mean and * how they are represented internally, see struct fscrypt_master_key.) This * field allows applications to easily determine the status of an encrypted * directory without using a hack such as trying to open a regular file in it * (which can confuse the "incompletely removed" status with absent or present). * * In addition, for v2 policy keys we allow applications to determine, via * ->status_flags and ->user_count, whether the key has been added by the * current user, by other users, or by both. Most applications should not need * this, since ordinarily only one user should know a given key. However, if a * secret key is shared by multiple users, applications may wish to add an * already-present key to prevent other users from removing it. This ioctl can * be used to check whether that really is the case before the work is done to * add the key --- which might e.g. require prompting the user for a passphrase. * * For more details, see the "FS_IOC_GET_ENCRYPTION_KEY_STATUS" section of * Documentation/filesystems/fscrypt.rst. */ int fscrypt_ioctl_get_key_status(struct file *filp, void __user *uarg) { struct super_block *sb = file_inode(filp)->i_sb; struct fscrypt_get_key_status_arg arg; struct fscrypt_master_key *mk; int err; if (copy_from_user(&arg, uarg, sizeof(arg))) return -EFAULT; if (!valid_key_spec(&arg.key_spec)) return -EINVAL; if (memchr_inv(arg.__reserved, 0, sizeof(arg.__reserved))) return -EINVAL; arg.status_flags = 0; arg.user_count = 0; memset(arg.__out_reserved, 0, sizeof(arg.__out_reserved)); mk = fscrypt_find_master_key(sb, &arg.key_spec); if (!mk) { arg.status = FSCRYPT_KEY_STATUS_ABSENT; err = 0; goto out; } down_read(&mk->mk_sem); if (!mk->mk_present) { arg.status = refcount_read(&mk->mk_active_refs) > 0 ? FSCRYPT_KEY_STATUS_INCOMPLETELY_REMOVED : FSCRYPT_KEY_STATUS_ABSENT /* raced with full removal */; err = 0; goto out_release_key; } arg.status = FSCRYPT_KEY_STATUS_PRESENT; if (mk->mk_users) { struct key *mk_user; arg.user_count = mk->mk_users->keys.nr_leaves_on_tree; mk_user = find_master_key_user(mk); if (!IS_ERR(mk_user)) { arg.status_flags |= FSCRYPT_KEY_STATUS_FLAG_ADDED_BY_SELF; key_put(mk_user); } else if (mk_user != ERR_PTR(-ENOKEY)) { err = PTR_ERR(mk_user); goto out_release_key; } } err = 0; out_release_key: up_read(&mk->mk_sem); fscrypt_put_master_key(mk); out: if (!err && copy_to_user(uarg, &arg, sizeof(arg))) err = -EFAULT; return err; } EXPORT_SYMBOL_GPL(fscrypt_ioctl_get_key_status); int __init fscrypt_init_keyring(void) { int err; err = register_key_type(&key_type_fscrypt_user); if (err) return err; err = register_key_type(&key_type_fscrypt_provisioning); if (err) goto err_unregister_fscrypt_user; return 0; err_unregister_fscrypt_user: unregister_key_type(&key_type_fscrypt_user); return err; }
1 1 1 1 1 1 1 1 1 1 5 5 5 1 1 4 4 4 4 1 1 1 1 1 1 1 1 4 4 2 4 2 2 4 4 4 4 4 1 1 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 // SPDX-License-Identifier: GPL-2.0 /* * Implementation of operations over global quota file */ #include <linux/spinlock.h> #include <linux/fs.h> #include <linux/slab.h> #include <linux/quota.h> #include <linux/quotaops.h> #include <linux/dqblk_qtree.h> #include <linux/jiffies.h> #include <linux/writeback.h> #include <linux/workqueue.h> #include <linux/llist.h> #include <linux/iversion.h> #include <cluster/masklog.h> #include "ocfs2_fs.h" #include "ocfs2.h" #include "alloc.h" #include "blockcheck.h" #include "inode.h" #include "journal.h" #include "file.h" #include "sysfile.h" #include "dlmglue.h" #include "uptodate.h" #include "super.h" #include "buffer_head_io.h" #include "quota.h" #include "ocfs2_trace.h" /* * Locking of quotas with OCFS2 is rather complex. Here are rules that * should be obeyed by all the functions: * - any write of quota structure (either to local or global file) is protected * by dqio_sem or dquot->dq_lock. * - any modification of global quota file holds inode cluster lock, i_rwsem, * and ip_alloc_sem of the global quota file (achieved by * ocfs2_lock_global_qf). It also has to hold qinfo_lock. * - an allocation of new blocks for local quota file is protected by * its ip_alloc_sem * * A rough sketch of locking dependencies (lf = local file, gf = global file): * Normal filesystem operation: * start_trans -> dqio_sem -> write to lf * Syncing of local and global file: * ocfs2_lock_global_qf -> start_trans -> dqio_sem -> qinfo_lock -> * write to gf * -> write to lf * Acquire dquot for the first time: * dq_lock -> ocfs2_lock_global_qf -> qinfo_lock -> read from gf * -> alloc space for gf * -> start_trans -> qinfo_lock -> write to gf * -> ip_alloc_sem of lf -> alloc space for lf * -> write to lf * Release last reference to dquot: * dq_lock -> ocfs2_lock_global_qf -> start_trans -> qinfo_lock -> write to gf * -> write to lf * Note that all the above operations also hold the inode cluster lock of lf. * Recovery: * inode cluster lock of recovered lf * -> read bitmaps -> ip_alloc_sem of lf * -> ocfs2_lock_global_qf -> start_trans -> dqio_sem -> qinfo_lock -> * write to gf */ static void qsync_work_fn(struct work_struct *work); static void ocfs2_global_disk2memdqb(struct dquot *dquot, void *dp) { struct ocfs2_global_disk_dqblk *d = dp; struct mem_dqblk *m = &dquot->dq_dqb; /* Update from disk only entries not set by the admin */ if (!test_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags)) { m->dqb_ihardlimit = le64_to_cpu(d->dqb_ihardlimit); m->dqb_isoftlimit = le64_to_cpu(d->dqb_isoftlimit); } if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags)) m->dqb_curinodes = le64_to_cpu(d->dqb_curinodes); if (!test_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags)) { m->dqb_bhardlimit = le64_to_cpu(d->dqb_bhardlimit); m->dqb_bsoftlimit = le64_to_cpu(d->dqb_bsoftlimit); } if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags)) m->dqb_curspace = le64_to_cpu(d->dqb_curspace); if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags)) m->dqb_btime = le64_to_cpu(d->dqb_btime); if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags)) m->dqb_itime = le64_to_cpu(d->dqb_itime); OCFS2_DQUOT(dquot)->dq_use_count = le32_to_cpu(d->dqb_use_count); } static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot) { struct ocfs2_global_disk_dqblk *d = dp; struct mem_dqblk *m = &dquot->dq_dqb; d->dqb_id = cpu_to_le32(from_kqid(&init_user_ns, dquot->dq_id)); d->dqb_use_count = cpu_to_le32(OCFS2_DQUOT(dquot)->dq_use_count); d->dqb_ihardlimit = cpu_to_le64(m->dqb_ihardlimit); d->dqb_isoftlimit = cpu_to_le64(m->dqb_isoftlimit); d->dqb_curinodes = cpu_to_le64(m->dqb_curinodes); d->dqb_bhardlimit = cpu_to_le64(m->dqb_bhardlimit); d->dqb_bsoftlimit = cpu_to_le64(m->dqb_bsoftlimit); d->dqb_curspace = cpu_to_le64(m->dqb_curspace); d->dqb_btime = cpu_to_le64(m->dqb_btime); d->dqb_itime = cpu_to_le64(m->dqb_itime); d->dqb_pad1 = d->dqb_pad2 = 0; } static int ocfs2_global_is_id(void *dp, struct dquot *dquot) { struct ocfs2_global_disk_dqblk *d = dp; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; if (qtree_entry_unused(&oinfo->dqi_gi, dp)) return 0; return qid_eq(make_kqid(&init_user_ns, dquot->dq_id.type, le32_to_cpu(d->dqb_id)), dquot->dq_id); } const struct qtree_fmt_operations ocfs2_global_ops = { .mem2disk_dqblk = ocfs2_global_mem2diskdqb, .disk2mem_dqblk = ocfs2_global_disk2memdqb, .is_id = ocfs2_global_is_id, }; int ocfs2_validate_quota_block(struct super_block *sb, struct buffer_head *bh) { struct ocfs2_disk_dqtrailer *dqt = ocfs2_block_dqtrailer(sb->s_blocksize, bh->b_data); trace_ocfs2_validate_quota_block((unsigned long long)bh->b_blocknr); BUG_ON(!buffer_uptodate(bh)); /* * If the ecc fails, we return the error but otherwise * leave the filesystem running. We know any error is * local to this block. */ return ocfs2_validate_meta_ecc(sb, bh->b_data, &dqt->dq_check); } int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, struct buffer_head **bhp) { int rc; *bhp = NULL; rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, 1, bhp, 0, ocfs2_validate_quota_block); if (rc) mlog_errno(rc); return rc; } /* Read data from global quotafile - avoid pagecache and such because we cannot * afford acquiring the locks... We use quota cluster lock to serialize * operations. Caller is responsible for acquiring it. */ ssize_t ocfs2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) { struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; struct inode *gqinode = oinfo->dqi_gqinode; loff_t i_size = i_size_read(gqinode); int offset = off & (sb->s_blocksize - 1); sector_t blk = off >> sb->s_blocksize_bits; int err = 0; struct buffer_head *bh; size_t toread, tocopy; u64 pblock = 0, pcount = 0; if (off > i_size) return 0; if (off + len > i_size) len = i_size - off; toread = len; while (toread > 0) { tocopy = min_t(size_t, (sb->s_blocksize - offset), toread); if (!pcount) { err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, &pcount, NULL); if (err) { mlog_errno(err); return err; } } else { pcount--; pblock++; } bh = NULL; err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh); if (err) { mlog_errno(err); return err; } memcpy(data, bh->b_data + offset, tocopy); brelse(bh); offset = 0; toread -= tocopy; data += tocopy; blk++; } return len; } /* Write to quotafile (we know the transaction is already started and has * enough credits) */ ssize_t ocfs2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off) { struct mem_dqinfo *info = sb_dqinfo(sb, type); struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; struct inode *gqinode = oinfo->dqi_gqinode; int offset = off & (sb->s_blocksize - 1); sector_t blk = off >> sb->s_blocksize_bits; int err = 0, new = 0, ja_type; struct buffer_head *bh = NULL; handle_t *handle = journal_current_handle(); u64 pblock, pcount; if (!handle) { mlog(ML_ERROR, "Quota write (off=%llu, len=%llu) cancelled " "because transaction was not started.\n", (unsigned long long)off, (unsigned long long)len); return -EIO; } if (len > sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset) { WARN_ON(1); len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset; } if (i_size_read(gqinode) < off + len) { loff_t rounded_end = ocfs2_align_bytes_to_blocks(sb, off + len); /* Space is already allocated in ocfs2_acquire_dquot() */ err = ocfs2_simple_size_update(gqinode, oinfo->dqi_gqi_bh, rounded_end); if (err < 0) goto out; new = 1; } err = ocfs2_extent_map_get_blocks(gqinode, blk, &pblock, &pcount, NULL); if (err) { mlog_errno(err); goto out; } /* Not rewriting whole block? */ if ((offset || len < sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE) && !new) { err = ocfs2_read_quota_phys_block(gqinode, pblock, &bh); ja_type = OCFS2_JOURNAL_ACCESS_WRITE; } else { bh = sb_getblk(sb, pblock); if (!bh) err = -ENOMEM; ja_type = OCFS2_JOURNAL_ACCESS_CREATE; } if (err) { mlog_errno(err); goto out; } lock_buffer(bh); if (new) memset(bh->b_data, 0, sb->s_blocksize); memcpy(bh->b_data + offset, data, len); flush_dcache_page(bh->b_page); set_buffer_uptodate(bh); unlock_buffer(bh); ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh); err = ocfs2_journal_access_dq(handle, INODE_CACHE(gqinode), bh, ja_type); if (err < 0) { brelse(bh); goto out; } ocfs2_journal_dirty(handle, bh); brelse(bh); out: if (err) { mlog_errno(err); return err; } inode_inc_iversion(gqinode); ocfs2_mark_inode_dirty(handle, gqinode, oinfo->dqi_gqi_bh); return len; } int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) { int status; struct buffer_head *bh = NULL; status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex); if (status < 0) return status; spin_lock(&dq_data_lock); if (!oinfo->dqi_gqi_count++) oinfo->dqi_gqi_bh = bh; else WARN_ON(bh != oinfo->dqi_gqi_bh); spin_unlock(&dq_data_lock); if (ex) { inode_lock(oinfo->dqi_gqinode); down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); } else { down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); } return 0; } void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) { if (ex) { up_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); inode_unlock(oinfo->dqi_gqinode); } else { up_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); } ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); brelse(oinfo->dqi_gqi_bh); spin_lock(&dq_data_lock); if (!--oinfo->dqi_gqi_count) oinfo->dqi_gqi_bh = NULL; spin_unlock(&dq_data_lock); } /* Read information header from global quota file */ int ocfs2_global_read_info(struct super_block *sb, int type) { unsigned int ino[OCFS2_MAXQUOTAS] = { USER_QUOTA_SYSTEM_INODE, GROUP_QUOTA_SYSTEM_INODE }; struct ocfs2_global_disk_dqinfo dinfo; struct mem_dqinfo *info = sb_dqinfo(sb, type); struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; u64 pcount; int status; oinfo->dqi_gi.dqi_sb = sb; oinfo->dqi_gi.dqi_type = type; ocfs2_qinfo_lock_res_init(&oinfo->dqi_gqlock, oinfo); oinfo->dqi_gi.dqi_entry_size = sizeof(struct ocfs2_global_disk_dqblk); oinfo->dqi_gi.dqi_ops = &ocfs2_global_ops; oinfo->dqi_gqi_bh = NULL; oinfo->dqi_gqi_count = 0; /* Read global header */ oinfo->dqi_gqinode = ocfs2_get_system_file_inode(OCFS2_SB(sb), ino[type], OCFS2_INVALID_SLOT); if (!oinfo->dqi_gqinode) { mlog(ML_ERROR, "failed to get global quota inode (type=%d)\n", type); status = -EINVAL; goto out_err; } status = ocfs2_lock_global_qf(oinfo, 0); if (status < 0) { mlog_errno(status); goto out_err; } status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, &pcount, NULL); if (status < 0) { mlog_errno(status); goto out_unlock; } status = ocfs2_qinfo_lock(oinfo, 0); if (status < 0) { mlog_errno(status); goto out_unlock; } status = sb->s_op->quota_read(sb, type, (char *)&dinfo, sizeof(struct ocfs2_global_disk_dqinfo), OCFS2_GLOBAL_INFO_OFF); ocfs2_qinfo_unlock(oinfo, 0); ocfs2_unlock_global_qf(oinfo, 0); if (status != sizeof(struct ocfs2_global_disk_dqinfo)) { mlog(ML_ERROR, "Cannot read global quota info (%d).\n", status); if (status >= 0) status = -EIO; mlog_errno(status); goto out_err; } info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); oinfo->dqi_gi.dqi_blocksize_bits = sb->s_blocksize_bits; oinfo->dqi_gi.dqi_usable_bs = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE; oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); schedule_delayed_work(&oinfo->dqi_sync_work, msecs_to_jiffies(oinfo->dqi_syncms)); return 0; out_unlock: ocfs2_unlock_global_qf(oinfo, 0); out_err: return status; } /* Write information to global quota file. Expects exclusive lock on quota * file inode and quota info */ static int __ocfs2_global_write_info(struct super_block *sb, int type) { struct mem_dqinfo *info = sb_dqinfo(sb, type); struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; struct ocfs2_global_disk_dqinfo dinfo; ssize_t size; spin_lock(&dq_data_lock); info->dqi_flags &= ~DQF_INFO_DIRTY; dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); spin_unlock(&dq_data_lock); dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms); dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk); dinfo.dqi_free_entry = cpu_to_le32(oinfo->dqi_gi.dqi_free_entry); size = sb->s_op->quota_write(sb, type, (char *)&dinfo, sizeof(struct ocfs2_global_disk_dqinfo), OCFS2_GLOBAL_INFO_OFF); if (size != sizeof(struct ocfs2_global_disk_dqinfo)) { mlog(ML_ERROR, "Cannot write global quota info structure\n"); if (size >= 0) size = -EIO; return size; } return 0; } int ocfs2_global_write_info(struct super_block *sb, int type) { int err; struct quota_info *dqopt = sb_dqopt(sb); struct ocfs2_mem_dqinfo *info = dqopt->info[type].dqi_priv; unsigned int memalloc; down_write(&dqopt->dqio_sem); memalloc = memalloc_nofs_save(); err = ocfs2_qinfo_lock(info, 1); if (err < 0) goto out_sem; err = __ocfs2_global_write_info(sb, type); ocfs2_qinfo_unlock(info, 1); out_sem: memalloc_nofs_restore(memalloc); up_write(&dqopt->dqio_sem); return err; } static int ocfs2_global_qinit_alloc(struct super_block *sb, int type) { struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; /* * We may need to allocate tree blocks and a leaf block but not the * root block */ return oinfo->dqi_gi.dqi_qtree_depth; } static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type) { /* We modify all the allocated blocks, tree root, info block and * the inode */ return (ocfs2_global_qinit_alloc(sb, type) + 2) * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + 1; } /* Sync local information about quota modifications with global quota file. * Caller must have started the transaction and obtained exclusive lock for * global quota file inode */ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) { int err, err2; struct super_block *sb = dquot->dq_sb; int type = dquot->dq_id.type; struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; struct ocfs2_global_disk_dqblk dqblk; s64 spacechange, inodechange; time64_t olditime, oldbtime; err = sb->s_op->quota_read(sb, type, (char *)&dqblk, sizeof(struct ocfs2_global_disk_dqblk), dquot->dq_off); if (err != sizeof(struct ocfs2_global_disk_dqblk)) { if (err >= 0) { mlog(ML_ERROR, "Short read from global quota file " "(%u read)\n", err); err = -EIO; } goto out; } /* Update space and inode usage. Get also other information from * global quota file so that we don't overwrite any changes there. * We are */ spin_lock(&dquot->dq_dqb_lock); spacechange = dquot->dq_dqb.dqb_curspace - OCFS2_DQUOT(dquot)->dq_origspace; inodechange = dquot->dq_dqb.dqb_curinodes - OCFS2_DQUOT(dquot)->dq_originodes; olditime = dquot->dq_dqb.dqb_itime; oldbtime = dquot->dq_dqb.dqb_btime; ocfs2_global_disk2memdqb(dquot, &dqblk); trace_ocfs2_sync_dquot(from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_dqb.dqb_curspace, (long long)spacechange, dquot->dq_dqb.dqb_curinodes, (long long)inodechange); if (!test_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags)) dquot->dq_dqb.dqb_curspace += spacechange; if (!test_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags)) dquot->dq_dqb.dqb_curinodes += inodechange; /* Set properly space grace time... */ if (dquot->dq_dqb.dqb_bsoftlimit && dquot->dq_dqb.dqb_curspace > dquot->dq_dqb.dqb_bsoftlimit) { if (!test_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags) && oldbtime > 0) { if (dquot->dq_dqb.dqb_btime > 0) dquot->dq_dqb.dqb_btime = min(dquot->dq_dqb.dqb_btime, oldbtime); else dquot->dq_dqb.dqb_btime = oldbtime; } } else { dquot->dq_dqb.dqb_btime = 0; clear_bit(DQ_BLKS_B, &dquot->dq_flags); } /* Set properly inode grace time... */ if (dquot->dq_dqb.dqb_isoftlimit && dquot->dq_dqb.dqb_curinodes > dquot->dq_dqb.dqb_isoftlimit) { if (!test_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags) && olditime > 0) { if (dquot->dq_dqb.dqb_itime > 0) dquot->dq_dqb.dqb_itime = min(dquot->dq_dqb.dqb_itime, olditime); else dquot->dq_dqb.dqb_itime = olditime; } } else { dquot->dq_dqb.dqb_itime = 0; clear_bit(DQ_INODES_B, &dquot->dq_flags); } /* All information is properly updated, clear the flags */ __clear_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags); __clear_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags); __clear_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags); __clear_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags); __clear_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags); __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; spin_unlock(&dquot->dq_dqb_lock); err = ocfs2_qinfo_lock(info, freeing); if (err < 0) { mlog(ML_ERROR, "Failed to lock quota info, losing quota write" " (type=%d, id=%u)\n", dquot->dq_id.type, (unsigned)from_kqid(&init_user_ns, dquot->dq_id)); goto out; } if (freeing) OCFS2_DQUOT(dquot)->dq_use_count--; err = qtree_write_dquot(&info->dqi_gi, dquot); if (err < 0) goto out_qlock; if (freeing && !OCFS2_DQUOT(dquot)->dq_use_count) { err = qtree_release_dquot(&info->dqi_gi, dquot); if (info_dirty(sb_dqinfo(sb, type))) { err2 = __ocfs2_global_write_info(sb, type); if (!err) err = err2; } } out_qlock: ocfs2_qinfo_unlock(info, freeing); out: if (err < 0) mlog_errno(err); return err; } /* * Functions for periodic syncing of dquots with global file */ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) { handle_t *handle; struct super_block *sb = dquot->dq_sb; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; struct ocfs2_super *osb = OCFS2_SB(sb); int status = 0; unsigned int memalloc; trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type, type, sb->s_id); if (type != dquot->dq_id.type) goto out; status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out_ilock; } down_write(&sb_dqopt(sb)->dqio_sem); memalloc = memalloc_nofs_save(); status = ocfs2_sync_dquot(dquot); if (status < 0) mlog_errno(status); /* We have to write local structure as well... */ status = ocfs2_local_write_dquot(dquot); if (status < 0) mlog_errno(status); memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(sb)->dqio_sem); ocfs2_commit_trans(osb, handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); out: return status; } static void qsync_work_fn(struct work_struct *work) { struct ocfs2_mem_dqinfo *oinfo = container_of(work, struct ocfs2_mem_dqinfo, dqi_sync_work.work); struct super_block *sb = oinfo->dqi_gqinode->i_sb; /* * We have to be careful here not to deadlock on s_umount as umount * disabling quotas may be in progress and it waits for this work to * complete. If trylock fails, we'll do the sync next time... */ if (down_read_trylock(&sb->s_umount)) { dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); up_read(&sb->s_umount); } schedule_delayed_work(&oinfo->dqi_sync_work, msecs_to_jiffies(oinfo->dqi_syncms)); } /* * Wrappers for generic quota functions */ static int ocfs2_write_dquot(struct dquot *dquot) { handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); int status = 0; unsigned int memalloc; trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type); handle = ocfs2_start_trans(osb, OCFS2_QWRITE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out; } down_write(&sb_dqopt(dquot->dq_sb)->dqio_sem); memalloc = memalloc_nofs_save(); status = ocfs2_local_write_dquot(dquot); memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(dquot->dq_sb)->dqio_sem); ocfs2_commit_trans(osb, handle); out: return status; } static int ocfs2_calc_qdel_credits(struct super_block *sb, int type) { struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; /* * We modify tree, leaf block, global info, local chunk header, * global and local inode; OCFS2_QINFO_WRITE_CREDITS already * accounts for inode update */ return (oinfo->dqi_gi.dqi_qtree_depth + 2) * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS + OCFS2_QINFO_WRITE_CREDITS + OCFS2_INODE_UPDATE_CREDITS; } void ocfs2_drop_dquot_refs(struct work_struct *work) { struct ocfs2_super *osb = container_of(work, struct ocfs2_super, dquot_drop_work); struct llist_node *list; struct ocfs2_dquot *odquot, *next_odquot; list = llist_del_all(&osb->dquot_drop_list); llist_for_each_entry_safe(odquot, next_odquot, list, list) { /* Drop the reference we acquired in ocfs2_dquot_release() */ dqput(&odquot->dq_dquot); } } /* * Called when the last reference to dquot is dropped. If we are called from * downconvert thread, we cannot do all the handling here because grabbing * quota lock could deadlock (the node holding the quota lock could need some * other cluster lock to proceed but with blocked downconvert thread we cannot * release any lock). */ static int ocfs2_release_dquot(struct dquot *dquot) { handle_t *handle; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type)->dqi_priv; struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); int status = 0; trace_ocfs2_release_dquot(from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type); mutex_lock(&dquot->dq_lock); /* Check whether we are not racing with some other dqget() */ if (dquot_is_busy(dquot)) goto out; /* Running from downconvert thread? Postpone quota processing to wq */ if (current == osb->dc_task) { /* * Grab our own reference to dquot and queue it for delayed * dropping. Quota code rechecks after calling * ->release_dquot() and won't free dquot structure. */ dqgrab(dquot); /* First entry on list -> queue work */ if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list)) queue_work(osb->ocfs2_wq, &osb->dquot_drop_work); goto out; } status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; handle = ocfs2_start_trans(osb, ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out_ilock; } status = ocfs2_global_release_dquot(dquot); if (status < 0) { mlog_errno(status); goto out_trans; } status = ocfs2_local_release_dquot(handle, dquot); /* * If we fail here, we cannot do much as global structure is * already released. So just complain... */ if (status < 0) mlog_errno(status); /* * Clear dq_off so that we search for the structure in quota file next * time we acquire it. The structure might be deleted and reallocated * elsewhere by another node while our dquot structure is on freelist. */ dquot->dq_off = 0; clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); out_trans: ocfs2_commit_trans(osb, handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); out: mutex_unlock(&dquot->dq_lock); if (status) mlog_errno(status); return status; } /* * Read global dquot structure from disk or create it if it does * not exist. Also update use count of the global structure and * create structure in node-local quota file. */ static int ocfs2_acquire_dquot(struct dquot *dquot) { int status = 0, err; int ex = 0; struct super_block *sb = dquot->dq_sb; struct ocfs2_super *osb = OCFS2_SB(sb); int type = dquot->dq_id.type; struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; struct inode *gqinode = info->dqi_gqinode; int need_alloc = ocfs2_global_qinit_alloc(sb, type); handle_t *handle; trace_ocfs2_acquire_dquot(from_kqid(&init_user_ns, dquot->dq_id), type); mutex_lock(&dquot->dq_lock); /* * We need an exclusive lock, because we're going to update use count * and instantiate possibly new dquot structure */ status = ocfs2_lock_global_qf(info, 1); if (status < 0) goto out; status = ocfs2_qinfo_lock(info, 0); if (status < 0) goto out_dq; /* * We always want to read dquot structure from disk because we don't * know what happened with it while it was on freelist. */ status = qtree_read_dquot(&info->dqi_gi, dquot); ocfs2_qinfo_unlock(info, 0); if (status < 0) goto out_dq; OCFS2_DQUOT(dquot)->dq_use_count++; OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; if (!dquot->dq_off) { /* No real quota entry? */ ex = 1; /* * Add blocks to quota file before we start a transaction since * locking allocators ranks above a transaction start */ WARN_ON(journal_current_handle()); status = ocfs2_extend_no_holes(gqinode, NULL, i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits), i_size_read(gqinode)); if (status < 0) goto out_dq; } handle = ocfs2_start_trans(osb, ocfs2_calc_global_qinit_credits(sb, type)); if (IS_ERR(handle)) { status = PTR_ERR(handle); goto out_dq; } status = ocfs2_qinfo_lock(info, ex); if (status < 0) goto out_trans; status = qtree_write_dquot(&info->dqi_gi, dquot); if (ex && info_dirty(sb_dqinfo(sb, type))) { err = __ocfs2_global_write_info(sb, type); if (!status) status = err; } ocfs2_qinfo_unlock(info, ex); out_trans: ocfs2_commit_trans(osb, handle); out_dq: ocfs2_unlock_global_qf(info, 1); if (status < 0) goto out; status = ocfs2_create_local_dquot(dquot); if (status < 0) goto out; set_bit(DQ_ACTIVE_B, &dquot->dq_flags); out: mutex_unlock(&dquot->dq_lock); if (status) mlog_errno(status); return status; } static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid) { int type = qid->type; struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv; int status = 0; trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type); if (!sb_has_quota_loaded(sb, type)) { status = -ESRCH; goto out; } status = ocfs2_lock_global_qf(info, 0); if (status < 0) goto out; status = ocfs2_qinfo_lock(info, 0); if (status < 0) goto out_global; status = qtree_get_next_id(&info->dqi_gi, qid); ocfs2_qinfo_unlock(info, 0); out_global: ocfs2_unlock_global_qf(info, 0); out: /* * Avoid logging ENOENT since it just means there isn't next ID and * ESRCH which means quota isn't enabled for the filesystem. */ if (status && status != -ENOENT && status != -ESRCH) mlog_errno(status); return status; } static int ocfs2_mark_dquot_dirty(struct dquot *dquot) { unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) | (1 << (DQ_LASTSET_B + QIF_BLIMITS_B)) | (1 << (DQ_LASTSET_B + QIF_INODES_B)) | (1 << (DQ_LASTSET_B + QIF_SPACE_B)) | (1 << (DQ_LASTSET_B + QIF_BTIME_B)) | (1 << (DQ_LASTSET_B + QIF_ITIME_B)); int sync = 0; int status; struct super_block *sb = dquot->dq_sb; int type = dquot->dq_id.type; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(sb); unsigned int memalloc; trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id), type); /* In case user set some limits, sync dquot immediately to global * quota file so that information propagates quicker */ spin_lock(&dquot->dq_dqb_lock); if (dquot->dq_flags & mask) sync = 1; spin_unlock(&dquot->dq_dqb_lock); /* This is a slight hack but we can't afford getting global quota * lock if we already have a transaction started. */ if (!sync || journal_current_handle()) { status = ocfs2_write_dquot(dquot); goto out; } status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; handle = ocfs2_start_trans(osb, OCFS2_QSYNC_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out_ilock; } down_write(&sb_dqopt(sb)->dqio_sem); memalloc = memalloc_nofs_save(); status = ocfs2_sync_dquot(dquot); if (status < 0) { mlog_errno(status); goto out_dlock; } /* Now write updated local dquot structure */ status = ocfs2_local_write_dquot(dquot); out_dlock: memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(sb)->dqio_sem); ocfs2_commit_trans(osb, handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); out: if (status) mlog_errno(status); return status; } /* This should happen only after set_dqinfo(). */ static int ocfs2_write_info(struct super_block *sb, int type) { handle_t *handle; int status = 0; struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; status = ocfs2_lock_global_qf(oinfo, 1); if (status < 0) goto out; handle = ocfs2_start_trans(OCFS2_SB(sb), OCFS2_QINFO_WRITE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); goto out_ilock; } status = dquot_commit_info(sb, type); ocfs2_commit_trans(OCFS2_SB(sb), handle); out_ilock: ocfs2_unlock_global_qf(oinfo, 1); out: if (status) mlog_errno(status); return status; } static struct dquot *ocfs2_alloc_dquot(struct super_block *sb, int type) { struct ocfs2_dquot *dquot = kmem_cache_zalloc(ocfs2_dquot_cachep, GFP_NOFS); if (!dquot) return NULL; return &dquot->dq_dquot; } static void ocfs2_destroy_dquot(struct dquot *dquot) { kmem_cache_free(ocfs2_dquot_cachep, dquot); } const struct dquot_operations ocfs2_quota_operations = { /* We never make dquot dirty so .write_dquot is never called */ .acquire_dquot = ocfs2_acquire_dquot, .release_dquot = ocfs2_release_dquot, .mark_dirty = ocfs2_mark_dquot_dirty, .write_info = ocfs2_write_info, .alloc_dquot = ocfs2_alloc_dquot, .destroy_dquot = ocfs2_destroy_dquot, .get_next_id = ocfs2_get_next_id, };
8 2 1 3 2 3 2 5 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 // SPDX-License-Identifier: GPL-2.0 /* * chaoskey - driver for ChaosKey device from Altus Metrum. * * This device provides true random numbers using a noise source based * on a reverse-biased p-n junction in avalanche breakdown. More * details can be found at http://chaoskey.org * * The driver connects to the kernel hardware RNG interface to provide * entropy for /dev/random and other kernel activities. It also offers * a separate /dev/ entry to allow for direct access to the random * bit stream. * * Copyright © 2015 Keith Packard <keithp@keithp.com> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/usb.h> #include <linux/wait.h> #include <linux/hw_random.h> #include <linux/mutex.h> #include <linux/uaccess.h> static struct usb_driver chaoskey_driver; static struct usb_class_driver chaoskey_class; static int chaoskey_rng_read(struct hwrng *rng, void *data, size_t max, bool wait); static DEFINE_MUTEX(chaoskey_list_lock); #define usb_dbg(usb_if, format, arg...) \ dev_dbg(&(usb_if)->dev, format, ## arg) #define usb_err(usb_if, format, arg...) \ dev_err(&(usb_if)->dev, format, ## arg) /* Version Information */ #define DRIVER_AUTHOR "Keith Packard, keithp@keithp.com" #define DRIVER_DESC "Altus Metrum ChaosKey driver" #define DRIVER_SHORT "chaoskey" MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); #define CHAOSKEY_VENDOR_ID 0x1d50 /* OpenMoko */ #define CHAOSKEY_PRODUCT_ID 0x60c6 /* ChaosKey */ #define ALEA_VENDOR_ID 0x12d8 /* Araneus */ #define ALEA_PRODUCT_ID 0x0001 /* Alea I */ #define CHAOSKEY_BUF_LEN 64 /* max size of USB full speed packet */ #define NAK_TIMEOUT (HZ) /* normal stall/wait timeout */ #define ALEA_FIRST_TIMEOUT (HZ*3) /* first stall/wait timeout for Alea */ #ifdef CONFIG_USB_DYNAMIC_MINORS #define USB_CHAOSKEY_MINOR_BASE 0 #else /* IOWARRIOR_MINOR_BASE + 16, not official yet */ #define USB_CHAOSKEY_MINOR_BASE 224 #endif static const struct usb_device_id chaoskey_table[] = { { USB_DEVICE(CHAOSKEY_VENDOR_ID, CHAOSKEY_PRODUCT_ID) }, { USB_DEVICE(ALEA_VENDOR_ID, ALEA_PRODUCT_ID) }, { }, }; MODULE_DEVICE_TABLE(usb, chaoskey_table); static void chaos_read_callback(struct urb *urb); /* Driver-local specific stuff */ struct chaoskey { struct usb_interface *interface; char in_ep; struct mutex lock; struct mutex rng_lock; int open; /* open count */ bool present; /* device not disconnected */ bool reading; /* ongoing IO */ bool reads_started; /* track first read for Alea */ int size; /* size of buf */ int valid; /* bytes of buf read */ int used; /* bytes of buf consumed */ char *name; /* product + serial */ struct hwrng hwrng; /* Embedded struct for hwrng */ int hwrng_registered; /* registered with hwrng API */ wait_queue_head_t wait_q; /* for timeouts */ struct urb *urb; /* for performing IO */ char *buf; }; static void chaoskey_free(struct chaoskey *dev) { if (dev) { usb_dbg(dev->interface, "free"); usb_free_urb(dev->urb); kfree(dev->name); kfree(dev->buf); usb_put_intf(dev->interface); kfree(dev); } } static int chaoskey_probe(struct usb_interface *interface, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(interface); struct usb_host_interface *altsetting = interface->cur_altsetting; struct usb_endpoint_descriptor *epd; int in_ep; struct chaoskey *dev; int result = -ENOMEM; int size; int res; usb_dbg(interface, "probe %s-%s", udev->product, udev->serial); /* Find the first bulk IN endpoint and its packet size */ res = usb_find_bulk_in_endpoint(altsetting, &epd); if (res) { usb_dbg(interface, "no IN endpoint found"); return res; } in_ep = usb_endpoint_num(epd); size = usb_endpoint_maxp(epd); /* Validate endpoint and size */ if (size <= 0) { usb_dbg(interface, "invalid size (%d)", size); return -ENODEV; } if (size > CHAOSKEY_BUF_LEN) { usb_dbg(interface, "size reduced from %d to %d\n", size, CHAOSKEY_BUF_LEN); size = CHAOSKEY_BUF_LEN; } /* Looks good, allocate and initialize */ dev = kzalloc(sizeof(struct chaoskey), GFP_KERNEL); if (dev == NULL) goto out; dev->interface = usb_get_intf(interface); dev->buf = kmalloc(size, GFP_KERNEL); if (dev->buf == NULL) goto out; dev->urb = usb_alloc_urb(0, GFP_KERNEL); if (!dev->urb) goto out; usb_fill_bulk_urb(dev->urb, udev, usb_rcvbulkpipe(udev, in_ep), dev->buf, size, chaos_read_callback, dev); /* Construct a name using the product and serial values. Each * device needs a unique name for the hwrng code */ if (udev->product && udev->serial) { dev->name = kasprintf(GFP_KERNEL, "%s-%s", udev->product, udev->serial); if (dev->name == NULL) goto out; } dev->in_ep = in_ep; if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID) dev->reads_started = true; dev->size = size; dev->present = true; init_waitqueue_head(&dev->wait_q); mutex_init(&dev->lock); mutex_init(&dev->rng_lock); usb_set_intfdata(interface, dev); result = usb_register_dev(interface, &chaoskey_class); if (result) { usb_err(interface, "Unable to allocate minor number."); goto out; } dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name; dev->hwrng.read = chaoskey_rng_read; dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0); if (!dev->hwrng_registered) usb_err(interface, "Unable to register with hwrng"); usb_enable_autosuspend(udev); usb_dbg(interface, "chaoskey probe success, size %d", dev->size); return 0; out: usb_set_intfdata(interface, NULL); chaoskey_free(dev); return result; } static void chaoskey_disconnect(struct usb_interface *interface) { struct chaoskey *dev; usb_dbg(interface, "disconnect"); dev = usb_get_intfdata(interface); if (!dev) { usb_dbg(interface, "disconnect failed - no dev"); return; } if (dev->hwrng_registered) hwrng_unregister(&dev->hwrng); usb_deregister_dev(interface, &chaoskey_class); usb_set_intfdata(interface, NULL); mutex_lock(&chaoskey_list_lock); mutex_lock(&dev->lock); dev->present = false; usb_poison_urb(dev->urb); if (!dev->open) { mutex_unlock(&dev->lock); chaoskey_free(dev); } else mutex_unlock(&dev->lock); mutex_unlock(&chaoskey_list_lock); usb_dbg(interface, "disconnect done"); } static int chaoskey_open(struct inode *inode, struct file *file) { struct chaoskey *dev; struct usb_interface *interface; int rv = 0; /* get the interface from minor number and driver information */ interface = usb_find_interface(&chaoskey_driver, iminor(inode)); if (!interface) return -ENODEV; usb_dbg(interface, "open"); dev = usb_get_intfdata(interface); if (!dev) { usb_dbg(interface, "open (dev)"); return -ENODEV; } file->private_data = dev; mutex_lock(&chaoskey_list_lock); mutex_lock(&dev->lock); if (dev->present) ++dev->open; else rv = -ENODEV; mutex_unlock(&dev->lock); mutex_unlock(&chaoskey_list_lock); return rv; } static int chaoskey_release(struct inode *inode, struct file *file) { struct chaoskey *dev = file->private_data; struct usb_interface *interface; int rv = 0; if (dev == NULL) return -ENODEV; interface = dev->interface; usb_dbg(interface, "release"); mutex_lock(&chaoskey_list_lock); mutex_lock(&dev->lock); usb_dbg(interface, "open count at release is %d", dev->open); if (dev->open <= 0) { usb_dbg(interface, "invalid open count (%d)", dev->open); rv = -ENODEV; goto bail; } --dev->open; if (!dev->present) { if (dev->open == 0) { mutex_unlock(&dev->lock); chaoskey_free(dev); goto destruction; } } bail: mutex_unlock(&dev->lock); destruction: mutex_unlock(&chaoskey_list_lock); usb_dbg(interface, "release success"); return rv; } static void chaos_read_callback(struct urb *urb) { struct chaoskey *dev = urb->context; int status = urb->status; usb_dbg(dev->interface, "callback status (%d)", status); if (status == 0) dev->valid = urb->actual_length; else dev->valid = 0; dev->used = 0; /* must be seen first before validity is announced */ smp_wmb(); dev->reading = false; wake_up(&dev->wait_q); } /* Fill the buffer. Called with dev->lock held */ static int _chaoskey_fill(struct chaoskey *dev) { DEFINE_WAIT(wait); int result; bool started; usb_dbg(dev->interface, "fill"); /* Return immediately if someone called before the buffer was * empty */ if (dev->valid != dev->used) { usb_dbg(dev->interface, "not empty yet (valid %d used %d)", dev->valid, dev->used); return 0; } /* Bail if the device has been removed */ if (!dev->present) { usb_dbg(dev->interface, "device not present"); return -ENODEV; } /* Make sure the device is awake */ result = usb_autopm_get_interface(dev->interface); if (result) { usb_dbg(dev->interface, "wakeup failed (result %d)", result); return result; } dev->reading = true; result = usb_submit_urb(dev->urb, GFP_KERNEL); if (result < 0) { result = usb_translate_errors(result); dev->reading = false; goto out; } /* The first read on the Alea takes a little under 2 seconds. * Reads after the first read take only a few microseconds * though. Presumably the entropy-generating circuit needs * time to ramp up. So, we wait longer on the first read. */ started = dev->reads_started; dev->reads_started = true; result = wait_event_interruptible_timeout( dev->wait_q, !dev->reading, (started ? NAK_TIMEOUT : ALEA_FIRST_TIMEOUT) ); if (result < 0) { usb_kill_urb(dev->urb); goto out; } if (result == 0) { result = -ETIMEDOUT; usb_kill_urb(dev->urb); } else { result = dev->valid; } out: /* Let the device go back to sleep eventually */ usb_autopm_put_interface(dev->interface); usb_dbg(dev->interface, "read %d bytes", dev->valid); return result; } static ssize_t chaoskey_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct chaoskey *dev; ssize_t read_count = 0; int this_time; int result = 0; unsigned long remain; dev = file->private_data; if (dev == NULL || !dev->present) return -ENODEV; usb_dbg(dev->interface, "read %zu", count); while (count > 0) { /* Grab the rng_lock briefly to ensure that the hwrng interface * gets priority over other user access */ result = mutex_lock_interruptible(&dev->rng_lock); if (result) goto bail; mutex_unlock(&dev->rng_lock); result = mutex_lock_interruptible(&dev->lock); if (result) goto bail; if (dev->valid == dev->used) { result = _chaoskey_fill(dev); if (result < 0) { mutex_unlock(&dev->lock); goto bail; } } this_time = dev->valid - dev->used; if (this_time > count) this_time = count; remain = copy_to_user(buffer, dev->buf + dev->used, this_time); if (remain) { result = -EFAULT; /* Consume the bytes that were copied so we don't leak * data to user space */ dev->used += this_time - remain; mutex_unlock(&dev->lock); goto bail; } count -= this_time; read_count += this_time; buffer += this_time; dev->used += this_time; mutex_unlock(&dev->lock); } bail: if (read_count) { usb_dbg(dev->interface, "read %zu bytes", read_count); return read_count; } usb_dbg(dev->interface, "empty read, result %d", result); if (result == -ETIMEDOUT) result = -EAGAIN; return result; } static int chaoskey_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) { struct chaoskey *dev = container_of(rng, struct chaoskey, hwrng); int this_time; usb_dbg(dev->interface, "rng_read max %zu wait %d", max, wait); if (!dev->present) { usb_dbg(dev->interface, "device not present"); return 0; } /* Hold the rng_lock until we acquire the device lock so that * this operation gets priority over other user access to the * device */ mutex_lock(&dev->rng_lock); mutex_lock(&dev->lock); mutex_unlock(&dev->rng_lock); /* Try to fill the buffer if empty. It doesn't actually matter * if _chaoskey_fill works; we'll just return zero bytes as * the buffer will still be empty */ if (dev->valid == dev->used) (void) _chaoskey_fill(dev); this_time = dev->valid - dev->used; if (this_time > max) this_time = max; memcpy(data, dev->buf + dev->used, this_time); dev->used += this_time; mutex_unlock(&dev->lock); usb_dbg(dev->interface, "rng_read this_time %d\n", this_time); return this_time; } #ifdef CONFIG_PM static int chaoskey_suspend(struct usb_interface *interface, pm_message_t message) { usb_dbg(interface, "suspend"); return 0; } static int chaoskey_resume(struct usb_interface *interface) { struct chaoskey *dev; struct usb_device *udev = interface_to_usbdev(interface); usb_dbg(interface, "resume"); dev = usb_get_intfdata(interface); /* * We may have lost power. * In that case the device that needs a long time * for the first requests needs an extended timeout * again */ if (le16_to_cpu(udev->descriptor.idVendor) == ALEA_VENDOR_ID) dev->reads_started = false; return 0; } #else #define chaoskey_suspend NULL #define chaoskey_resume NULL #endif /* file operation pointers */ static const struct file_operations chaoskey_fops = { .owner = THIS_MODULE, .read = chaoskey_read, .open = chaoskey_open, .release = chaoskey_release, .llseek = default_llseek, }; /* class driver information */ static struct usb_class_driver chaoskey_class = { .name = "chaoskey%d", .fops = &chaoskey_fops, .minor_base = USB_CHAOSKEY_MINOR_BASE, }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver chaoskey_driver = { .name = DRIVER_SHORT, .probe = chaoskey_probe, .disconnect = chaoskey_disconnect, .suspend = chaoskey_suspend, .resume = chaoskey_resume, .reset_resume = chaoskey_resume, .id_table = chaoskey_table, .supports_autosuspend = 1, }; module_usb_driver(chaoskey_driver);
2 9 8 4 6 1 5 8 2 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 // SPDX-License-Identifier: GPL-2.0-only /* Network filesystem write subrequest result collection, assessment * and retrying. * * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/export.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/pagemap.h> #include <linux/slab.h> #include "internal.h" /* Notes made in the collector */ #define HIT_PENDING 0x01 /* A front op was still pending */ #define NEED_REASSESS 0x02 /* Need to loop round and reassess */ #define MADE_PROGRESS 0x04 /* Made progress cleaning up a stream or the folio set */ #define BUFFERED 0x08 /* The pagecache needs cleaning up */ #define NEED_RETRY 0x10 /* A front op requests retrying */ #define SAW_FAILURE 0x20 /* One stream or hit a permanent failure */ /* * Successful completion of write of a folio to the server and/or cache. Note * that we are not allowed to lock the folio here on pain of deadlocking with * truncate. */ int netfs_folio_written_back(struct folio *folio) { enum netfs_folio_trace why = netfs_folio_trace_clear; struct netfs_inode *ictx = netfs_inode(folio->mapping->host); struct netfs_folio *finfo; struct netfs_group *group = NULL; int gcount = 0; if ((finfo = netfs_folio_info(folio))) { /* Streaming writes cannot be redirtied whilst under writeback, * so discard the streaming record. */ unsigned long long fend; fend = folio_pos(folio) + finfo->dirty_offset + finfo->dirty_len; if (fend > ictx->zero_point) ictx->zero_point = fend; folio_detach_private(folio); group = finfo->netfs_group; gcount++; kfree(finfo); why = netfs_folio_trace_clear_s; goto end_wb; } if ((group = netfs_folio_group(folio))) { if (group == NETFS_FOLIO_COPY_TO_CACHE) { why = netfs_folio_trace_clear_cc; folio_detach_private(folio); goto end_wb; } /* Need to detach the group pointer if the page didn't get * redirtied. If it has been redirtied, then it must be within * the same group. */ why = netfs_folio_trace_redirtied; if (!folio_test_dirty(folio)) { folio_detach_private(folio); gcount++; why = netfs_folio_trace_clear_g; } } end_wb: trace_netfs_folio(folio, why); folio_end_writeback(folio); return gcount; } /* * Unlock any folios we've finished with. */ static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq, unsigned int *notes) { struct folio_queue *folioq = wreq->buffer; unsigned long long collected_to = wreq->collected_to; unsigned int slot = wreq->buffer_head_slot; if (wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) { if (netfs_pgpriv2_unlock_copied_folios(wreq)) *notes |= MADE_PROGRESS; return; } if (slot >= folioq_nr_slots(folioq)) { folioq = netfs_delete_buffer_head(wreq); slot = 0; } for (;;) { struct folio *folio; struct netfs_folio *finfo; unsigned long long fpos, fend; size_t fsize, flen; folio = folioq_folio(folioq, slot); if (WARN_ONCE(!folio_test_writeback(folio), "R=%08x: folio %lx is not under writeback\n", wreq->debug_id, folio->index)) trace_netfs_folio(folio, netfs_folio_trace_not_under_wback); fpos = folio_pos(folio); fsize = folio_size(folio); finfo = netfs_folio_info(folio); flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize; fend = min_t(unsigned long long, fpos + flen, wreq->i_size); trace_netfs_collect_folio(wreq, folio, fend, collected_to); /* Unlock any folio we've transferred all of. */ if (collected_to < fend) break; wreq->nr_group_rel += netfs_folio_written_back(folio); wreq->cleaned_to = fpos + fsize; *notes |= MADE_PROGRESS; /* Clean up the head folioq. If we clear an entire folioq, then * we can get rid of it provided it's not also the tail folioq * being filled by the issuer. */ folioq_clear(folioq, slot); slot++; if (slot >= folioq_nr_slots(folioq)) { if (READ_ONCE(wreq->buffer_tail) == folioq) break; folioq = netfs_delete_buffer_head(wreq); slot = 0; } if (fpos + fsize >= collected_to) break; } wreq->buffer = folioq; wreq->buffer_head_slot = slot; } /* * Perform retries on the streams that need it. */ static void netfs_retry_write_stream(struct netfs_io_request *wreq, struct netfs_io_stream *stream) { struct list_head *next; _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr); if (list_empty(&stream->subrequests)) return; if (stream->source == NETFS_UPLOAD_TO_SERVER && wreq->netfs_ops->retry_request) wreq->netfs_ops->retry_request(wreq, stream); if (unlikely(stream->failed)) return; /* If there's no renegotiation to do, just resend each failed subreq. */ if (!stream->prepare_write) { struct netfs_io_subrequest *subreq; list_for_each_entry(subreq, &stream->subrequests, rreq_link) { if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) break; if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { struct iov_iter source = subreq->io_iter; iov_iter_revert(&source, subreq->len - source.count); __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_write(stream, subreq, &source); } } return; } next = stream->subrequests.next; do { struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp; struct iov_iter source; unsigned long long start, len; size_t part; bool boundary = false; /* Go through the stream and find the next span of contiguous * data that we then rejig (cifs, for example, needs the wsize * renegotiating) and reissue. */ from = list_entry(next, struct netfs_io_subrequest, rreq_link); to = from; start = from->start + from->transferred; len = from->len - from->transferred; if (test_bit(NETFS_SREQ_FAILED, &from->flags) || !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) return; list_for_each_continue(next, &stream->subrequests) { subreq = list_entry(next, struct netfs_io_subrequest, rreq_link); if (subreq->start + subreq->transferred != start + len || test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) || !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) break; to = subreq; len += to->len; } /* Determine the set of buffers we're going to use. Each * subreq gets a subset of a single overall contiguous buffer. */ netfs_reset_iter(from); source = from->io_iter; source.count = len; /* Work through the sublist. */ subreq = from; list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) { if (!len) break; /* Renegotiate max_len (wsize) */ trace_netfs_sreq(subreq, netfs_sreq_trace_retry); __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); stream->prepare_write(subreq); part = min(len, stream->sreq_max_len); subreq->len = part; subreq->start = start; subreq->transferred = 0; len -= part; start += part; if (len && subreq == to && __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags)) boundary = true; netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); netfs_reissue_write(stream, subreq, &source); if (subreq == to) break; } /* If we managed to use fewer subreqs, we can discard the * excess; if we used the same number, then we're done. */ if (!len) { if (subreq == to) continue; list_for_each_entry_safe_from(subreq, tmp, &stream->subrequests, rreq_link) { trace_netfs_sreq(subreq, netfs_sreq_trace_discard); list_del(&subreq->rreq_link); netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); if (subreq == to) break; } continue; } /* We ran out of subrequests, so we need to allocate some more * and insert them after. */ do { subreq = netfs_alloc_subrequest(wreq); subreq->source = to->source; subreq->start = start; subreq->debug_index = atomic_inc_return(&wreq->subreq_counter); subreq->stream_nr = to->stream_nr; __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index, refcount_read(&subreq->ref), netfs_sreq_trace_new); netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); list_add(&subreq->rreq_link, &to->rreq_link); to = list_next_entry(to, rreq_link); trace_netfs_sreq(subreq, netfs_sreq_trace_retry); stream->sreq_max_len = len; stream->sreq_max_segs = INT_MAX; switch (stream->source) { case NETFS_UPLOAD_TO_SERVER: netfs_stat(&netfs_n_wh_upload); stream->sreq_max_len = umin(len, wreq->wsize); break; case NETFS_WRITE_TO_CACHE: netfs_stat(&netfs_n_wh_write); break; default: WARN_ON_ONCE(1); } stream->prepare_write(subreq); part = umin(len, stream->sreq_max_len); subreq->len = subreq->transferred + part; len -= part; start += part; if (!len && boundary) { __set_bit(NETFS_SREQ_BOUNDARY, &to->flags); boundary = false; } netfs_reissue_write(stream, subreq, &source); if (!len) break; } while (len); } while (!list_is_head(next, &stream->subrequests)); } /* * Perform retries on the streams that need it. If we're doing content * encryption and the server copy changed due to a third-party write, we may * need to do an RMW cycle and also rewrite the data to the cache. */ static void netfs_retry_writes(struct netfs_io_request *wreq) { struct netfs_io_subrequest *subreq; struct netfs_io_stream *stream; int s; /* Wait for all outstanding I/O to quiesce before performing retries as * we may need to renegotiate the I/O sizes. */ for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; if (!stream->active) continue; list_for_each_entry(subreq, &stream->subrequests, rreq_link) { wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE); } } // TODO: Enc: Fetch changed partial pages // TODO: Enc: Reencrypt content if needed. // TODO: Enc: Wind back transferred point. // TODO: Enc: Mark cache pages for retry. for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; if (stream->need_retry) { stream->need_retry = false; netfs_retry_write_stream(wreq, stream); } } } /* * Collect and assess the results of various write subrequests. We may need to * retry some of the results - or even do an RMW cycle for content crypto. * * Note that we have a number of parallel, overlapping lists of subrequests, * one to the server and one to the local cache for example, which may not be * the same size or starting position and may not even correspond in boundary * alignment. */ static void netfs_collect_write_results(struct netfs_io_request *wreq) { struct netfs_io_subrequest *front, *remove; struct netfs_io_stream *stream; unsigned long long collected_to, issued_to; unsigned int notes; int s; _enter("%llx-%llx", wreq->start, wreq->start + wreq->len); trace_netfs_collect(wreq); trace_netfs_rreq(wreq, netfs_rreq_trace_collect); reassess_streams: issued_to = atomic64_read(&wreq->issued_to); smp_rmb(); collected_to = ULLONG_MAX; if (wreq->origin == NETFS_WRITEBACK || wreq->origin == NETFS_WRITETHROUGH || wreq->origin == NETFS_PGPRIV2_COPY_TO_CACHE) notes = BUFFERED; else notes = 0; /* Remove completed subrequests from the front of the streams and * advance the completion point on each stream. We stop when we hit * something that's in progress. The issuer thread may be adding stuff * to the tail whilst we're doing this. */ for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; /* Read active flag before list pointers */ if (!smp_load_acquire(&stream->active)) continue; front = stream->front; while (front) { trace_netfs_collect_sreq(wreq, front); //_debug("sreq [%x] %llx %zx/%zx", // front->debug_index, front->start, front->transferred, front->len); if (stream->collected_to < front->start) { trace_netfs_collect_gap(wreq, stream, issued_to, 'F'); stream->collected_to = front->start; } /* Stall if the front is still undergoing I/O. */ if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) { notes |= HIT_PENDING; break; } smp_rmb(); /* Read counters after I-P flag. */ if (stream->failed) { stream->collected_to = front->start + front->len; notes |= MADE_PROGRESS | SAW_FAILURE; goto cancel; } if (front->start + front->transferred > stream->collected_to) { stream->collected_to = front->start + front->transferred; stream->transferred = stream->collected_to - wreq->start; notes |= MADE_PROGRESS; } if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { stream->failed = true; stream->error = front->error; if (stream->source == NETFS_UPLOAD_TO_SERVER) mapping_set_error(wreq->mapping, front->error); notes |= NEED_REASSESS | SAW_FAILURE; break; } if (front->transferred < front->len) { stream->need_retry = true; notes |= NEED_RETRY | MADE_PROGRESS; break; } cancel: /* Remove if completely consumed. */ spin_lock_bh(&wreq->lock); remove = front; list_del_init(&front->rreq_link); front = list_first_entry_or_null(&stream->subrequests, struct netfs_io_subrequest, rreq_link); stream->front = front; spin_unlock_bh(&wreq->lock); netfs_put_subrequest(remove, false, notes & SAW_FAILURE ? netfs_sreq_trace_put_cancel : netfs_sreq_trace_put_done); } /* If we have an empty stream, we need to jump it forward * otherwise the collection point will never advance. */ if (!front && issued_to > stream->collected_to) { trace_netfs_collect_gap(wreq, stream, issued_to, 'E'); stream->collected_to = issued_to; } if (stream->collected_to < collected_to) collected_to = stream->collected_to; } if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to) wreq->collected_to = collected_to; for (s = 0; s < NR_IO_STREAMS; s++) { stream = &wreq->io_streams[s]; if (stream->active) trace_netfs_collect_stream(wreq, stream); } trace_netfs_collect_state(wreq, wreq->collected_to, notes); /* Unlock any folios that we have now finished with. */ if (notes & BUFFERED) { if (wreq->cleaned_to < wreq->collected_to) netfs_writeback_unlock_folios(wreq, &notes); } else { wreq->cleaned_to = wreq->collected_to; } // TODO: Discard encryption buffers if (notes & NEED_RETRY) goto need_retry; if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) { trace_netfs_rreq(wreq, netfs_rreq_trace_unpause); clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags); wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE); } if (notes & NEED_REASSESS) { //cond_resched(); goto reassess_streams; } if (notes & MADE_PROGRESS) { //cond_resched(); goto reassess_streams; } out: netfs_put_group_many(wreq->group, wreq->nr_group_rel); wreq->nr_group_rel = 0; _leave(" = %x", notes); return; need_retry: /* Okay... We're going to have to retry one or both streams. Note * that any partially completed op will have had any wholly transferred * folios removed from it. */ _debug("retry"); netfs_retry_writes(wreq); goto out; } /* * Perform the collection of subrequests, folios and encryption buffers. */ void netfs_write_collection_worker(struct work_struct *work) { struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work); struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; int s; _enter("R=%x", wreq->debug_id); netfs_see_request(wreq, netfs_rreq_trace_see_work); if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) { netfs_put_request(wreq, false, netfs_rreq_trace_put_work); return; } netfs_collect_write_results(wreq); /* We're done when the app thread has finished posting subreqs and all * the queues in all the streams are empty. */ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) { netfs_put_request(wreq, false, netfs_rreq_trace_put_work); return; } smp_rmb(); /* Read ALL_QUEUED before lists. */ transferred = LONG_MAX; for (s = 0; s < NR_IO_STREAMS; s++) { struct netfs_io_stream *stream = &wreq->io_streams[s]; if (!stream->active) continue; if (!list_empty(&stream->subrequests)) { netfs_put_request(wreq, false, netfs_rreq_trace_put_work); return; } if (stream->transferred < transferred) transferred = stream->transferred; } /* Okay, declare that all I/O is complete. */ wreq->transferred = transferred; trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); if (wreq->io_streams[1].active && wreq->io_streams[1].failed) { /* Cache write failure doesn't prevent writeback completion * unless we're in disconnected mode. */ ictx->ops->invalidate_cache(wreq); } if (wreq->cleanup) wreq->cleanup(wreq); if (wreq->origin == NETFS_DIO_WRITE && wreq->mapping->nrpages) { /* mmap may have got underfoot and we may now have folios * locally covering the region we just wrote. Attempt to * discard the folios, but leave in place any modified locally. * ->write_iter() is prevented from interfering by the DIO * counter. */ pgoff_t first = wreq->start >> PAGE_SHIFT; pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT; invalidate_inode_pages2_range(wreq->mapping, first, last); } if (wreq->origin == NETFS_DIO_WRITE) inode_dio_end(wreq->inode); _debug("finished"); trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip); clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags); wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS); if (wreq->iocb) { size_t written = min(wreq->transferred, wreq->len); wreq->iocb->ki_pos += written; if (wreq->iocb->ki_complete) wreq->iocb->ki_complete( wreq->iocb, wreq->error ? wreq->error : written); wreq->iocb = VFS_PTR_POISON; } netfs_clear_subrequests(wreq, false); netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete); } /* * Wake the collection work item. */ void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async) { if (!work_pending(&wreq->work)) { netfs_get_request(wreq, netfs_rreq_trace_get_work); if (!queue_work(system_unbound_wq, &wreq->work)) netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq); } } /** * netfs_write_subrequest_terminated - Note the termination of a write operation. * @_op: The I/O request that has terminated. * @transferred_or_error: The amount of data transferred or an error code. * @was_async: The termination was asynchronous * * This tells the library that a contributory write I/O operation has * terminated, one way or another, and that it should collect the results. * * The caller indicates in @transferred_or_error the outcome of the operation, * supplying a positive value to indicate the number of bytes transferred or a * negative error code. The library will look after reissuing I/O operations * as appropriate and writing downloaded data to the cache. * * If @was_async is true, the caller might be running in softirq or interrupt * context and we can't sleep. * * When this is called, ownership of the subrequest is transferred back to the * library, along with a ref. * * Note that %_op is a void* so that the function can be passed to * kiocb::term_func without the need for a casting wrapper. */ void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error, bool was_async) { struct netfs_io_subrequest *subreq = _op; struct netfs_io_request *wreq = subreq->rreq; struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr]; _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error); switch (subreq->source) { case NETFS_UPLOAD_TO_SERVER: netfs_stat(&netfs_n_wh_upload_done); break; case NETFS_WRITE_TO_CACHE: netfs_stat(&netfs_n_wh_write_done); break; case NETFS_INVALID_WRITE: break; default: BUG(); } if (IS_ERR_VALUE(transferred_or_error)) { subreq->error = transferred_or_error; if (subreq->error == -EAGAIN) set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); else set_bit(NETFS_SREQ_FAILED, &subreq->flags); trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write); switch (subreq->source) { case NETFS_WRITE_TO_CACHE: netfs_stat(&netfs_n_wh_write_failed); break; case NETFS_UPLOAD_TO_SERVER: netfs_stat(&netfs_n_wh_upload_failed); break; default: break; } trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause); set_bit(NETFS_RREQ_PAUSE, &wreq->flags); } else { if (WARN(transferred_or_error > subreq->len - subreq->transferred, "Subreq excess write: R=%x[%x] %zd > %zu - %zu", wreq->debug_id, subreq->debug_index, transferred_or_error, subreq->len, subreq->transferred)) transferred_or_error = subreq->len - subreq->transferred; subreq->error = 0; subreq->transferred += transferred_or_error; if (subreq->transferred < subreq->len) set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); } trace_netfs_sreq(subreq, netfs_sreq_trace_terminated); clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags); wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS); /* If we are at the head of the queue, wake up the collector, * transferring a ref to it if we were the ones to do so. */ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) netfs_wake_write_collector(wreq, was_async); netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); } EXPORT_SYMBOL(netfs_write_subrequest_terminated);
230 26 1 25 49 40 20 2 34 34 26 21 33 4 2 2 17 9 2 8 32 194 193 194 123 74 15 42 42 42 11 11 1 6 5 43 4 5 14 14 13 45 43 2 41 3 42 3 43 2 27 11 15 23 3 25 1 10 16 7 19 94 61 22 5 9 32 8 8 23 162 89 98 9 197 1 96 32 1 2 81 1 3 1 20 20 5 1 2 12 9 8 8 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/hfsplus/inode.c * * Copyright (C) 2001 * Brad Boyer (flar@allandria.com) * (C) 2003 Ardis Technologies <roman@ardistech.com> * * Inode handling routines */ #include <linux/blkdev.h> #include <linux/mm.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/uio.h> #include <linux/fileattr.h> #include "hfsplus_fs.h" #include "hfsplus_raw.h" #include "xattr.h" static int hfsplus_read_folio(struct file *file, struct folio *folio) { return block_read_full_folio(folio, hfsplus_get_block); } static void hfsplus_write_failed(struct address_space *mapping, loff_t to) { struct inode *inode = mapping->host; if (to > inode->i_size) { truncate_pagecache(inode, inode->i_size); hfsplus_file_truncate(inode); } } int hfsplus_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, struct folio **foliop, void **fsdata) { int ret; ret = cont_write_begin(file, mapping, pos, len, foliop, fsdata, hfsplus_get_block, &HFSPLUS_I(mapping->host)->phys_size); if (unlikely(ret)) hfsplus_write_failed(mapping, pos + len); return ret; } static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) { return generic_block_bmap(mapping, block, hfsplus_get_block); } static bool hfsplus_release_folio(struct folio *folio, gfp_t mask) { struct inode *inode = folio->mapping->host; struct super_block *sb = inode->i_sb; struct hfs_btree *tree; struct hfs_bnode *node; u32 nidx; int i; bool res = true; switch (inode->i_ino) { case HFSPLUS_EXT_CNID: tree = HFSPLUS_SB(sb)->ext_tree; break; case HFSPLUS_CAT_CNID: tree = HFSPLUS_SB(sb)->cat_tree; break; case HFSPLUS_ATTR_CNID: tree = HFSPLUS_SB(sb)->attr_tree; break; default: BUG(); return false; } if (!tree) return false; if (tree->node_size >= PAGE_SIZE) { nidx = folio->index >> (tree->node_size_shift - PAGE_SHIFT); spin_lock(&tree->hash_lock); node = hfs_bnode_findhash(tree, nidx); if (!node) ; else if (atomic_read(&node->refcnt)) res = false; if (res && node) { hfs_bnode_unhash(node); hfs_bnode_free(node); } spin_unlock(&tree->hash_lock); } else { nidx = folio->index << (PAGE_SHIFT - tree->node_size_shift); i = 1 << (PAGE_SHIFT - tree->node_size_shift); spin_lock(&tree->hash_lock); do { node = hfs_bnode_findhash(tree, nidx++); if (!node) continue; if (atomic_read(&node->refcnt)) { res = false; break; } hfs_bnode_unhash(node); hfs_bnode_free(node); } while (--i && nidx < tree->node_count); spin_unlock(&tree->hash_lock); } return res ? try_to_free_buffers(folio) : false; } static ssize_t hfsplus_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; size_t count = iov_iter_count(iter); ssize_t ret; ret = blockdev_direct_IO(iocb, inode, iter, hfsplus_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) { loff_t isize = i_size_read(inode); loff_t end = iocb->ki_pos + count; if (end > isize) hfsplus_write_failed(mapping, end); } return ret; } static int hfsplus_writepages(struct address_space *mapping, struct writeback_control *wbc) { return mpage_writepages(mapping, wbc, hfsplus_get_block); } const struct address_space_operations hfsplus_btree_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hfsplus_read_folio, .writepages = hfsplus_writepages, .write_begin = hfsplus_write_begin, .write_end = generic_write_end, .migrate_folio = buffer_migrate_folio, .bmap = hfsplus_bmap, .release_folio = hfsplus_release_folio, }; const struct address_space_operations hfsplus_aops = { .dirty_folio = block_dirty_folio, .invalidate_folio = block_invalidate_folio, .read_folio = hfsplus_read_folio, .write_begin = hfsplus_write_begin, .write_end = generic_write_end, .bmap = hfsplus_bmap, .direct_IO = hfsplus_direct_IO, .writepages = hfsplus_writepages, .migrate_folio = buffer_migrate_folio, }; const struct dentry_operations hfsplus_dentry_operations = { .d_hash = hfsplus_hash_dentry, .d_compare = hfsplus_compare_dentry, }; static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); u16 mode; mode = be16_to_cpu(perms->mode); i_uid_write(inode, be32_to_cpu(perms->owner)); if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode)) inode->i_uid = sbi->uid; i_gid_write(inode, be32_to_cpu(perms->group)); if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode)) inode->i_gid = sbi->gid; if (dir) { mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask)); mode |= S_IFDIR; } else if (!mode) mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask)); inode->i_mode = mode; HFSPLUS_I(inode)->userflags = perms->userflags; if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) inode->i_flags |= S_IMMUTABLE; else inode->i_flags &= ~S_IMMUTABLE; if (perms->rootflags & HFSPLUS_FLG_APPEND) inode->i_flags |= S_APPEND; else inode->i_flags &= ~S_APPEND; } static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode)->rsrc_inode; if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EOVERFLOW; atomic_inc(&HFSPLUS_I(inode)->opencnt); return 0; } static int hfsplus_file_release(struct inode *inode, struct file *file) { struct super_block *sb = inode->i_sb; if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode)->rsrc_inode; if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) { inode_lock(inode); hfsplus_file_truncate(inode); if (inode->i_flags & S_DEAD) { hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb)->hidden_dir, NULL); hfsplus_delete_inode(inode); } inode_unlock(inode); } return 0; } static int hfsplus_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_inode(dentry); int error; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { inode_dio_wait(inode); if (attr->ia_size > inode->i_size) { error = generic_cont_expand_simple(inode, attr->ia_size); if (error) return error; } truncate_setsize(inode, attr->ia_size); hfsplus_file_truncate(inode); inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); } setattr_copy(&nop_mnt_idmap, inode, attr); mark_inode_dirty(inode); return 0; } int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); if (request_mask & STATX_BTIME) { stat->result_mask |= STATX_BTIME; stat->btime = hfsp_mt2ut(hip->create_date); } if (inode->i_flags & S_APPEND) stat->attributes |= STATX_ATTR_APPEND; if (inode->i_flags & S_IMMUTABLE) stat->attributes |= STATX_ATTR_IMMUTABLE; if (hip->userflags & HFSPLUS_FLG_NODUMP) stat->attributes |= STATX_ATTR_NODUMP; stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | STATX_ATTR_NODUMP; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); return 0; } int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; struct hfsplus_inode_info *hip = HFSPLUS_I(inode); struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); int error = 0, error2; error = file_write_and_wait_range(file, start, end); if (error) return error; inode_lock(inode); /* * Sync inode metadata into the catalog and extent trees. */ sync_inode_metadata(inode, 1); /* * And explicitly write out the btrees. */ if (test_and_clear_bit(HFSPLUS_I_CAT_DIRTY, &hip->flags)) error = filemap_write_and_wait(sbi->cat_tree->inode->i_mapping); if (test_and_clear_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags)) { error2 = filemap_write_and_wait(sbi->ext_tree->inode->i_mapping); if (!error) error = error2; } if (test_and_clear_bit(HFSPLUS_I_ATTR_DIRTY, &hip->flags)) { if (sbi->attr_tree) { error2 = filemap_write_and_wait( sbi->attr_tree->inode->i_mapping); if (!error) error = error2; } else { pr_err("sync non-existent attributes tree\n"); } } if (test_and_clear_bit(HFSPLUS_I_ALLOC_DIRTY, &hip->flags)) { error2 = filemap_write_and_wait(sbi->alloc_file->i_mapping); if (!error) error = error2; } if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) blkdev_issue_flush(inode->i_sb->s_bdev); inode_unlock(inode); return error; } static const struct inode_operations hfsplus_file_inode_operations = { .setattr = hfsplus_setattr, .getattr = hfsplus_getattr, .listxattr = hfsplus_listxattr, .fileattr_get = hfsplus_fileattr_get, .fileattr_set = hfsplus_fileattr_set, }; static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = filemap_splice_read, .fsync = hfsplus_file_fsync, .open = hfsplus_file_open, .release = hfsplus_file_release, .unlocked_ioctl = hfsplus_ioctl, }; struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir, umode_t mode) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct inode *inode = new_inode(sb); struct hfsplus_inode_info *hip; if (!inode) return NULL; inode->i_ino = sbi->next_cnid++; inode_init_owner(&nop_mnt_idmap, inode, dir, mode); set_nlink(inode, 1); simple_inode_init_ts(inode); hip = HFSPLUS_I(inode); INIT_LIST_HEAD(&hip->open_dir_list); spin_lock_init(&hip->open_dir_lock); mutex_init(&hip->extents_lock); atomic_set(&hip->opencnt, 0); hip->extent_state = 0; hip->flags = 0; hip->userflags = 0; hip->subfolders = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->alloc_blocks = 0; hip->first_blocks = 0; hip->cached_start = 0; hip->cached_blocks = 0; hip->phys_size = 0; hip->fs_blocks = 0; hip->rsrc_inode = NULL; if (S_ISDIR(inode->i_mode)) { inode->i_size = 2; sbi->folder_count++; inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (S_ISREG(inode->i_mode)) { sbi->file_count++; inode->i_op = &hfsplus_file_inode_operations; inode->i_fop = &hfsplus_file_operations; inode->i_mapping->a_ops = &hfsplus_aops; hip->clump_blocks = sbi->data_clump_blocks; } else if (S_ISLNK(inode->i_mode)) { sbi->file_count++; inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &hfsplus_aops; hip->clump_blocks = 1; } else sbi->file_count++; insert_inode_hash(inode); mark_inode_dirty(inode); hfsplus_mark_mdb_dirty(sb); return inode; } void hfsplus_delete_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; if (S_ISDIR(inode->i_mode)) { HFSPLUS_SB(sb)->folder_count--; hfsplus_mark_mdb_dirty(sb); return; } HFSPLUS_SB(sb)->file_count--; if (S_ISREG(inode->i_mode)) { if (!inode->i_nlink) { inode->i_size = 0; hfsplus_file_truncate(inode); } } else if (S_ISLNK(inode->i_mode)) { inode->i_size = 0; hfsplus_file_truncate(inode); } hfsplus_mark_mdb_dirty(sb); } void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) { struct super_block *sb = inode->i_sb; struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); u32 count; int i; memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); for (count = 0, i = 0; i < 8; i++) count += be32_to_cpu(fork->extents[i].block_count); hip->first_blocks = count; memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->cached_start = 0; hip->cached_blocks = 0; hip->alloc_blocks = be32_to_cpu(fork->total_blocks); hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); hip->clump_blocks = be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; if (!hip->clump_blocks) { hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? sbi->rsrc_clump_blocks : sbi->data_clump_blocks; } } void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) { memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, sizeof(hfsplus_extent_rec)); fork->total_size = cpu_to_be64(inode->i_size); fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); } int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) { hfsplus_cat_entry entry; int res = 0; u16 type; type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); HFSPLUS_I(inode)->linkid = 0; if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) { pr_err("bad catalog folder entry\n"); res = -EIO; goto out; } hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_folder)); hfsplus_get_perms(inode, &folder->permissions, 1); set_nlink(inode, 1); inode->i_size = 2 + be32_to_cpu(folder->valence); inode_set_atime_to_ts(inode, hfsp_mt2ut(folder->access_date)); inode_set_mtime_to_ts(inode, hfsp_mt2ut(folder->content_mod_date)); inode_set_ctime_to_ts(inode, hfsp_mt2ut(folder->attribute_mod_date)); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { HFSPLUS_I(inode)->subfolders = be32_to_cpu(folder->subfolders); } inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (type == HFSPLUS_FILE) { struct hfsplus_cat_file *file = &entry.file; if (fd->entrylength < sizeof(struct hfsplus_cat_file)) { pr_err("bad catalog file entry\n"); res = -EIO; goto out; } hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_read_fork(inode, HFSPLUS_IS_RSRC(inode) ? &file->rsrc_fork : &file->data_fork); hfsplus_get_perms(inode, &file->permissions, 0); set_nlink(inode, 1); if (S_ISREG(inode->i_mode)) { if (file->permissions.dev) set_nlink(inode, be32_to_cpu(file->permissions.dev)); inode->i_op = &hfsplus_file_inode_operations; inode->i_fop = &hfsplus_file_operations; inode->i_mapping->a_ops = &hfsplus_aops; } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &hfsplus_aops; } else { init_special_inode(inode, inode->i_mode, be32_to_cpu(file->permissions.dev)); } inode_set_atime_to_ts(inode, hfsp_mt2ut(file->access_date)); inode_set_mtime_to_ts(inode, hfsp_mt2ut(file->content_mod_date)); inode_set_ctime_to_ts(inode, hfsp_mt2ut(file->attribute_mod_date)); HFSPLUS_I(inode)->create_date = file->create_date; } else { pr_err("bad catalog entry used to create inode\n"); res = -EIO; } out: return res; } int hfsplus_cat_write_inode(struct inode *inode) { struct inode *main_inode = inode; struct hfs_find_data fd; hfsplus_cat_entry entry; int res = 0; if (HFSPLUS_IS_RSRC(inode)) main_inode = HFSPLUS_I(inode)->rsrc_inode; if (!main_inode->i_nlink) return 0; if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) /* panic? */ return -EIO; if (hfsplus_find_cat(main_inode->i_sb, main_inode->i_ino, &fd)) /* panic? */ goto out; if (S_ISDIR(main_inode->i_mode)) { struct hfsplus_cat_folder *folder = &entry.folder; if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) { pr_err("bad catalog folder entry\n"); res = -EIO; goto out; } hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); /* simple node checks? */ hfsplus_cat_set_perms(inode, &folder->permissions); folder->access_date = hfsp_ut2mt(inode_get_atime(inode)); folder->content_mod_date = hfsp_ut2mt(inode_get_mtime(inode)); folder->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode)); folder->valence = cpu_to_be32(inode->i_size - 2); if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { folder->subfolders = cpu_to_be32(HFSPLUS_I(inode)->subfolders); } hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); } else if (HFSPLUS_IS_RSRC(inode)) { struct hfsplus_cat_file *file = &entry.file; hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->rsrc_fork); hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); } else { struct hfsplus_cat_file *file = &entry.file; if (fd.entrylength < sizeof(struct hfsplus_cat_file)) { pr_err("bad catalog file entry\n"); res = -EIO; goto out; } hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); hfsplus_cat_set_perms(inode, &file->permissions); if (HFSPLUS_FLG_IMMUTABLE & (file->permissions.rootflags | file->permissions.userflags)) file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); else file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED); file->access_date = hfsp_ut2mt(inode_get_atime(inode)); file->content_mod_date = hfsp_ut2mt(inode_get_mtime(inode)); file->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode)); hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); } set_bit(HFSPLUS_I_CAT_DIRTY, &HFSPLUS_I(inode)->flags); out: hfs_find_exit(&fd); return res; } int hfsplus_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int flags = 0; if (inode->i_flags & S_IMMUTABLE) flags |= FS_IMMUTABLE_FL; if (inode->i_flags & S_APPEND) flags |= FS_APPEND_FL; if (hip->userflags & HFSPLUS_FLG_NODUMP) flags |= FS_NODUMP_FL; fileattr_fill_flags(fa, flags); return 0; } int hfsplus_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); unsigned int new_fl = 0; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; /* don't silently ignore unsupported ext2 flags */ if (fa->flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) return -EOPNOTSUPP; if (fa->flags & FS_IMMUTABLE_FL) new_fl |= S_IMMUTABLE; if (fa->flags & FS_APPEND_FL) new_fl |= S_APPEND; inode_set_flags(inode, new_fl, S_IMMUTABLE | S_APPEND); if (fa->flags & FS_NODUMP_FL) hip->userflags |= HFSPLUS_FLG_NODUMP; else hip->userflags &= ~HFSPLUS_FLG_NODUMP; inode_set_ctime_current(inode); mark_inode_dirty(inode); return 0; }
4 4 1 3 3 1 1 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 // SPDX-License-Identifier: GPL-2.0 #include <linux/ptrace.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> #include <linux/export.h> #include <asm/syscall.h> static int collect_syscall(struct task_struct *target, struct syscall_info *info) { unsigned long args[6] = { }; struct pt_regs *regs; if (!try_get_task_stack(target)) { /* Task has no stack, so the task isn't in a syscall. */ memset(info, 0, sizeof(*info)); info->data.nr = -1; return 0; } regs = task_pt_regs(target); if (unlikely(!regs)) { put_task_stack(target); return -EAGAIN; } info->sp = user_stack_pointer(regs); info->data.instruction_pointer = instruction_pointer(regs); info->data.nr = syscall_get_nr(target, regs); if (info->data.nr != -1L) syscall_get_arguments(target, regs, args); info->data.args[0] = args[0]; info->data.args[1] = args[1]; info->data.args[2] = args[2]; info->data.args[3] = args[3]; info->data.args[4] = args[4]; info->data.args[5] = args[5]; put_task_stack(target); return 0; } /** * task_current_syscall - Discover what a blocked task is doing. * @target: thread to examine * @info: structure with the following fields: * .sp - filled with user stack pointer * .data.nr - filled with system call number or -1 * .data.args - filled with @maxargs system call arguments * .data.instruction_pointer - filled with user PC * * If @target is blocked in a system call, returns zero with @info.data.nr * set to the call's number and @info.data.args filled in with its * arguments. Registers not used for system call arguments may not be available * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target * has finished its system call but not yet returned to user mode, such * as when it's stopped for signal handling or syscall exit tracing. * * If @target is blocked in the kernel during a fault or exception, * returns zero with *@info.data.nr set to -1 and does not fill in * @info.data.args. If so, it's now safe to examine @target using * &struct user_regset get() calls as long as we're sure @target won't return * to user mode. * * Returns -%EAGAIN if @target does not remain blocked. */ int task_current_syscall(struct task_struct *target, struct syscall_info *info) { unsigned long ncsw; unsigned int state; if (target == current) return collect_syscall(target, info); state = READ_ONCE(target->__state); if (unlikely(!state)) return -EAGAIN; ncsw = wait_task_inactive(target, state); if (unlikely(!ncsw) || unlikely(collect_syscall(target, info)) || unlikely(wait_task_inactive(target, state) != ncsw)) return -EAGAIN; return 0; }
54 32 7227 1276 1250 32 449 417 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> #include <linux/bug.h> #include <linux/atomic.h> #include <linux/errseq.h> #include <linux/log2.h> /* * An errseq_t is a way of recording errors in one place, and allowing any * number of "subscribers" to tell whether it has changed since a previous * point where it was sampled. * * It's implemented as an unsigned 32-bit value. The low order bits are * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits * are used as a counter. This is done with atomics instead of locking so that * these functions can be called from any context. * * The general idea is for consumers to sample an errseq_t value. That value * can later be used to tell whether any new errors have occurred since that * sampling was done. * * Note that there is a risk of collisions if new errors are being recorded * frequently, since we have so few bits to use as a counter. * * To mitigate this, one bit is used as a flag to tell whether the value has * been sampled since a new value was recorded. That allows us to avoid bumping * the counter if no one has sampled it since the last time an error was * recorded. * * A new errseq_t should always be zeroed out. A errseq_t value of all zeroes * is the special (but common) case where there has never been an error. An all * zero value thus serves as the "epoch" if one wishes to know whether there * has ever been an error set since it was first initialized. */ /* The low bits are designated for error code (max of MAX_ERRNO) */ #define ERRSEQ_SHIFT ilog2(MAX_ERRNO + 1) /* This bit is used as a flag to indicate whether the value has been seen */ #define ERRSEQ_SEEN (1 << ERRSEQ_SHIFT) /* The lowest bit of the counter */ #define ERRSEQ_CTR_INC (1 << (ERRSEQ_SHIFT + 1)) /** * errseq_set - set a errseq_t for later reporting * @eseq: errseq_t field that should be set * @err: error to set (must be between -1 and -MAX_ERRNO) * * This function sets the error in @eseq, and increments the sequence counter * if the last sequence was sampled at some point in the past. * * Any error set will always overwrite an existing error. * * Return: The previous value, primarily for debugging purposes. The * return value should not be used as a previously sampled value in later * calls as it will not have the SEEN flag set. */ errseq_t errseq_set(errseq_t *eseq, int err) { errseq_t cur, old; /* MAX_ERRNO must be able to serve as a mask */ BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1); /* * Ensure the error code actually fits where we want it to go. If it * doesn't then just throw a warning and don't record anything. We * also don't accept zero here as that would effectively clear a * previous error. */ old = READ_ONCE(*eseq); if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO), "err = %d\n", err)) return old; for (;;) { errseq_t new; /* Clear out error bits and set new error */ new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err; /* Only increment if someone has looked at it */ if (old & ERRSEQ_SEEN) new += ERRSEQ_CTR_INC; /* If there would be no change, then call it done */ if (new == old) { cur = new; break; } /* Try to swap the new value into place */ cur = cmpxchg(eseq, old, new); /* * Call it success if we did the swap or someone else beat us * to it for the same value. */ if (likely(cur == old || cur == new)) break; /* Raced with an update, try again */ old = cur; } return cur; } EXPORT_SYMBOL(errseq_set); /** * errseq_sample() - Grab current errseq_t value. * @eseq: Pointer to errseq_t to be sampled. * * This function allows callers to initialise their errseq_t variable. * If the error has been "seen", new callers will not see an old error. * If there is an unseen error in @eseq, the caller of this function will * see it the next time it checks for an error. * * Context: Any context. * Return: The current errseq value. */ errseq_t errseq_sample(errseq_t *eseq) { errseq_t old = READ_ONCE(*eseq); /* If nobody has seen this error yet, then we can be the first. */ if (!(old & ERRSEQ_SEEN)) old = 0; return old; } EXPORT_SYMBOL(errseq_sample); /** * errseq_check() - Has an error occurred since a particular sample point? * @eseq: Pointer to errseq_t value to be checked. * @since: Previously-sampled errseq_t from which to check. * * Grab the value that eseq points to, and see if it has changed @since * the given value was sampled. The @since value is not advanced, so there * is no need to mark the value as seen. * * Return: The latest error set in the errseq_t or 0 if it hasn't changed. */ int errseq_check(errseq_t *eseq, errseq_t since) { errseq_t cur = READ_ONCE(*eseq); if (likely(cur == since)) return 0; return -(cur & MAX_ERRNO); } EXPORT_SYMBOL(errseq_check); /** * errseq_check_and_advance() - Check an errseq_t and advance to current value. * @eseq: Pointer to value being checked and reported. * @since: Pointer to previously-sampled errseq_t to check against and advance. * * Grab the eseq value, and see whether it matches the value that @since * points to. If it does, then just return 0. * * If it doesn't, then the value has changed. Set the "seen" flag, and try to * swap it into place as the new eseq value. Then, set that value as the new * "since" value, and return whatever the error portion is set to. * * Note that no locking is provided here for concurrent updates to the "since" * value. The caller must provide that if necessary. Because of this, callers * may want to do a lockless errseq_check before taking the lock and calling * this. * * Return: Negative errno if one has been stored, or 0 if no new error has * occurred. */ int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) { int err = 0; errseq_t old, new; /* * Most callers will want to use the inline wrapper to check this, * so that the common case of no error is handled without needing * to take the lock that protects the "since" value. */ old = READ_ONCE(*eseq); if (old != *since) { /* * Set the flag and try to swap it into place if it has * changed. * * We don't care about the outcome of the swap here. If the * swap doesn't occur, then it has either been updated by a * writer who is altering the value in some way (updating * counter or resetting the error), or another reader who is * just setting the "seen" flag. Either outcome is OK, and we * can advance "since" and return an error based on what we * have. */ new = old | ERRSEQ_SEEN; if (new != old) cmpxchg(eseq, old, new); *since = new; err = -(new & MAX_ERRNO); } return err; } EXPORT_SYMBOL(errseq_check_and_advance);
61 2 76 76 30 43 6 1 36 33 34 30 30 30 40 29 29 9 9 14 61 90 13 87 1 22 15 8 66 38 6 41 43 41 2 26 13 28 7 31 30 29 45 1 2 16 27 4 31 4 27 31 30 30 84 4 35 38 29 28 2 2 1 2 22 15 16 2 1 26 30 22 28 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * MIDI device handlers * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ #include <sound/asoundef.h> #include "seq_oss_midi.h" #include "seq_oss_readq.h" #include "seq_oss_timer.h" #include "seq_oss_event.h" #include <sound/seq_midi_event.h> #include "../seq_lock.h" #include <linux/init.h> #include <linux/slab.h> #include <linux/nospec.h> /* * constants */ #define SNDRV_SEQ_OSS_MAX_MIDI_NAME 30 /* * definition of midi device record */ struct seq_oss_midi { int seq_device; /* device number */ int client; /* sequencer client number */ int port; /* sequencer port number */ unsigned int flags; /* port capability */ int opened; /* flag for opening */ unsigned char name[SNDRV_SEQ_OSS_MAX_MIDI_NAME]; struct snd_midi_event *coder; /* MIDI event coder */ struct seq_oss_devinfo *devinfo; /* assigned OSSseq device */ snd_use_lock_t use_lock; struct mutex open_mutex; }; /* * midi device table */ static int max_midi_devs; static struct seq_oss_midi *midi_devs[SNDRV_SEQ_OSS_MAX_MIDI_DEVS]; static DEFINE_SPINLOCK(register_lock); /* * prototypes */ static struct seq_oss_midi *get_mdev(int dev); static struct seq_oss_midi *get_mididev(struct seq_oss_devinfo *dp, int dev); static int send_synth_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, int dev); static int send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq_oss_midi *mdev); /* * look up the existing ports * this looks a very exhausting job. */ int snd_seq_oss_midi_lookup_ports(int client) { struct snd_seq_client_info *clinfo __free(kfree) = NULL; struct snd_seq_port_info *pinfo __free(kfree) = NULL; clinfo = kzalloc(sizeof(*clinfo), GFP_KERNEL); pinfo = kzalloc(sizeof(*pinfo), GFP_KERNEL); if (!clinfo || !pinfo) return -ENOMEM; clinfo->client = -1; while (snd_seq_kernel_client_ctl(client, SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT, clinfo) == 0) { if (clinfo->client == client) continue; /* ignore myself */ pinfo->addr.client = clinfo->client; pinfo->addr.port = -1; while (snd_seq_kernel_client_ctl(client, SNDRV_SEQ_IOCTL_QUERY_NEXT_PORT, pinfo) == 0) snd_seq_oss_midi_check_new_port(pinfo); } return 0; } /* */ static struct seq_oss_midi * get_mdev(int dev) { struct seq_oss_midi *mdev; unsigned long flags; spin_lock_irqsave(&register_lock, flags); mdev = midi_devs[dev]; if (mdev) snd_use_lock_use(&mdev->use_lock); spin_unlock_irqrestore(&register_lock, flags); return mdev; } /* * look for the identical slot */ static struct seq_oss_midi * find_slot(int client, int port) { int i; struct seq_oss_midi *mdev; unsigned long flags; spin_lock_irqsave(&register_lock, flags); for (i = 0; i < max_midi_devs; i++) { mdev = midi_devs[i]; if (mdev && mdev->client == client && mdev->port == port) { /* found! */ snd_use_lock_use(&mdev->use_lock); spin_unlock_irqrestore(&register_lock, flags); return mdev; } } spin_unlock_irqrestore(&register_lock, flags); return NULL; } #define PERM_WRITE (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_SUBS_WRITE) #define PERM_READ (SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ) /* * register a new port if it doesn't exist yet */ int snd_seq_oss_midi_check_new_port(struct snd_seq_port_info *pinfo) { int i; struct seq_oss_midi *mdev; unsigned long flags; /* the port must include generic midi */ if (! (pinfo->type & SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC)) return 0; /* either read or write subscribable */ if ((pinfo->capability & PERM_WRITE) != PERM_WRITE && (pinfo->capability & PERM_READ) != PERM_READ) return 0; /* * look for the identical slot */ mdev = find_slot(pinfo->addr.client, pinfo->addr.port); if (mdev) { /* already exists */ snd_use_lock_free(&mdev->use_lock); return 0; } /* * allocate midi info record */ mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) return -ENOMEM; /* copy the port information */ mdev->client = pinfo->addr.client; mdev->port = pinfo->addr.port; mdev->flags = pinfo->capability; mdev->opened = 0; snd_use_lock_init(&mdev->use_lock); mutex_init(&mdev->open_mutex); /* copy and truncate the name of synth device */ strscpy(mdev->name, pinfo->name, sizeof(mdev->name)); /* create MIDI coder */ if (snd_midi_event_new(MAX_MIDI_EVENT_BUF, &mdev->coder) < 0) { pr_err("ALSA: seq_oss: can't malloc midi coder\n"); kfree(mdev); return -ENOMEM; } /* OSS sequencer adds running status to all sequences */ snd_midi_event_no_status(mdev->coder, 1); /* * look for en empty slot */ spin_lock_irqsave(&register_lock, flags); for (i = 0; i < max_midi_devs; i++) { if (midi_devs[i] == NULL) break; } if (i >= max_midi_devs) { if (max_midi_devs >= SNDRV_SEQ_OSS_MAX_MIDI_DEVS) { spin_unlock_irqrestore(&register_lock, flags); snd_midi_event_free(mdev->coder); kfree(mdev); return -ENOMEM; } max_midi_devs++; } mdev->seq_device = i; midi_devs[mdev->seq_device] = mdev; spin_unlock_irqrestore(&register_lock, flags); return 0; } /* * release the midi device if it was registered */ int snd_seq_oss_midi_check_exit_port(int client, int port) { struct seq_oss_midi *mdev; unsigned long flags; int index; mdev = find_slot(client, port); if (mdev) { spin_lock_irqsave(&register_lock, flags); midi_devs[mdev->seq_device] = NULL; spin_unlock_irqrestore(&register_lock, flags); snd_use_lock_free(&mdev->use_lock); snd_use_lock_sync(&mdev->use_lock); snd_midi_event_free(mdev->coder); kfree(mdev); } spin_lock_irqsave(&register_lock, flags); for (index = max_midi_devs - 1; index >= 0; index--) { if (midi_devs[index]) break; } max_midi_devs = index + 1; spin_unlock_irqrestore(&register_lock, flags); return 0; } /* * release the midi device if it was registered */ void snd_seq_oss_midi_clear_all(void) { int i; struct seq_oss_midi *mdev; unsigned long flags; spin_lock_irqsave(&register_lock, flags); for (i = 0; i < max_midi_devs; i++) { mdev = midi_devs[i]; if (mdev) { snd_midi_event_free(mdev->coder); kfree(mdev); midi_devs[i] = NULL; } } max_midi_devs = 0; spin_unlock_irqrestore(&register_lock, flags); } /* * set up midi tables */ void snd_seq_oss_midi_setup(struct seq_oss_devinfo *dp) { spin_lock_irq(&register_lock); dp->max_mididev = max_midi_devs; spin_unlock_irq(&register_lock); } /* * clean up midi tables */ void snd_seq_oss_midi_cleanup(struct seq_oss_devinfo *dp) { int i; for (i = 0; i < dp->max_mididev; i++) snd_seq_oss_midi_close(dp, i); dp->max_mididev = 0; } /* * open all midi devices. ignore errors. */ void snd_seq_oss_midi_open_all(struct seq_oss_devinfo *dp, int file_mode) { int i; for (i = 0; i < dp->max_mididev; i++) snd_seq_oss_midi_open(dp, i, file_mode); } /* * get the midi device information */ static struct seq_oss_midi * get_mididev(struct seq_oss_devinfo *dp, int dev) { if (dev < 0 || dev >= dp->max_mididev) return NULL; dev = array_index_nospec(dev, dp->max_mididev); return get_mdev(dev); } /* * open the midi device if not opened yet */ int snd_seq_oss_midi_open(struct seq_oss_devinfo *dp, int dev, int fmode) { int perm; struct seq_oss_midi *mdev; struct snd_seq_port_subscribe subs; int err; mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; mutex_lock(&mdev->open_mutex); /* already used? */ if (mdev->opened && mdev->devinfo != dp) { err = -EBUSY; goto unlock; } perm = 0; if (is_write_mode(fmode)) perm |= PERM_WRITE; if (is_read_mode(fmode)) perm |= PERM_READ; perm &= mdev->flags; if (perm == 0) { err = -ENXIO; goto unlock; } /* already opened? */ if ((mdev->opened & perm) == perm) { err = 0; goto unlock; } perm &= ~mdev->opened; memset(&subs, 0, sizeof(subs)); if (perm & PERM_WRITE) { subs.sender = dp->addr; subs.dest.client = mdev->client; subs.dest.port = mdev->port; if (snd_seq_kernel_client_ctl(dp->cseq, SNDRV_SEQ_IOCTL_SUBSCRIBE_PORT, &subs) >= 0) mdev->opened |= PERM_WRITE; } if (perm & PERM_READ) { subs.sender.client = mdev->client; subs.sender.port = mdev->port; subs.dest = dp->addr; subs.flags = SNDRV_SEQ_PORT_SUBS_TIMESTAMP; subs.queue = dp->queue; /* queue for timestamps */ if (snd_seq_kernel_client_ctl(dp->cseq, SNDRV_SEQ_IOCTL_SUBSCRIBE_PORT, &subs) >= 0) mdev->opened |= PERM_READ; } if (! mdev->opened) { err = -ENXIO; goto unlock; } mdev->devinfo = dp; err = 0; unlock: mutex_unlock(&mdev->open_mutex); snd_use_lock_free(&mdev->use_lock); return err; } /* * close the midi device if already opened */ int snd_seq_oss_midi_close(struct seq_oss_devinfo *dp, int dev) { struct seq_oss_midi *mdev; struct snd_seq_port_subscribe subs; mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; mutex_lock(&mdev->open_mutex); if (!mdev->opened || mdev->devinfo != dp) goto unlock; memset(&subs, 0, sizeof(subs)); if (mdev->opened & PERM_WRITE) { subs.sender = dp->addr; subs.dest.client = mdev->client; subs.dest.port = mdev->port; snd_seq_kernel_client_ctl(dp->cseq, SNDRV_SEQ_IOCTL_UNSUBSCRIBE_PORT, &subs); } if (mdev->opened & PERM_READ) { subs.sender.client = mdev->client; subs.sender.port = mdev->port; subs.dest = dp->addr; snd_seq_kernel_client_ctl(dp->cseq, SNDRV_SEQ_IOCTL_UNSUBSCRIBE_PORT, &subs); } mdev->opened = 0; mdev->devinfo = NULL; unlock: mutex_unlock(&mdev->open_mutex); snd_use_lock_free(&mdev->use_lock); return 0; } /* * change seq capability flags to file mode flags */ int snd_seq_oss_midi_filemode(struct seq_oss_devinfo *dp, int dev) { struct seq_oss_midi *mdev; int mode; mdev = get_mididev(dp, dev); if (!mdev) return 0; mode = 0; if (mdev->opened & PERM_WRITE) mode |= SNDRV_SEQ_OSS_FILE_WRITE; if (mdev->opened & PERM_READ) mode |= SNDRV_SEQ_OSS_FILE_READ; snd_use_lock_free(&mdev->use_lock); return mode; } /* * reset the midi device and close it: * so far, only close the device. */ void snd_seq_oss_midi_reset(struct seq_oss_devinfo *dp, int dev) { struct seq_oss_midi *mdev; mdev = get_mididev(dp, dev); if (!mdev) return; if (! mdev->opened) { snd_use_lock_free(&mdev->use_lock); return; } if (mdev->opened & PERM_WRITE) { struct snd_seq_event ev; int c; memset(&ev, 0, sizeof(ev)); ev.dest.client = mdev->client; ev.dest.port = mdev->port; ev.queue = dp->queue; ev.source.port = dp->port; if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_SYNTH) { ev.type = SNDRV_SEQ_EVENT_SENSING; snd_seq_oss_dispatch(dp, &ev, 0, 0); } for (c = 0; c < 16; c++) { ev.type = SNDRV_SEQ_EVENT_CONTROLLER; ev.data.control.channel = c; ev.data.control.param = MIDI_CTL_ALL_NOTES_OFF; snd_seq_oss_dispatch(dp, &ev, 0, 0); if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) { ev.data.control.param = MIDI_CTL_RESET_CONTROLLERS; snd_seq_oss_dispatch(dp, &ev, 0, 0); ev.type = SNDRV_SEQ_EVENT_PITCHBEND; ev.data.control.value = 0; snd_seq_oss_dispatch(dp, &ev, 0, 0); } } } // snd_seq_oss_midi_close(dp, dev); snd_use_lock_free(&mdev->use_lock); } /* * get client/port of the specified MIDI device */ void snd_seq_oss_midi_get_addr(struct seq_oss_devinfo *dp, int dev, struct snd_seq_addr *addr) { struct seq_oss_midi *mdev; mdev = get_mididev(dp, dev); if (!mdev) return; addr->client = mdev->client; addr->port = mdev->port; snd_use_lock_free(&mdev->use_lock); } /* * input callback - this can be atomic */ int snd_seq_oss_midi_input(struct snd_seq_event *ev, int direct, void *private_data) { struct seq_oss_devinfo *dp = (struct seq_oss_devinfo *)private_data; struct seq_oss_midi *mdev; int rc; if (dp->readq == NULL) return 0; mdev = find_slot(ev->source.client, ev->source.port); if (!mdev) return 0; if (! (mdev->opened & PERM_READ)) { snd_use_lock_free(&mdev->use_lock); return 0; } if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC) rc = send_synth_event(dp, ev, mdev->seq_device); else rc = send_midi_event(dp, ev, mdev); snd_use_lock_free(&mdev->use_lock); return rc; } /* * convert ALSA sequencer event to OSS synth event */ static int send_synth_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, int dev) { union evrec ossev; memset(&ossev, 0, sizeof(ossev)); switch (ev->type) { case SNDRV_SEQ_EVENT_NOTEON: ossev.v.cmd = MIDI_NOTEON; break; case SNDRV_SEQ_EVENT_NOTEOFF: ossev.v.cmd = MIDI_NOTEOFF; break; case SNDRV_SEQ_EVENT_KEYPRESS: ossev.v.cmd = MIDI_KEY_PRESSURE; break; case SNDRV_SEQ_EVENT_CONTROLLER: ossev.l.cmd = MIDI_CTL_CHANGE; break; case SNDRV_SEQ_EVENT_PGMCHANGE: ossev.l.cmd = MIDI_PGM_CHANGE; break; case SNDRV_SEQ_EVENT_CHANPRESS: ossev.l.cmd = MIDI_CHN_PRESSURE; break; case SNDRV_SEQ_EVENT_PITCHBEND: ossev.l.cmd = MIDI_PITCH_BEND; break; default: return 0; /* not supported */ } ossev.v.dev = dev; switch (ev->type) { case SNDRV_SEQ_EVENT_NOTEON: case SNDRV_SEQ_EVENT_NOTEOFF: case SNDRV_SEQ_EVENT_KEYPRESS: ossev.v.code = EV_CHN_VOICE; ossev.v.note = ev->data.note.note; ossev.v.parm = ev->data.note.velocity; ossev.v.chn = ev->data.note.channel; break; case SNDRV_SEQ_EVENT_CONTROLLER: case SNDRV_SEQ_EVENT_PGMCHANGE: case SNDRV_SEQ_EVENT_CHANPRESS: ossev.l.code = EV_CHN_COMMON; ossev.l.p1 = ev->data.control.param; ossev.l.val = ev->data.control.value; ossev.l.chn = ev->data.control.channel; break; case SNDRV_SEQ_EVENT_PITCHBEND: ossev.l.code = EV_CHN_COMMON; ossev.l.val = ev->data.control.value + 8192; ossev.l.chn = ev->data.control.channel; break; } snd_seq_oss_readq_put_timestamp(dp->readq, ev->time.tick, dp->seq_mode); snd_seq_oss_readq_put_event(dp->readq, &ossev); return 0; } /* * decode event and send MIDI bytes to read queue */ static int send_midi_event(struct seq_oss_devinfo *dp, struct snd_seq_event *ev, struct seq_oss_midi *mdev) { char msg[32]; int len; snd_seq_oss_readq_put_timestamp(dp->readq, ev->time.tick, dp->seq_mode); if (!dp->timer->running) len = snd_seq_oss_timer_start(dp->timer); if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { snd_seq_oss_readq_sysex(dp->readq, mdev->seq_device, ev); snd_midi_event_reset_decode(mdev->coder); } else { len = snd_midi_event_decode(mdev->coder, msg, sizeof(msg), ev); if (len > 0) snd_seq_oss_readq_puts(dp->readq, mdev->seq_device, msg, len); } return 0; } /* * dump midi data * return 0 : enqueued * non-zero : invalid - ignored */ int snd_seq_oss_midi_putc(struct seq_oss_devinfo *dp, int dev, unsigned char c, struct snd_seq_event *ev) { struct seq_oss_midi *mdev; mdev = get_mididev(dp, dev); if (!mdev) return -ENODEV; if (snd_midi_event_encode_byte(mdev->coder, c, ev)) { snd_seq_oss_fill_addr(dp, ev, mdev->client, mdev->port); snd_use_lock_free(&mdev->use_lock); return 0; } snd_use_lock_free(&mdev->use_lock); return -EINVAL; } /* * create OSS compatible midi_info record */ int snd_seq_oss_midi_make_info(struct seq_oss_devinfo *dp, int dev, struct midi_info *inf) { struct seq_oss_midi *mdev; mdev = get_mididev(dp, dev); if (!mdev) return -ENXIO; inf->device = dev; inf->dev_type = 0; /* FIXME: ?? */ inf->capabilities = 0; /* FIXME: ?? */ strscpy(inf->name, mdev->name, sizeof(inf->name)); snd_use_lock_free(&mdev->use_lock); return 0; } #ifdef CONFIG_SND_PROC_FS /* * proc interface */ static char * capmode_str(int val) { val &= PERM_READ|PERM_WRITE; if (val == (PERM_READ|PERM_WRITE)) return "read/write"; else if (val == PERM_READ) return "read"; else if (val == PERM_WRITE) return "write"; else return "none"; } void snd_seq_oss_midi_info_read(struct snd_info_buffer *buf) { int i; struct seq_oss_midi *mdev; snd_iprintf(buf, "\nNumber of MIDI devices: %d\n", max_midi_devs); for (i = 0; i < max_midi_devs; i++) { snd_iprintf(buf, "\nmidi %d: ", i); mdev = get_mdev(i); if (mdev == NULL) { snd_iprintf(buf, "*empty*\n"); continue; } snd_iprintf(buf, "[%s] ALSA port %d:%d\n", mdev->name, mdev->client, mdev->port); snd_iprintf(buf, " capability %s / opened %s\n", capmode_str(mdev->flags), capmode_str(mdev->opened)); snd_use_lock_free(&mdev->use_lock); } } #endif /* CONFIG_SND_PROC_FS */
1 78 66 50 50 50 50 50 50 50 50 50 50 50 50 50 50 1 2 45 2 24 32 24 7 8 8 8 8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 /* * net/tipc/link.c: TIPC link code * * Copyright (c) 1996-2007, 2012-2016, Ericsson AB * Copyright (c) 2004-2007, 2010-2013, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include "core.h" #include "subscr.h" #include "link.h" #include "bcast.h" #include "socket.h" #include "name_distr.h" #include "discover.h" #include "netlink.h" #include "monitor.h" #include "trace.h" #include "crypto.h" #include <linux/pkt_sched.h> struct tipc_stats { u32 sent_pkts; u32 recv_pkts; u32 sent_states; u32 recv_states; u32 sent_probes; u32 recv_probes; u32 sent_nacks; u32 recv_nacks; u32 sent_acks; u32 sent_bundled; u32 sent_bundles; u32 recv_bundled; u32 recv_bundles; u32 retransmitted; u32 sent_fragmented; u32 sent_fragments; u32 recv_fragmented; u32 recv_fragments; u32 link_congs; /* # port sends blocked by congestion */ u32 deferred_recv; u32 duplicates; u32 max_queue_sz; /* send queue size high water mark */ u32 accu_queue_sz; /* used for send queue size profiling */ u32 queue_sz_counts; /* used for send queue size profiling */ u32 msg_length_counts; /* used for message length profiling */ u32 msg_lengths_total; /* used for message length profiling */ u32 msg_length_profile[7]; /* used for msg. length profiling */ }; /** * struct tipc_link - TIPC link data structure * @addr: network address of link's peer node * @name: link name character string * @net: pointer to namespace struct * @peer_session: link session # being used by peer end of link * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node * @silent_intv_cnt: # of timer intervals without any reception from peer * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') * @mon_state: cookie with information needed by link monitor * @mtu: current maximum packet size for this link * @advertised_mtu: advertised own mtu when link is being established * @backlogq: queue for messages waiting to be sent * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages * @inputq: buffer queue for messages to be delivered upwards * @namedq: buffer queue for name table messages to be delivered upwards * @wakeupq: linked list of wakeup msgs waiting for link congestion to abate * @reasm_buf: head of partially reassembled inbound message fragments * @stats: collects statistics regarding link activity * @session: session to be used by link * @snd_nxt_state: next send seq number * @rcv_nxt_state: next rcv seq number * @in_session: have received ACTIVATE_MSG from peer * @active: link is active * @if_name: associated interface name * @rst_cnt: link reset counter * @drop_point: seq number for failover handling (FIXME) * @failover_reasm_skb: saved failover msg ptr (FIXME) * @failover_deferdq: deferred message queue for failover processing (FIXME) * @transmq: the link's transmit queue * @backlog: link's backlog by priority (importance) * @snd_nxt: next sequence number to be used * @rcv_unacked: # messages read by user, but not yet acked back to peer * @deferdq: deferred receive queue * @window: sliding window size for congestion handling * @min_win: minimal send window to be used by link * @ssthresh: slow start threshold for congestion handling * @max_win: maximal send window to be used by link * @cong_acks: congestion acks for congestion avoidance (FIXME) * @checkpoint: seq number for congestion window size handling * @reasm_tnlmsg: fragmentation/reassembly area for tunnel protocol message * @last_gap: last gap ack blocks for bcast (FIXME) * @last_ga: ptr to gap ack blocks * @bc_rcvlink: the peer specific link used for broadcast reception * @bc_sndlink: the namespace global link used for broadcast sending * @nack_state: bcast nack state * @bc_peer_is_up: peer has acked the bcast init msg */ struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; struct net *net; /* Management and link supervision data */ u16 peer_session; u16 session; u16 snd_nxt_state; u16 rcv_nxt_state; u32 peer_bearer_id; u32 bearer_id; u32 tolerance; u32 abort_limit; u32 state; u16 peer_caps; bool in_session; bool active; u32 silent_intv_cnt; char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; struct tipc_mon_state mon_state; u16 rst_cnt; /* Failover/synch */ u16 drop_point; struct sk_buff *failover_reasm_skb; struct sk_buff_head failover_deferdq; /* Max packet negotiation */ u16 mtu; u16 advertised_mtu; /* Sending */ struct sk_buff_head transmq; struct sk_buff_head backlogq; struct { u16 len; u16 limit; struct sk_buff *target_bskb; } backlog[5]; u16 snd_nxt; /* Reception */ u16 rcv_nxt; u32 rcv_unacked; struct sk_buff_head deferdq; struct sk_buff_head *inputq; struct sk_buff_head *namedq; /* Congestion handling */ struct sk_buff_head wakeupq; u16 window; u16 min_win; u16 ssthresh; u16 max_win; u16 cong_acks; u16 checkpoint; /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; struct sk_buff *reasm_tnlmsg; /* Broadcast */ u16 ackers; u16 acked; u16 last_gap; struct tipc_gap_ack_blks *last_ga; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; u8 nack_state; bool bc_peer_is_up; /* Statistics */ struct tipc_stats stats; }; /* * Error message prefixes */ static const char *link_co_err = "Link tunneling error, "; static const char *link_rst_msg = "Resetting link "; /* Send states for broadcast NACKs */ enum { BC_NACK_SND_CONDITIONAL, BC_NACK_SND_UNCONDITIONAL, BC_NACK_SND_SUPPRESS, }; #define TIPC_BC_RETR_LIM (jiffies + msecs_to_jiffies(10)) #define TIPC_UC_RETR_TIME (jiffies + msecs_to_jiffies(1)) /* Link FSM states: */ enum { LINK_ESTABLISHED = 0xe, LINK_ESTABLISHING = 0xe << 4, LINK_RESET = 0x1 << 8, LINK_RESETTING = 0x2 << 12, LINK_PEER_RESET = 0xd << 16, LINK_FAILINGOVER = 0xf << 20, LINK_SYNCHING = 0xc << 24 }; static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, bool probe_reply, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, struct tipc_link *l, u8 start_index); static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr); static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq, bool *retransmitted, int *rc); static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted); /* * Simple non-static link routines (i.e. referenced outside this file) */ bool tipc_link_is_up(struct tipc_link *l) { return l->state & (LINK_ESTABLISHED | LINK_SYNCHING); } bool tipc_link_peer_is_down(struct tipc_link *l) { return l->state == LINK_PEER_RESET; } bool tipc_link_is_reset(struct tipc_link *l) { return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING); } bool tipc_link_is_establishing(struct tipc_link *l) { return l->state == LINK_ESTABLISHING; } bool tipc_link_is_synching(struct tipc_link *l) { return l->state == LINK_SYNCHING; } bool tipc_link_is_failingover(struct tipc_link *l) { return l->state == LINK_FAILINGOVER; } bool tipc_link_is_blocked(struct tipc_link *l) { return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER); } static bool link_is_bc_sndlink(struct tipc_link *l) { return !l->bc_sndlink; } static bool link_is_bc_rcvlink(struct tipc_link *l) { return ((l->bc_rcvlink == l) && !link_is_bc_sndlink(l)); } void tipc_link_set_active(struct tipc_link *l, bool active) { l->active = active; } u32 tipc_link_id(struct tipc_link *l) { return l->peer_bearer_id << 16 | l->bearer_id; } int tipc_link_min_win(struct tipc_link *l) { return l->min_win; } int tipc_link_max_win(struct tipc_link *l) { return l->max_win; } int tipc_link_prio(struct tipc_link *l) { return l->priority; } unsigned long tipc_link_tolerance(struct tipc_link *l) { return l->tolerance; } struct sk_buff_head *tipc_link_inputq(struct tipc_link *l) { return l->inputq; } char tipc_link_plane(struct tipc_link *l) { return l->net_plane; } struct net *tipc_link_net(struct tipc_link *l) { return l->net; } void tipc_link_update_caps(struct tipc_link *l, u16 capabilities) { l->peer_caps = capabilities; } void tipc_link_add_bc_peer(struct tipc_link *snd_l, struct tipc_link *uc_l, struct sk_buff_head *xmitq) { struct tipc_link *rcv_l = uc_l->bc_rcvlink; snd_l->ackers++; rcv_l->acked = snd_l->snd_nxt - 1; snd_l->state = LINK_ESTABLISHED; tipc_link_build_bc_init_msg(uc_l, xmitq); } void tipc_link_remove_bc_peer(struct tipc_link *snd_l, struct tipc_link *rcv_l, struct sk_buff_head *xmitq) { u16 ack = snd_l->snd_nxt - 1; snd_l->ackers--; rcv_l->bc_peer_is_up = true; rcv_l->state = LINK_ESTABLISHED; tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL); trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!"); tipc_link_reset(rcv_l); rcv_l->state = LINK_RESET; if (!snd_l->ackers) { trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!"); tipc_link_reset(snd_l); snd_l->state = LINK_RESET; __skb_queue_purge(xmitq); } } int tipc_link_bc_peers(struct tipc_link *l) { return l->ackers; } static u16 link_bc_rcv_gap(struct tipc_link *l) { struct sk_buff *skb = skb_peek(&l->deferdq); u16 gap = 0; if (more(l->snd_nxt, l->rcv_nxt)) gap = l->snd_nxt - l->rcv_nxt; if (skb) gap = buf_seqno(skb) - l->rcv_nxt; return gap; } void tipc_link_set_mtu(struct tipc_link *l, int mtu) { l->mtu = mtu; } int tipc_link_mtu(struct tipc_link *l) { return l->mtu; } int tipc_link_mss(struct tipc_link *l) { #ifdef CONFIG_TIPC_CRYPTO return l->mtu - INT_H_SIZE - EMSG_OVERHEAD; #else return l->mtu - INT_H_SIZE; #endif } u16 tipc_link_rcv_nxt(struct tipc_link *l) { return l->rcv_nxt; } u16 tipc_link_acked(struct tipc_link *l) { return l->acked; } char *tipc_link_name(struct tipc_link *l) { return l->name; } u32 tipc_link_state(struct tipc_link *l) { return l->state; } /** * tipc_link_create - create a new link * @net: pointer to associated network namespace * @if_name: associated interface name * @bearer_id: id (index) of associated bearer * @tolerance: link tolerance to be used by link * @net_plane: network plane (A,B,c..) this link belongs to * @mtu: mtu to be advertised by link * @priority: priority to be used by link * @min_win: minimal send window to be used by link * @max_win: maximal send window to be used by link * @session: session to be used by link * @peer: node id of peer node * @peer_caps: bitmap describing peer node capabilities * @bc_sndlink: the namespace global link used for broadcast sending * @bc_rcvlink: the peer specific link used for broadcast reception * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * @self: local unicast link id * @peer_id: 128-bit ID of peer * * Return: true if link was created, otherwise false */ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, int tolerance, char net_plane, u32 mtu, int priority, u32 min_win, u32 max_win, u32 session, u32 self, u32 peer, u8 *peer_id, u16 peer_caps, struct tipc_link *bc_sndlink, struct tipc_link *bc_rcvlink, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link **link) { char peer_str[NODE_ID_STR_LEN] = {0,}; char self_str[NODE_ID_STR_LEN] = {0,}; struct tipc_link *l; l = kzalloc(sizeof(*l), GFP_ATOMIC); if (!l) return false; *link = l; l->session = session; /* Set link name for unicast links only */ if (peer_id) { tipc_nodeid2string(self_str, tipc_own_id(net)); if (strlen(self_str) > 16) sprintf(self_str, "%x", self); tipc_nodeid2string(peer_str, peer_id); if (strlen(peer_str) > 16) sprintf(peer_str, "%x", peer); } /* Peer i/f name will be completed by reset/activate message */ snprintf(l->name, sizeof(l->name), "%s:%s-%s:unknown", self_str, if_name, peer_str); strcpy(l->if_name, if_name); l->addr = peer; l->peer_caps = peer_caps; l->net = net; l->in_session = false; l->bearer_id = bearer_id; l->tolerance = tolerance; if (bc_rcvlink) bc_rcvlink->tolerance = tolerance; l->net_plane = net_plane; l->advertised_mtu = mtu; l->mtu = mtu; l->priority = priority; tipc_link_set_queue_limits(l, min_win, max_win); l->ackers = 1; l->bc_sndlink = bc_sndlink; l->bc_rcvlink = bc_rcvlink; l->inputq = inputq; l->namedq = namedq; l->state = LINK_RESETTING; __skb_queue_head_init(&l->transmq); __skb_queue_head_init(&l->backlogq); __skb_queue_head_init(&l->deferdq); __skb_queue_head_init(&l->failover_deferdq); skb_queue_head_init(&l->wakeupq); skb_queue_head_init(l->inputq); return true; } /** * tipc_link_bc_create - create new link to be used for broadcast * @net: pointer to associated network namespace * @mtu: mtu to be used initially if no peers * @min_win: minimal send window to be used by link * @max_win: maximal send window to be used by link * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery * @link: return value, pointer to put the created link * @ownnode: identity of own node * @peer: node id of peer node * @peer_id: 128-bit ID of peer * @peer_caps: bitmap describing peer node capabilities * @bc_sndlink: the namespace global link used for broadcast sending * * Return: true if link was created, otherwise false */ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id, int mtu, u32 min_win, u32 max_win, u16 peer_caps, struct sk_buff_head *inputq, struct sk_buff_head *namedq, struct tipc_link *bc_sndlink, struct tipc_link **link) { struct tipc_link *l; if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win, max_win, 0, ownnode, peer, NULL, peer_caps, bc_sndlink, NULL, inputq, namedq, link)) return false; l = *link; if (peer_id) { char peer_str[NODE_ID_STR_LEN] = {0,}; tipc_nodeid2string(peer_str, peer_id); if (strlen(peer_str) > 16) sprintf(peer_str, "%x", peer); /* Broadcast receiver link name: "broadcast-link:<peer>" */ snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name, peer_str); } else { strcpy(l->name, tipc_bclink_name); } trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!"); tipc_link_reset(l); l->state = LINK_RESET; l->ackers = 0; l->bc_rcvlink = l; /* Broadcast send link is always up */ if (link_is_bc_sndlink(l)) l->state = LINK_ESTABLISHED; /* Disable replicast if even a single peer doesn't support it */ if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST)) tipc_bcast_toggle_rcast(net, false); return true; } /** * tipc_link_fsm_evt - link finite state machine * @l: pointer to link * @evt: state machine event to be processed */ int tipc_link_fsm_evt(struct tipc_link *l, int evt) { int rc = 0; int old_state = l->state; switch (l->state) { case LINK_RESETTING: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_PEER_RESET; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_FAILURE_EVT: case LINK_FAILOVER_BEGIN_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILOVER_END_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_RESET: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_ESTABLISHING; break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; break; case LINK_FAILURE_EVT: case LINK_RESET_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILOVER_END_EVT: break; case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_PEER_RESET: switch (evt) { case LINK_RESET_EVT: l->state = LINK_ESTABLISHING; break; case LINK_PEER_RESET_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILURE_EVT: break; case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: goto illegal_evt; } break; case LINK_FAILINGOVER: switch (evt) { case LINK_FAILOVER_END_EVT: l->state = LINK_RESET; break; case LINK_PEER_RESET_EVT: case LINK_RESET_EVT: case LINK_ESTABLISH_EVT: case LINK_FAILURE_EVT: break; case LINK_FAILOVER_BEGIN_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_ESTABLISHING: switch (evt) { case LINK_ESTABLISH_EVT: l->state = LINK_ESTABLISHED; break; case LINK_FAILOVER_BEGIN_EVT: l->state = LINK_FAILINGOVER; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_FAILURE_EVT: case LINK_PEER_RESET_EVT: case LINK_SYNCH_BEGIN_EVT: case LINK_FAILOVER_END_EVT: break; case LINK_SYNCH_END_EVT: default: goto illegal_evt; } break; case LINK_ESTABLISHED: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_PEER_RESET; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_FAILURE_EVT: l->state = LINK_RESETTING; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_ESTABLISH_EVT: case LINK_SYNCH_END_EVT: break; case LINK_SYNCH_BEGIN_EVT: l->state = LINK_SYNCHING; break; case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: goto illegal_evt; } break; case LINK_SYNCHING: switch (evt) { case LINK_PEER_RESET_EVT: l->state = LINK_PEER_RESET; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_FAILURE_EVT: l->state = LINK_RESETTING; rc |= TIPC_LINK_DOWN_EVT; break; case LINK_RESET_EVT: l->state = LINK_RESET; break; case LINK_ESTABLISH_EVT: case LINK_SYNCH_BEGIN_EVT: break; case LINK_SYNCH_END_EVT: l->state = LINK_ESTABLISHED; break; case LINK_FAILOVER_BEGIN_EVT: case LINK_FAILOVER_END_EVT: default: goto illegal_evt; } break; default: pr_err("Unknown FSM state %x in %s\n", l->state, l->name); } trace_tipc_link_fsm(l->name, old_state, l->state, evt); return rc; illegal_evt: pr_err("Illegal FSM event %x in state %x on link %s\n", evt, l->state, l->name); trace_tipc_link_fsm(l->name, old_state, l->state, evt); return rc; } /* link_profile_stats - update statistical profiling of traffic */ static void link_profile_stats(struct tipc_link *l) { struct sk_buff *skb; struct tipc_msg *msg; int length; /* Update counters used in statistical profiling of send traffic */ l->stats.accu_queue_sz += skb_queue_len(&l->transmq); l->stats.queue_sz_counts++; skb = skb_peek(&l->transmq); if (!skb) return; msg = buf_msg(skb); length = msg_size(msg); if (msg_user(msg) == MSG_FRAGMENTER) { if (msg_type(msg) != FIRST_FRAGMENT) return; length = msg_size(msg_inner_hdr(msg)); } l->stats.msg_lengths_total += length; l->stats.msg_length_counts++; if (length <= 64) l->stats.msg_length_profile[0]++; else if (length <= 256) l->stats.msg_length_profile[1]++; else if (length <= 1024) l->stats.msg_length_profile[2]++; else if (length <= 4096) l->stats.msg_length_profile[3]++; else if (length <= 16384) l->stats.msg_length_profile[4]++; else if (length <= 32768) l->stats.msg_length_profile[5]++; else l->stats.msg_length_profile[6]++; } /** * tipc_link_too_silent - check if link is "too silent" * @l: tipc link to be checked * * Return: true if the link 'silent_intv_cnt' is about to reach the * 'abort_limit' value, otherwise false */ bool tipc_link_too_silent(struct tipc_link *l) { return (l->silent_intv_cnt + 2 > l->abort_limit); } /* tipc_link_timeout - perform periodic task as instructed from node timeout */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = 0; int rc = 0; bool state = false; bool probe = false; bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; struct tipc_mon_state *mstate = &l->mon_state; trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " "); trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " "); switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: mtyp = STATE_MSG; link_profile_stats(l); tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id); if (mstate->reset || (l->silent_intv_cnt > l->abort_limit)) return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); state = bc_acked != bc_snt; state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; probe |= !skb_queue_empty(&l->deferdq); if (l->snd_nxt == l->checkpoint) { tipc_link_update_cwin(l, 0, 0); probe = true; } l->checkpoint = l->snd_nxt; break; case LINK_RESET: setup = l->rst_cnt++ <= 4; setup |= !(l->rst_cnt % 16); mtyp = RESET_MSG; break; case LINK_ESTABLISHING: setup = true; mtyp = ACTIVATE_MSG; break; case LINK_PEER_RESET: case LINK_RESETTING: case LINK_FAILINGOVER: break; default: break; } if (state || probe || setup) tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); return rc; } /** * link_schedule_user - schedule a message sender for wakeup after congestion * @l: congested link * @hdr: header of message that is being sent * Create pseudo msg to send back to user when congestion abates */ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) { u32 dnode = tipc_own_addr(l->net); u32 dport = msg_origport(hdr); struct sk_buff *skb; /* Create and schedule wakeup pseudo message */ skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(&l->wakeupq, skb); l->stats.link_congs++; trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!"); return -ELINKCONG; } /** * link_prepare_wakeup - prepare users for wakeup after congestion * @l: congested link * Wake up a number of waiting users, as permitted by available space * in the send queue */ static void link_prepare_wakeup(struct tipc_link *l) { struct sk_buff_head *wakeupq = &l->wakeupq; struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; struct sk_buff_head tmpq; int avail[5] = {0,}; int imp = 0; __skb_queue_head_init(&tmpq); for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; if (avail[imp] <= 0) continue; avail[imp]--; __skb_unlink(skb, wakeupq); __skb_queue_tail(&tmpq, skb); } spin_lock_bh(&inputq->lock); skb_queue_splice_tail(&tmpq, inputq); spin_unlock_bh(&inputq->lock); } /** * tipc_link_set_skb_retransmit_time - set the time at which retransmission of * the given skb should be next attempted * @skb: skb to set a future retransmission time for * @l: link the skb will be transmitted on */ static void tipc_link_set_skb_retransmit_time(struct sk_buff *skb, struct tipc_link *l) { if (link_is_bc_sndlink(l)) TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; else TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; } void tipc_link_reset(struct tipc_link *l) { struct sk_buff_head list; u32 imp; __skb_queue_head_init(&list); l->in_session = false; /* Force re-synch of peer session number before establishing */ l->peer_session--; l->session++; l->mtu = l->advertised_mtu; spin_lock_bh(&l->wakeupq.lock); skb_queue_splice_init(&l->wakeupq, &list); spin_unlock_bh(&l->wakeupq.lock); spin_lock_bh(&l->inputq->lock); skb_queue_splice_init(&list, l->inputq); spin_unlock_bh(&l->inputq->lock); __skb_queue_purge(&l->transmq); __skb_queue_purge(&l->deferdq); __skb_queue_purge(&l->backlogq); __skb_queue_purge(&l->failover_deferdq); for (imp = 0; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) { l->backlog[imp].len = 0; l->backlog[imp].target_bskb = NULL; } kfree_skb(l->reasm_buf); kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; l->rcv_nxt = 1; l->snd_nxt_state = 1; l->rcv_nxt_state = 1; l->acked = 0; l->last_gap = 0; kfree(l->last_ga); l->last_ga = NULL; l->silent_intv_cnt = 0; l->rst_cnt = 0; l->bc_peer_is_up = false; memset(&l->mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } /** * tipc_link_xmit(): enqueue buffer list according to queue situation * @l: link to use * @list: chain of buffers containing message * @xmitq: returned list of packets to be sent by caller * * Consumes the buffer chain. * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted * Return: 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS */ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff_head *xmitq) { struct sk_buff_head *backlogq = &l->backlogq; struct sk_buff_head *transmq = &l->transmq; struct sk_buff *skb, *_skb; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; int pkt_cnt = skb_queue_len(list); unsigned int mss = tipc_link_mss(l); unsigned int cwin = l->window; unsigned int mtu = l->mtu; struct tipc_msg *hdr; bool new_bundle; int rc = 0; int imp; if (pkt_cnt <= 0) return 0; hdr = buf_msg(skb_peek(list)); if (unlikely(msg_size(hdr) > mtu)) { pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", skb_queue_len(list), msg_user(hdr), msg_type(hdr), msg_size(hdr), mtu); __skb_queue_purge(list); return -EMSGSIZE; } imp = msg_importance(hdr); /* Allow oversubscription of one data msg per source at congestion */ if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { if (imp == TIPC_SYSTEM_IMPORTANCE) { pr_warn("%s<%s>, link overflow", link_rst_msg, l->name); return -ENOBUFS; } rc = link_schedule_user(l, hdr); } if (pkt_cnt > 1) { l->stats.sent_fragmented++; l->stats.sent_fragments += pkt_cnt; } /* Prepare each packet for sending, and add to relevant queue: */ while ((skb = __skb_dequeue(list))) { if (likely(skb_queue_len(transmq) < cwin)) { hdr = buf_msg(skb); msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { kfree_skb(skb); __skb_queue_purge(list); return -ENOBUFS; } __skb_queue_tail(transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; l->rcv_unacked = 0; l->stats.sent_pkts++; seqno++; continue; } if (tipc_msg_try_bundle(l->backlog[imp].target_bskb, &skb, mss, l->addr, &new_bundle)) { if (skb) { /* Keep a ref. to the skb for next try */ l->backlog[imp].target_bskb = skb; l->backlog[imp].len++; __skb_queue_tail(backlogq, skb); } else { if (new_bundle) { l->stats.sent_bundles++; l->stats.sent_bundled++; } l->stats.sent_bundled++; } continue; } l->backlog[imp].target_bskb = NULL; l->backlog[imp].len += (1 + skb_queue_len(list)); __skb_queue_tail(backlogq, skb); skb_queue_splice_tail_init(list, backlogq); } l->snd_nxt = seqno; return rc; } static void tipc_link_update_cwin(struct tipc_link *l, int released, bool retransmitted) { int bklog_len = skb_queue_len(&l->backlogq); struct sk_buff_head *txq = &l->transmq; int txq_len = skb_queue_len(txq); u16 cwin = l->window; /* Enter fast recovery */ if (unlikely(retransmitted)) { l->ssthresh = max_t(u16, l->window / 2, 300); l->window = min_t(u16, l->ssthresh, l->window); return; } /* Enter slow start */ if (unlikely(!released)) { l->ssthresh = max_t(u16, l->window / 2, 300); l->window = l->min_win; return; } /* Don't increase window if no pressure on the transmit queue */ if (txq_len + bklog_len < cwin) return; /* Don't increase window if there are holes the transmit queue */ if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len) return; l->cong_acks += released; /* Slow start */ if (cwin <= l->ssthresh) { l->window = min_t(u16, cwin + released, l->max_win); return; } /* Congestion avoidance */ if (l->cong_acks < cwin) return; l->window = min_t(u16, ++cwin, l->max_win); l->cong_acks = 0; } static void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq) { u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; struct sk_buff_head *txq = &l->transmq; struct sk_buff *skb, *_skb; u16 ack = l->rcv_nxt - 1; u16 seqno = l->snd_nxt; struct tipc_msg *hdr; u16 cwin = l->window; u32 imp; while (skb_queue_len(txq) < cwin) { skb = skb_peek(&l->backlogq); if (!skb) break; _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) break; __skb_dequeue(&l->backlogq); hdr = buf_msg(skb); imp = msg_importance(hdr); l->backlog[imp].len--; if (unlikely(skb == l->backlog[imp].target_bskb)) l->backlog[imp].target_bskb = NULL; __skb_queue_tail(&l->transmq, skb); tipc_link_set_skb_retransmit_time(skb, l); __skb_queue_tail(xmitq, _skb); TIPC_SKB_CB(skb)->ackers = l->ackers; msg_set_seqno(hdr, seqno); msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); l->rcv_unacked = 0; l->stats.sent_pkts++; seqno++; } l->snd_nxt = seqno; } /** * link_retransmit_failure() - Detect repeated retransmit failures * @l: tipc link sender * @r: tipc link receiver (= l in case of unicast) * @rc: returned code * * Return: true if the repeated retransmit failures happens, otherwise * false */ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, int *rc) { struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; if (!skb) return false; if (!TIPC_SKB_CB(skb)->retr_cnt) return false; if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + msecs_to_jiffies(r->tolerance * 10))) return false; hdr = buf_msg(skb); if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) return false; pr_warn("Retransmission failure on link <%s>\n", l->name); link_print(l, "State of link "); pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); pr_info("sqno %u, prev: %x, dest: %x\n", msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); pr_info("retr_stamp %d, retr_cnt %d\n", jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), TIPC_SKB_CB(skb)->retr_cnt); trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); if (link_is_bc_sndlink(l)) { r->state = LINK_RESET; *rc |= TIPC_LINK_DOWN_EVT; } else { *rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return true; } /* tipc_data_input - deliver data and name distr msgs to upper layer * * Consumes buffer if message is of right type * Node lock must be held */ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; struct tipc_msg *hdr = buf_msg(skb); switch (msg_user(hdr)) { case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { skb_queue_tail(mc_inputq, skb); return true; } fallthrough; case CONN_MANAGER: skb_queue_tail(inputq, skb); return true; case GROUP_PROTOCOL: skb_queue_tail(mc_inputq, skb); return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; skb_queue_tail(l->namedq, skb); return true; case MSG_BUNDLER: case TUNNEL_PROTOCOL: case MSG_FRAGMENTER: case BCAST_PROTOCOL: return false; #ifdef CONFIG_TIPC_CRYPTO case MSG_CRYPTO: if (sysctl_tipc_key_exchange_enabled && TIPC_SKB_CB(skb)->decrypted) { tipc_crypto_msg_rcv(l->net, skb); return true; } fallthrough; #endif default: pr_warn("Dropping received illegal msg type\n"); kfree_skb(skb); return true; } } /* tipc_link_input - process packet that has passed link protocol check * * Consumes buffer */ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq, struct sk_buff **reasm_skb) { struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; struct sk_buff_head tmpq; int usr = msg_user(hdr); int pos = 0; if (usr == MSG_BUNDLER) { skb_queue_head_init(&tmpq); l->stats.recv_bundles++; l->stats.recv_bundled += msg_msgcnt(hdr); while (tipc_msg_extract(skb, &iskb, &pos)) tipc_data_input(l, iskb, &tmpq); tipc_skb_queue_splice_tail(&tmpq, inputq); return 0; } else if (usr == MSG_FRAGMENTER) { l->stats.recv_fragments++; if (tipc_buf_append(reasm_skb, &skb)) { l->stats.recv_fragmented++; tipc_data_input(l, skb, inputq); } else if (!*reasm_skb && !link_is_bc_rcvlink(l)) { pr_warn_ratelimited("Unable to build fragment list\n"); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } return 0; } else if (usr == BCAST_PROTOCOL) { tipc_bcast_lock(l->net); tipc_link_bc_init_rcv(l->bc_rcvlink, hdr); tipc_bcast_unlock(l->net); } kfree_skb(skb); return 0; } /* tipc_link_tnl_rcv() - receive TUNNEL_PROTOCOL message, drop or process the * inner message along with the ones in the old link's * deferdq * @l: tunnel link * @skb: TUNNEL_PROTOCOL message * @inputq: queue to put messages ready for delivery */ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { struct sk_buff **reasm_skb = &l->failover_reasm_skb; struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; struct sk_buff_head *fdefq = &l->failover_deferdq; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; int ipos = 0; int rc = 0; u16 seqno; if (msg_type(hdr) == SYNCH_MSG) { kfree_skb(skb); return 0; } /* Not a fragment? */ if (likely(!msg_nof_fragms(hdr))) { if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { pr_warn_ratelimited("Unable to extract msg, defq: %d\n", skb_queue_len(fdefq)); return 0; } kfree_skb(skb); } else { /* Set fragment type for buf_append */ if (msg_fragm_no(hdr) == 1) msg_set_type(hdr, FIRST_FRAGMENT); else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) msg_set_type(hdr, FRAGMENT); else msg_set_type(hdr, LAST_FRAGMENT); if (!tipc_buf_append(reasm_tnlmsg, &skb)) { /* Successful but non-complete reassembly? */ if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) return 0; pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } iskb = skb; } do { seqno = buf_seqno(iskb); if (unlikely(less(seqno, l->drop_point))) { kfree_skb(iskb); continue; } if (unlikely(seqno != l->drop_point)) { __tipc_skb_queue_sorted(fdefq, seqno, iskb); continue; } l->drop_point++; if (!tipc_data_input(l, iskb, inputq)) rc |= tipc_link_input(l, iskb, inputq, reasm_skb); if (unlikely(rc)) break; } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); return rc; } /** * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG * @ga: returned pointer to the Gap ACK blocks if any * @l: the tipc link * @hdr: the PROTOCOL/STATE_MSG header * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0) * * Return: the total Gap ACK blocks size */ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l, struct tipc_msg *hdr, bool uc) { struct tipc_gap_ack_blks *p; u16 sz = 0; /* Does peer support the Gap ACK blocks feature? */ if (l->peer_caps & TIPC_GAP_ACK_BLOCK) { p = (struct tipc_gap_ack_blks *)msg_data(hdr); sz = ntohs(p->len); /* Sanity check */ if (sz == struct_size(p, gacks, size_add(p->ugack_cnt, p->bgack_cnt))) { /* Good, check if the desired type exists */ if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt)) goto ok; /* Backward compatible: peer might not support bc, but uc? */ } else if (uc && sz == struct_size(p, gacks, p->ugack_cnt)) { if (p->ugack_cnt) { p->bgack_cnt = 0; goto ok; } } } /* Other cases: ignore! */ p = NULL; ok: *ga = p; return sz; } static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga, struct tipc_link *l, u8 start_index) { struct tipc_gap_ack *gacks = &ga->gacks[start_index]; struct sk_buff *skb = skb_peek(&l->deferdq); u16 expect, seqno = 0; u8 n = 0; if (!skb) return 0; expect = buf_seqno(skb); skb_queue_walk(&l->deferdq, skb) { seqno = buf_seqno(skb); if (unlikely(more(seqno, expect))) { gacks[n].ack = htons(expect - 1); gacks[n].gap = htons(seqno - expect); if (++n >= MAX_GAP_ACK_BLKS / 2) { pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n", l->name, n, skb_queue_len(&l->deferdq)); return n; } } else if (unlikely(less(seqno, expect))) { pr_warn("Unexpected skb in deferdq!\n"); continue; } expect = seqno + 1; } /* last block */ gacks[n].ack = htons(seqno); gacks[n].gap = 0; n++; return n; } /* tipc_build_gap_ack_blks - build Gap ACK blocks * @l: tipc unicast link * @hdr: the tipc message buffer to store the Gap ACK blocks after built * * The function builds Gap ACK blocks for both the unicast & broadcast receiver * links of a certain peer, the buffer after built has the network data format * as found at the struct tipc_gap_ack_blks definition. * * returns the actual allocated memory size */ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr) { struct tipc_link *bcl = l->bc_rcvlink; struct tipc_gap_ack_blks *ga; u16 len; ga = (struct tipc_gap_ack_blks *)msg_data(hdr); /* Start with broadcast link first */ tipc_bcast_lock(bcl->net); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0); tipc_bcast_unlock(bcl->net); /* Now for unicast link, but an explicit NACK only (???) */ ga->ugack_cnt = (msg_seq_gap(hdr)) ? __tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0; /* Total len */ len = struct_size(ga, gacks, size_add(ga->bgack_cnt, ga->ugack_cnt)); ga->len = htons(len); return len; } /* tipc_link_advance_transmq - advance TIPC link transmq queue by releasing * acked packets, also doing retransmissions if * gaps found * @l: tipc link with transmq queue to be advanced * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast) * @acked: seqno of last packet acked by peer without any gaps before * @gap: # of gap packets * @ga: buffer pointer to Gap ACK blocks from peer * @xmitq: queue for accumulating the retransmitted packets if any * @retransmitted: returned boolean value if a retransmission is really issued * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures * happens (- unlikely case) * * Return: the number of packets released from the link transmq */ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq, bool *retransmitted, int *rc) { struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL; struct tipc_gap_ack *gacks = NULL; struct sk_buff *skb, *_skb, *tmp; struct tipc_msg *hdr; u32 qlen = skb_queue_len(&l->transmq); u16 nacked = acked, ngap = gap, gack_cnt = 0; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; u16 seqno, n = 0; u16 end = r->acked, start = end, offset = r->last_gap; u16 si = (last_ga) ? last_ga->start_index : 0; bool is_uc = !link_is_bc_sndlink(l); bool bc_has_acked = false; trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq); /* Determine Gap ACK blocks if any for the particular link */ if (ga && is_uc) { /* Get the Gap ACKs, uc part */ gack_cnt = ga->ugack_cnt; gacks = &ga->gacks[ga->bgack_cnt]; } else if (ga) { /* Copy the Gap ACKs, bc part, for later renewal if needed */ this_ga = kmemdup(ga, struct_size(ga, gacks, ga->bgack_cnt), GFP_ATOMIC); if (likely(this_ga)) { this_ga->start_index = 0; /* Start with the bc Gap ACKs */ gack_cnt = this_ga->bgack_cnt; gacks = &this_ga->gacks[0]; } else { /* Hmm, we can get in trouble..., simply ignore it */ pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n"); } } /* Advance the link transmq */ skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb); next_gap_ack: if (less_eq(seqno, nacked)) { if (is_uc) goto release; /* Skip packets peer has already acked */ if (!more(seqno, r->acked)) continue; /* Get the next of last Gap ACK blocks */ while (more(seqno, end)) { if (!last_ga || si >= last_ga->bgack_cnt) break; start = end + offset + 1; end = ntohs(last_ga->gacks[si].ack); offset = ntohs(last_ga->gacks[si].gap); si++; WARN_ONCE(more(start, end) || (!offset && si < last_ga->bgack_cnt) || si > MAX_GAP_ACK_BLKS, "Corrupted Gap ACK: %d %d %d %d %d\n", start, end, offset, si, last_ga->bgack_cnt); } /* Check against the last Gap ACK block */ if (tipc_in_range(seqno, start, end)) continue; /* Update/release the packet peer is acking */ bc_has_acked = true; if (--TIPC_SKB_CB(skb)->ackers) continue; release: /* release skb */ __skb_unlink(skb, &l->transmq); kfree_skb(skb); } else if (less_eq(seqno, nacked + ngap)) { /* First gap: check if repeated retrans failures? */ if (unlikely(seqno == acked + 1 && link_retransmit_failure(l, r, rc))) { /* Ignore this bc Gap ACKs if any */ kfree(this_ga); this_ga = NULL; break; } /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; tipc_link_set_skb_retransmit_time(skb, l); _skb = pskb_copy(skb, GFP_ATOMIC); if (!_skb) continue; hdr = buf_msg(_skb); msg_set_ack(hdr, ack); msg_set_bcast_ack(hdr, bc_ack); _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; if (!is_uc) r->stats.retransmitted++; *retransmitted = true; /* Increase actual retrans counter & mark first time */ if (!TIPC_SKB_CB(skb)->retr_cnt++) TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ if (n >= gack_cnt) break; nacked = ntohs(gacks[n].ack); ngap = ntohs(gacks[n].gap); n++; goto next_gap_ack; } } /* Renew last Gap ACK blocks for bc if needed */ if (bc_has_acked) { if (this_ga) { kfree(last_ga); r->last_ga = this_ga; r->last_gap = gap; } else if (last_ga) { if (less(acked, start)) { si--; offset = start - acked - 1; } else if (less(acked, end)) { acked = end; } if (si < last_ga->bgack_cnt) { last_ga->start_index = si; r->last_gap = offset; } else { kfree(last_ga); r->last_ga = NULL; r->last_gap = 0; } } else { r->last_gap = 0; } r->acked = acked; } else { kfree(this_ga); } return qlen - skb_queue_len(&l->transmq); } /* tipc_link_build_state_msg: prepare link state message for transmission * * Note that sending of broadcast ack is coordinated among nodes, to reduce * risk of ack storms towards the sender */ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { if (!l) return 0; /* Broadcast ACK must be sent via a unicast link => defer to caller */ if (link_is_bc_rcvlink(l)) { if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) return 0; l->rcv_unacked = 0; /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ l->snd_nxt = l->rcv_nxt; return TIPC_LINK_SND_STATE; } /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } /* tipc_link_build_reset_msg: prepare link RESET or ACTIVATE message */ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { int mtyp = RESET_MSG; struct sk_buff *skb; if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); /* Inform peer that this endpoint is going down if applicable */ skb = skb_peek_tail(xmitq); if (skb && (l->state == LINK_RESET)) msg_set_peer_stopping(buf_msg(skb), 1); } /* tipc_link_build_nack_msg: prepare link nack message for transmission * Note that sending of broadcast NACK is coordinated among nodes, to * reduce the risk of NACK storms towards the sender */ static int tipc_link_build_nack_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; struct sk_buff_head *dfq = &l->deferdq; u32 defq_len = skb_queue_len(dfq); int match1, match2; if (link_is_bc_rcvlink(l)) { match1 = def_cnt & 0xf; match2 = tipc_own_addr(l->net) & 0xf; if (match1 == match2) return TIPC_LINK_SND_STATE; return 0; } if (defq_len >= 3 && !((defq_len - 3) % 16)) { u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, rcvgap, 0, 0, xmitq); } return 0; } /* tipc_link_rcv - process TIPC packets/messages arriving from off-node * @l: the link that should handle the message * @skb: TIPC packet * @xmitq: queue to place packets to be sent after this call */ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct sk_buff_head *defq = &l->deferdq; struct tipc_msg *hdr = buf_msg(skb); u16 seqno, rcv_nxt, win_lim; int released = 0; int rc = 0; /* Verify and update link state */ if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) return tipc_link_proto_rcv(l, skb, xmitq); /* Don't send probe at next timeout expiration */ l->silent_intv_cnt = 0; do { hdr = buf_msg(skb); seqno = msg_seqno(hdr); rcv_nxt = l->rcv_nxt; win_lim = rcv_nxt + TIPC_MAX_LINK_WIN; if (unlikely(!tipc_link_is_up(l))) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; kfree_skb(skb); break; } /* Drop if outside receive window */ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) { l->stats.duplicates++; kfree_skb(skb); break; } released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0, NULL, NULL, NULL, NULL); /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { if (!__tipc_skb_queue_sorted(defq, seqno, skb)) l->stats.duplicates++; rc |= tipc_link_build_nack_msg(l, xmitq); break; } /* Deliver packet */ l->rcv_nxt++; l->stats.recv_pkts++; if (unlikely(msg_user(hdr) == TUNNEL_PROTOCOL)) rc |= tipc_link_tnl_rcv(l, skb, l->inputq); else if (!tipc_data_input(l, skb, l->inputq)) rc |= tipc_link_input(l, skb, l->inputq, &l->reasm_buf); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; } while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt))); /* Forward queues and wake up waiting users */ if (released) { tipc_link_update_cwin(l, released, 0); tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } return rc; } static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, bool probe_reply, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { struct tipc_mon_state *mstate = &l->mon_state; struct sk_buff_head *dfq = &l->deferdq; struct tipc_link *bcl = l->bc_rcvlink; struct tipc_msg *hdr; struct sk_buff *skb; bool node_up = tipc_link_is_up(bcl); u16 glen = 0, bc_rcvgap = 0; int dlen = 0; void *data; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) return; if (!tipc_link_is_up(l) && (mtyp == STATE_MSG)) return; if ((probe || probe_reply) && !skb_queue_empty(dfq)) rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, tipc_max_domain_size + MAX_GAP_ACK_BLKS_SZ, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; hdr = buf_msg(skb); data = msg_data(hdr); msg_set_session(hdr, l->session); msg_set_bearer_id(hdr, l->bearer_id); msg_set_net_plane(hdr, l->net_plane); msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_bc_ack_invalid(hdr, !node_up); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); msg_set_redundant_link(hdr, node_up); msg_set_seq_gap(hdr, 0); msg_set_seqno(hdr, l->snd_nxt + U16_MAX / 2); if (mtyp == STATE_MSG) { if (l->peer_caps & TIPC_LINK_PROTO_SEQNO) msg_set_seqno(hdr, l->snd_nxt_state++); msg_set_seq_gap(hdr, rcvgap); bc_rcvgap = link_bc_rcv_gap(bcl); msg_set_bc_gap(hdr, bc_rcvgap); msg_set_probe(hdr, probe); msg_set_is_keepalive(hdr, probe || probe_reply); if (l->peer_caps & TIPC_GAP_ACK_BLOCK) glen = tipc_build_gap_ack_blks(l, hdr); tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + glen + dlen); skb_trim(skb, INT_H_SIZE + glen + dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ if (mtyp == ACTIVATE_MSG) { msg_set_dest_session_valid(hdr, 1); msg_set_dest_session(hdr, l->peer_session); } msg_set_max_pkt(hdr, l->advertised_mtu); strcpy(data, l->if_name); msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); } if (probe) l->stats.sent_probes++; if (rcvgap) l->stats.sent_nacks++; if (bc_rcvgap) bcl->stats.sent_nacks++; skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, skb); trace_tipc_proto_build(skb, false, l->name); } void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { u32 onode = tipc_own_addr(l->net); struct tipc_msg *hdr, *ihdr; struct sk_buff_head tnlq; struct sk_buff *skb; u32 dnode = l->addr; __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); if (!skb) { pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } hdr = buf_msg(skb); msg_set_msgcnt(hdr, 1); msg_set_bearer_id(hdr, l->peer_bearer_id); ihdr = (struct tipc_msg *)msg_data(hdr); tipc_msg_init(onode, ihdr, TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, BASIC_H_SIZE, dnode); msg_set_errcode(ihdr, TIPC_ERR_NO_PORT); __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, xmitq); } /* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets * with contents of the link's transmit and backlog queues. */ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, int mtyp, struct sk_buff_head *xmitq) { struct sk_buff_head *fdefq = &tnl->failover_deferdq; struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; bool pktcnt_need_update = false; u16 syncpt; int rc; if (!tnl) return; __skb_queue_head_init(&tnlq); /* Link Synching: * From now on, send only one single ("dummy") SYNCH message * to peer. The SYNCH message does not contain any data, just * a header conveying the synch point to the peer. */ if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, INT_H_SIZE, 0, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!tnlskb) { pr_warn("%sunable to create dummy SYNCH_MSG\n", link_co_err); return; } hdr = buf_msg(tnlskb); syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; msg_set_syncpt(hdr, syncpt); msg_set_bearer_id(hdr, l->peer_bearer_id); __skb_queue_tail(&tnlq, tnlskb); tipc_link_xmit(tnl, &tnlq, xmitq); return; } __skb_queue_head_init(&tmpxq); __skb_queue_head_init(&frags); /* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, BASIC_H_SIZE, 0, l->addr, tipc_own_addr(l->net), 0, 0, TIPC_ERR_NO_PORT); if (!skb) { pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq); /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); if (mtyp == SYNCH_MSG) pktcnt = l->snd_nxt - buf_seqno(skb_peek(&l->transmq)); else pktcnt = skb_queue_len(&l->transmq); pktcnt += skb_queue_len(&l->backlogq); msg_set_msgcnt(&tnlhdr, pktcnt); msg_set_bearer_id(&tnlhdr, l->peer_bearer_id); tnl: /* Wrap each packet into a tunnel packet */ skb_queue_walk(queue, skb) { hdr = buf_msg(skb); if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); /* Tunnel link MTU is not large enough? This could be * due to: * 1) Link MTU has just changed or set differently; * 2) Or FAILOVER on the top of a SYNCH message * * The 2nd case should not happen if peer supports * TIPC_TUNNEL_ENHANCED */ if (pktlen > tnl->mtu - INT_H_SIZE) { if (mtyp == FAILOVER_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, &frags); if (rc) { pr_warn("%sunable to frag msg: rc %d\n", link_co_err, rc); return; } pktcnt += skb_queue_len(&frags) - 1; pktcnt_need_update = true; skb_queue_splice_tail_init(&frags, &tnlq); continue; } /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED * => Just warn it and return! */ pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", link_co_err, msg_user(hdr), msg_type(hdr), msg_size(hdr)); return; } msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { pr_warn("%sunable to send packet\n", link_co_err); return; } skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE); skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen); __skb_queue_tail(&tnlq, tnlskb); } if (queue != &l->backlogq) { queue = &l->backlogq; goto tnl; } if (pktcnt_need_update) skb_queue_walk(&tnlq, skb) { hdr = buf_msg(skb); msg_set_msgcnt(hdr, pktcnt); } tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { tnl->drop_point = l->rcv_nxt; tnl->failover_reasm_skb = l->reasm_buf; l->reasm_buf = NULL; /* Failover the link's deferdq */ if (unlikely(!skb_queue_empty(fdefq))) { pr_warn("Link failover deferdq not empty: %d!\n", skb_queue_len(fdefq)); __skb_queue_purge(fdefq); } skb_queue_splice_init(&l->deferdq, fdefq); } } /** * tipc_link_failover_prepare() - prepare tnl for link failover * * This is a special version of the precursor - tipc_link_tnl_prepare(), * see the tipc_node_link_failover() for details * * @l: failover link * @tnl: tunnel link * @xmitq: queue for messages to be xmited */ void tipc_link_failover_prepare(struct tipc_link *l, struct tipc_link *tnl, struct sk_buff_head *xmitq) { struct sk_buff_head *fdefq = &tnl->failover_deferdq; tipc_link_create_dummy_tnl_msg(tnl, xmitq); /* This failover link endpoint was never established before, * so it has not received anything from peer. * Otherwise, it must be a normal failover situation or the * node has entered SELF_DOWN_PEER_LEAVING and both peer nodes * would have to start over from scratch instead. */ tnl->drop_point = 1; tnl->failover_reasm_skb = NULL; /* Initiate the link's failover deferdq */ if (unlikely(!skb_queue_empty(fdefq))) { pr_warn("Link failover deferdq not empty: %d!\n", skb_queue_len(fdefq)); __skb_queue_purge(fdefq); } } /* tipc_link_validate_msg(): validate message against current link state * Returns true if message should be accepted, otherwise false */ bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr) { u16 curr_session = l->peer_session; u16 session = msg_session(hdr); int mtyp = msg_type(hdr); if (msg_user(hdr) != LINK_PROTOCOL) return true; switch (mtyp) { case RESET_MSG: if (!l->in_session) return true; /* Accept only RESET with new session number */ return more(session, curr_session); case ACTIVATE_MSG: if (!l->in_session) return true; /* Accept only ACTIVATE with new or current session number */ return !less(session, curr_session); case STATE_MSG: /* Accept only STATE with current session number */ if (!l->in_session) return false; if (session != curr_session) return false; /* Extra sanity check */ if (!tipc_link_is_up(l) && msg_ack(hdr)) return false; if (!(l->peer_caps & TIPC_LINK_PROTO_SEQNO)) return true; /* Accept only STATE with new sequence number */ return !less(msg_seqno(hdr), l->rcv_nxt_state); default: return false; } } /* tipc_link_proto_rcv(): receive link level protocol message : * Note that network plane id propagates through the network, and may * change at any time. The node with lowest numerical id determines * network plane */ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); struct tipc_gap_ack_blks *ga = NULL; bool reply = msg_probe(hdr), retransmitted = false; u32 dlen = msg_data_sz(hdr), glen = 0, msg_max; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 gap = msg_seq_gap(hdr); u16 ack = msg_ack(hdr); u16 rcv_nxt = l->rcv_nxt; u16 rcvgap = 0; int mtyp = msg_type(hdr); int rc = 0, released; char *if_name; void *data; trace_tipc_proto_rcv(skb, false, l->name); if (dlen > U16_MAX) goto exit; if (tipc_link_is_blocked(l) || !xmitq) goto exit; if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); if (skb_linearize(skb)) goto exit; hdr = buf_msg(skb); data = msg_data(hdr); if (!tipc_link_validate_msg(l, hdr)) { trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!"); trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!"); goto exit; } switch (mtyp) { case RESET_MSG: case ACTIVATE_MSG: msg_max = msg_max_pkt(hdr); if (msg_max < tipc_bearer_min_mtu(l->net, l->bearer_id)) break; /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME) break; if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) break; strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own priority if peer's priority is higher */ if (tipc_in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI)) l->priority = peers_prio; /* If peer is going down we want full re-establish cycle */ if (msg_peer_stopping(hdr)) { rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); break; } /* If this endpoint was re-created while peer was ESTABLISHING * it doesn't know current session number. Force re-synch. */ if (mtyp == ACTIVATE_MSG && msg_dest_session_valid(hdr) && l->session != msg_dest_session(hdr)) { if (less(l->session, msg_dest_session(hdr))) l->session = msg_dest_session(hdr) + 1; break; } /* ACTIVATE_MSG serves as PEER_RESET if link is already down */ if (mtyp == RESET_MSG || !tipc_link_is_up(l)) rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT); /* ACTIVATE_MSG takes up link if it was already locally reset */ if (mtyp == ACTIVATE_MSG && l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; l->peer_session = msg_session(hdr); l->in_session = true; l->peer_bearer_id = msg_bearer_id(hdr); if (l->mtu > msg_max) l->mtu = msg_max; break; case STATE_MSG: /* Validate Gap ACK blocks, drop if invalid */ glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); if (glen > dlen) break; l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ if (tipc_in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) { l->tolerance = peers_tol; l->bc_rcvlink->tolerance = peers_tol; } /* Update own prio if peer indicates a different value */ if ((peers_prio != l->priority) && tipc_in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) { l->priority = peers_prio; rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } l->silent_intv_cnt = 0; l->stats.recv_states++; if (msg_probe(hdr)) l->stats.recv_probes++; if (!tipc_link_is_up(l)) { if (l->state == LINK_ESTABLISHING) rc = TIPC_LINK_UP_EVT; break; } tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ if ((reply || msg_is_keepalive(hdr)) && more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l) && skb_queue_empty(&l->deferdq)) rcvgap = peers_snd_nxt - l->rcv_nxt; if (rcvgap || reply) tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, rcvgap, 0, 0, xmitq); released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq, &retransmitted, &rc); if (gap) l->stats.recv_nacks++; if (released || retransmitted) tipc_link_update_cwin(l, released, retransmitted); if (released) tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); } exit: kfree_skb(skb); return rc; } /* tipc_link_build_bc_proto_msg() - create broadcast protocol message */ static bool tipc_link_build_bc_proto_msg(struct tipc_link *l, bool bcast, u16 peers_snd_nxt, struct sk_buff_head *xmitq) { struct sk_buff *skb; struct tipc_msg *hdr; struct sk_buff *dfrd_skb = skb_peek(&l->deferdq); u16 ack = l->rcv_nxt - 1; u16 gap_to = peers_snd_nxt - 1; skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE, 0, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return false; hdr = buf_msg(skb); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_bcast_ack(hdr, ack); msg_set_bcgap_after(hdr, ack); if (dfrd_skb) gap_to = buf_seqno(dfrd_skb) - 1; msg_set_bcgap_to(hdr, gap_to); msg_set_non_seq(hdr, bcast); __skb_queue_tail(xmitq, skb); return true; } /* tipc_link_build_bc_init_msg() - synchronize broadcast link endpoints. * * Give a newly added peer node the sequence number where it should * start receiving and acking broadcast packets. */ static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq) { struct sk_buff_head list; __skb_queue_head_init(&list); if (!tipc_link_build_bc_proto_msg(l->bc_rcvlink, false, 0, &list)) return; msg_set_bc_ack_invalid(buf_msg(skb_peek(&list)), true); tipc_link_xmit(l, &list, xmitq); } /* tipc_link_bc_init_rcv - receive initial broadcast synch data from peer */ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) { int mtyp = msg_type(hdr); u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); if (tipc_link_is_up(l)) return; if (msg_user(hdr) == BCAST_PROTOCOL) { l->rcv_nxt = peers_snd_nxt; l->state = LINK_ESTABLISHED; return; } if (l->peer_caps & TIPC_BCAST_SYNCH) return; if (msg_peer_node_is_up(hdr)) return; /* Compatibility: accept older, less safe initial synch data */ if ((mtyp == RESET_MSG) || (mtyp == ACTIVATE_MSG)) l->rcv_nxt = peers_snd_nxt; } /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, struct sk_buff_head *xmitq) { u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); int rc = 0; if (!tipc_link_is_up(l)) return rc; if (!msg_peer_node_is_up(hdr)) return rc; /* Open when peer acknowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; if (!l->bc_peer_is_up) return rc; /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) return rc; l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) rc |= TIPC_LINK_SND_STATE; /* Return now if sender supports nack via STATE messages */ if (l->peer_caps & TIPC_BCAST_STATE_NACK) return rc; /* Otherwise, be backwards compatible */ if (!more(peers_snd_nxt, l->rcv_nxt)) { l->nack_state = BC_NACK_SND_CONDITIONAL; return 0; } /* Don't NACK if one was recently sent or peeked */ if (l->nack_state == BC_NACK_SND_SUPPRESS) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; return 0; } /* Conditionally delay NACK sending until next synch rcv */ if (l->nack_state == BC_NACK_SND_CONDITIONAL) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) return 0; } /* Send NACK now but suppress next one */ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); l->nack_state = BC_NACK_SND_SUPPRESS; return 0; } int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap, struct tipc_gap_ack_blks *ga, struct sk_buff_head *xmitq, struct sk_buff_head *retrq) { struct tipc_link *l = r->bc_sndlink; bool unused = false; int rc = 0; if (!tipc_link_is_up(r) || !r->bc_peer_is_up) return 0; if (gap) { l->stats.recv_nacks++; r->stats.recv_nacks++; } if (less(acked, r->acked) || (acked == r->acked && !gap && !ga)) return 0; trace_tipc_link_bc_ack(r, acked, gap, &l->transmq); tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc); tipc_link_advance_backlog(l, xmitq); if (unlikely(!skb_queue_empty(&l->wakeupq))) link_prepare_wakeup(l); return rc; } /* tipc_link_bc_nack_rcv(): receive broadcast nack message * This function is here for backwards compatibility, since * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. */ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) { struct tipc_msg *hdr = buf_msg(skb); u32 dnode = msg_destnode(hdr); int mtyp = msg_type(hdr); u16 acked = msg_bcast_ack(hdr); u16 from = acked + 1; u16 to = msg_bcgap_to(hdr); u16 peers_snd_nxt = to + 1; int rc = 0; kfree_skb(skb); if (!tipc_link_is_up(l) || !l->bc_peer_is_up) return 0; if (mtyp != STATE_MSG) return 0; if (dnode == tipc_own_addr(l->net)) { rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq, xmitq); l->stats.recv_nacks++; return rc; } /* Msg for other node => suppress own NACK at next sync if applicable */ if (more(peers_snd_nxt, l->rcv_nxt) && !less(l->rcv_nxt, from)) l->nack_state = BC_NACK_SND_SUPPRESS; return 0; } void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win) { int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE); l->min_win = min_win; l->ssthresh = max_win; l->max_win = max_win; l->window = min_win; l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2; l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4; l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6; l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8; l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } /** * tipc_link_reset_stats - reset link statistics * @l: pointer to link */ void tipc_link_reset_stats(struct tipc_link *l) { memset(&l->stats, 0, sizeof(l->stats)); } static void link_print(struct tipc_link *l, const char *str) { struct sk_buff *hskb = skb_peek(&l->transmq); u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt - 1; u16 tail = l->snd_nxt - 1; pr_info("%s Link <%s> state %x\n", str, l->name, l->state); pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n", skb_queue_len(&l->transmq), head, tail, skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt); } /* Parse and validate nested (link) properties valid for media, bearer and link */ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]) { int err; err = nla_parse_nested_deprecated(props, TIPC_NLA_PROP_MAX, prop, tipc_nl_prop_policy, NULL); if (err) return err; if (props[TIPC_NLA_PROP_PRIO]) { u32 prio; prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); if (prio > TIPC_MAX_LINK_PRI) return -EINVAL; } if (props[TIPC_NLA_PROP_TOL]) { u32 tol; tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL)) return -EINVAL; } if (props[TIPC_NLA_PROP_WIN]) { u32 max_win; max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]); if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN) return -EINVAL; } return 0; } static int __tipc_nl_add_stats(struct sk_buff *skb, struct tipc_stats *s) { int i; struct nlattr *stats; struct nla_map { u32 key; u32 val; }; struct nla_map map[] = { {TIPC_NLA_STATS_RX_INFO, 0}, {TIPC_NLA_STATS_RX_FRAGMENTS, s->recv_fragments}, {TIPC_NLA_STATS_RX_FRAGMENTED, s->recv_fragmented}, {TIPC_NLA_STATS_RX_BUNDLES, s->recv_bundles}, {TIPC_NLA_STATS_RX_BUNDLED, s->recv_bundled}, {TIPC_NLA_STATS_TX_INFO, 0}, {TIPC_NLA_STATS_TX_FRAGMENTS, s->sent_fragments}, {TIPC_NLA_STATS_TX_FRAGMENTED, s->sent_fragmented}, {TIPC_NLA_STATS_TX_BUNDLES, s->sent_bundles}, {TIPC_NLA_STATS_TX_BUNDLED, s->sent_bundled}, {TIPC_NLA_STATS_MSG_PROF_TOT, (s->msg_length_counts) ? s->msg_length_counts : 1}, {TIPC_NLA_STATS_MSG_LEN_CNT, s->msg_length_counts}, {TIPC_NLA_STATS_MSG_LEN_TOT, s->msg_lengths_total}, {TIPC_NLA_STATS_MSG_LEN_P0, s->msg_length_profile[0]}, {TIPC_NLA_STATS_MSG_LEN_P1, s->msg_length_profile[1]}, {TIPC_NLA_STATS_MSG_LEN_P2, s->msg_length_profile[2]}, {TIPC_NLA_STATS_MSG_LEN_P3, s->msg_length_profile[3]}, {TIPC_NLA_STATS_MSG_LEN_P4, s->msg_length_profile[4]}, {TIPC_NLA_STATS_MSG_LEN_P5, s->msg_length_profile[5]}, {TIPC_NLA_STATS_MSG_LEN_P6, s->msg_length_profile[6]}, {TIPC_NLA_STATS_RX_STATES, s->recv_states}, {TIPC_NLA_STATS_RX_PROBES, s->recv_probes}, {TIPC_NLA_STATS_RX_NACKS, s->recv_nacks}, {TIPC_NLA_STATS_RX_DEFERRED, s->deferred_recv}, {TIPC_NLA_STATS_TX_STATES, s->sent_states}, {TIPC_NLA_STATS_TX_PROBES, s->sent_probes}, {TIPC_NLA_STATS_TX_NACKS, s->sent_nacks}, {TIPC_NLA_STATS_TX_ACKS, s->sent_acks}, {TIPC_NLA_STATS_RETRANSMITTED, s->retransmitted}, {TIPC_NLA_STATS_DUPLICATES, s->duplicates}, {TIPC_NLA_STATS_LINK_CONGS, s->link_congs}, {TIPC_NLA_STATS_MAX_QUEUE, s->max_queue_sz}, {TIPC_NLA_STATS_AVG_QUEUE, s->queue_sz_counts ? (s->accu_queue_sz / s->queue_sz_counts) : 0} }; stats = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!stats) return -EMSGSIZE; for (i = 0; i < ARRAY_SIZE(map); i++) if (nla_put_u32(skb, map[i].key, map[i].val)) goto msg_full; nla_nest_end(skb, stats); return 0; msg_full: nla_nest_cancel(skb, stats); return -EMSGSIZE; } /* Caller should hold appropriate locks to protect the link */ int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *link, int nlflags) { u32 self = tipc_own_addr(net); struct nlattr *attrs; struct nlattr *prop; void *hdr; int err; hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, nlflags, TIPC_NL_LINK_GET); if (!hdr) return -EMSGSIZE; attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, link->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_DEST, tipc_cluster_mask(self))) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_MTU, link->mtu)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, link->stats.recv_pkts)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, link->stats.sent_pkts)) goto attr_msg_full; if (tipc_link_is_up(link)) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; if (link->active) if (nla_put_flag(msg->skb, TIPC_NLA_LINK_ACTIVE)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, link->tolerance)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, link->window)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_PRIO, link->priority)) goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_stats(msg->skb, &link->stats); if (err) goto attr_msg_full; nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } static int __tipc_nl_add_bc_link_stat(struct sk_buff *skb, struct tipc_stats *stats) { int i; struct nlattr *nest; struct nla_map { __u32 key; __u32 val; }; struct nla_map map[] = { {TIPC_NLA_STATS_RX_INFO, stats->recv_pkts}, {TIPC_NLA_STATS_RX_FRAGMENTS, stats->recv_fragments}, {TIPC_NLA_STATS_RX_FRAGMENTED, stats->recv_fragmented}, {TIPC_NLA_STATS_RX_BUNDLES, stats->recv_bundles}, {TIPC_NLA_STATS_RX_BUNDLED, stats->recv_bundled}, {TIPC_NLA_STATS_TX_INFO, stats->sent_pkts}, {TIPC_NLA_STATS_TX_FRAGMENTS, stats->sent_fragments}, {TIPC_NLA_STATS_TX_FRAGMENTED, stats->sent_fragmented}, {TIPC_NLA_STATS_TX_BUNDLES, stats->sent_bundles}, {TIPC_NLA_STATS_TX_BUNDLED, stats->sent_bundled}, {TIPC_NLA_STATS_RX_NACKS, stats->recv_nacks}, {TIPC_NLA_STATS_RX_DEFERRED, stats->deferred_recv}, {TIPC_NLA_STATS_TX_NACKS, stats->sent_nacks}, {TIPC_NLA_STATS_TX_ACKS, stats->sent_acks}, {TIPC_NLA_STATS_RETRANSMITTED, stats->retransmitted}, {TIPC_NLA_STATS_DUPLICATES, stats->duplicates}, {TIPC_NLA_STATS_LINK_CONGS, stats->link_congs}, {TIPC_NLA_STATS_MAX_QUEUE, stats->max_queue_sz}, {TIPC_NLA_STATS_AVG_QUEUE, stats->queue_sz_counts ? (stats->accu_queue_sz / stats->queue_sz_counts) : 0} }; nest = nla_nest_start_noflag(skb, TIPC_NLA_LINK_STATS); if (!nest) return -EMSGSIZE; for (i = 0; i < ARRAY_SIZE(map); i++) if (nla_put_u32(skb, map[i].key, map[i].val)) goto msg_full; nla_nest_end(skb, nest); return 0; msg_full: nla_nest_cancel(skb, nest); return -EMSGSIZE; } int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg, struct tipc_link *bcl) { int err; void *hdr; struct nlattr *attrs; struct nlattr *prop; u32 bc_mode = tipc_bcast_get_mode(net); u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net); if (!bcl) return 0; tipc_bcast_lock(net); hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); if (!hdr) { tipc_bcast_unlock(net); return -EMSGSIZE; } attrs = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK); if (!attrs) goto msg_full; /* The broadcast link is always up */ if (nla_put_flag(msg->skb, TIPC_NLA_LINK_UP)) goto attr_msg_full; if (nla_put_flag(msg->skb, TIPC_NLA_LINK_BROADCAST)) goto attr_msg_full; if (nla_put_string(msg->skb, TIPC_NLA_LINK_NAME, bcl->name)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_RX, 0)) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_LINK_TX, 0)) goto attr_msg_full; prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP); if (!prop) goto attr_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win)) goto prop_msg_full; if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode)) goto prop_msg_full; if (bc_mode & BCLINK_MODE_SEL) if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST_RATIO, bc_ratio)) goto prop_msg_full; nla_nest_end(msg->skb, prop); err = __tipc_nl_add_bc_link_stat(msg->skb, &bcl->stats); if (err) goto attr_msg_full; tipc_bcast_unlock(net); nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); return 0; prop_msg_full: nla_nest_cancel(msg->skb, prop); attr_msg_full: nla_nest_cancel(msg->skb, attrs); msg_full: tipc_bcast_unlock(net); genlmsg_cancel(msg->skb, hdr); return -EMSGSIZE; } void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; if (l->bc_rcvlink) l->bc_rcvlink->tolerance = tol; if (tipc_link_is_up(l)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq) { l->priority = prio; tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) { l->abort_limit = limit; } /** * tipc_link_dump - dump TIPC link data * @l: tipc link to be dumped * @dqueues: bitmask to decide if any link queue to be dumped? * - TIPC_DUMP_NONE: don't dump link queues * - TIPC_DUMP_TRANSMQ: dump link transmq queue * - TIPC_DUMP_BACKLOGQ: dump link backlog queue * - TIPC_DUMP_DEFERDQ: dump link deferd queue * - TIPC_DUMP_INPUTQ: dump link input queue * - TIPC_DUMP_WAKEUP: dump link wakeup queue * - TIPC_DUMP_ALL: dump all the link queues above * @buf: returned buffer of dump data in format */ int tipc_link_dump(struct tipc_link *l, u16 dqueues, char *buf) { int i = 0; size_t sz = (dqueues) ? LINK_LMAX : LINK_LMIN; struct sk_buff_head *list; struct sk_buff *hskb, *tskb; u32 len; if (!l) { i += scnprintf(buf, sz, "link data: (null)\n"); return i; } i += scnprintf(buf, sz, "link data: %x", l->addr); i += scnprintf(buf + i, sz - i, " %x", l->state); i += scnprintf(buf + i, sz - i, " %u", l->in_session); i += scnprintf(buf + i, sz - i, " %u", l->session); i += scnprintf(buf + i, sz - i, " %u", l->peer_session); i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt); i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt); i += scnprintf(buf + i, sz - i, " %u", l->snd_nxt_state); i += scnprintf(buf + i, sz - i, " %u", l->rcv_nxt_state); i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked); list = &l->transmq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); list = &l->deferdq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); list = &l->backlogq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); list = l->inputq; len = skb_queue_len(list); hskb = skb_peek(list); tskb = skb_peek_tail(list); i += scnprintf(buf + i, sz - i, " | %u %u %u\n", len, (hskb) ? msg_seqno(buf_msg(hskb)) : 0, (tskb) ? msg_seqno(buf_msg(tskb)) : 0); if (dqueues & TIPC_DUMP_TRANSMQ) { i += scnprintf(buf + i, sz - i, "transmq: "); i += tipc_list_dump(&l->transmq, false, buf + i); } if (dqueues & TIPC_DUMP_BACKLOGQ) { i += scnprintf(buf + i, sz - i, "backlogq: <%u %u %u %u %u>, ", l->backlog[TIPC_LOW_IMPORTANCE].len, l->backlog[TIPC_MEDIUM_IMPORTANCE].len, l->backlog[TIPC_HIGH_IMPORTANCE].len, l->backlog[TIPC_CRITICAL_IMPORTANCE].len, l->backlog[TIPC_SYSTEM_IMPORTANCE].len); i += tipc_list_dump(&l->backlogq, false, buf + i); } if (dqueues & TIPC_DUMP_DEFERDQ) { i += scnprintf(buf + i, sz - i, "deferdq: "); i += tipc_list_dump(&l->deferdq, false, buf + i); } if (dqueues & TIPC_DUMP_INPUTQ) { i += scnprintf(buf + i, sz - i, "inputq: "); i += tipc_list_dump(l->inputq, false, buf + i); } if (dqueues & TIPC_DUMP_WAKEUP) { i += scnprintf(buf + i, sz - i, "wakeup: "); i += tipc_list_dump(&l->wakeupq, false, buf + i); } return i; }
265 265 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> #include <linux/mm.h> #include <asm/current.h> #include <asm/traps.h> #include <asm/vdso.h> struct vdso_exception_table_entry { int insn, fixup; }; bool fixup_vdso_exception(struct pt_regs *regs, int trapnr, unsigned long error_code, unsigned long fault_addr) { const struct vdso_image *image = current->mm->context.vdso_image; const struct vdso_exception_table_entry *extable; unsigned int nr_entries, i; unsigned long base; /* * Do not attempt to fixup #DB or #BP. It's impossible to identify * whether or not a #DB/#BP originated from within an SGX enclave and * SGX enclaves are currently the only use case for vDSO fixup. */ if (trapnr == X86_TRAP_DB || trapnr == X86_TRAP_BP) return false; if (!current->mm->context.vdso) return false; base = (unsigned long)current->mm->context.vdso + image->extable_base; nr_entries = image->extable_len / (sizeof(*extable)); extable = image->extable; for (i = 0; i < nr_entries; i++) { if (regs->ip == base + extable[i].insn) { regs->ip = base + extable[i].fixup; regs->di = trapnr; regs->si = error_code; regs->dx = fault_addr; return true; } } return false; }
6 1 3 1 1 1 5 1 1 1 2 6 2 2 1 1 1 7 2 1 1 1 2 6 1 1 2 1 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "../fs/internal.h" #include "io_uring.h" #include "fs.h" struct io_rename { struct file *file; int old_dfd; int new_dfd; struct filename *oldpath; struct filename *newpath; int flags; }; struct io_unlink { struct file *file; int dfd; int flags; struct filename *filename; }; struct io_mkdir { struct file *file; int dfd; umode_t mode; struct filename *filename; }; struct io_link { struct file *file; int old_dfd; int new_dfd; struct filename *oldpath; struct filename *newpath; int flags; }; int io_renameat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); const char __user *oldf, *newf; if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; ren->old_dfd = READ_ONCE(sqe->fd); oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); ren->new_dfd = READ_ONCE(sqe->len); ren->flags = READ_ONCE(sqe->rename_flags); ren->oldpath = getname(oldf); if (IS_ERR(ren->oldpath)) return PTR_ERR(ren->oldpath); ren->newpath = getname(newf); if (IS_ERR(ren->newpath)) { putname(ren->oldpath); return PTR_ERR(ren->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_renameat(struct io_kiocb *req, unsigned int issue_flags) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, ren->newpath, ren->flags); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_renameat_cleanup(struct io_kiocb *req) { struct io_rename *ren = io_kiocb_to_cmd(req, struct io_rename); putname(ren->oldpath); putname(ren->newpath); } int io_unlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink); const char __user *fname; if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; un->dfd = READ_ONCE(sqe->fd); un->flags = READ_ONCE(sqe->unlink_flags); if (un->flags & ~AT_REMOVEDIR) return -EINVAL; fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); un->filename = getname(fname); if (IS_ERR(un->filename)) return PTR_ERR(un->filename); req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_unlink *un = io_kiocb_to_cmd(req, struct io_unlink); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); if (un->flags & AT_REMOVEDIR) ret = do_rmdir(un->dfd, un->filename); else ret = do_unlinkat(un->dfd, un->filename); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_unlinkat_cleanup(struct io_kiocb *req) { struct io_unlink *ul = io_kiocb_to_cmd(req, struct io_unlink); putname(ul->filename); } int io_mkdirat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir); const char __user *fname; if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; mkd->dfd = READ_ONCE(sqe->fd); mkd->mode = READ_ONCE(sqe->len); fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); mkd->filename = getname(fname); if (IS_ERR(mkd->filename)) return PTR_ERR(mkd->filename); req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags) { struct io_mkdir *mkd = io_kiocb_to_cmd(req, struct io_mkdir); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_mkdirat_cleanup(struct io_kiocb *req) { struct io_mkdir *md = io_kiocb_to_cmd(req, struct io_mkdir); putname(md->filename); } int io_symlinkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); const char __user *oldpath, *newpath; if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; sl->new_dfd = READ_ONCE(sqe->fd); oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); sl->oldpath = getname(oldpath); if (IS_ERR(sl->oldpath)) return PTR_ERR(sl->oldpath); sl->newpath = getname(newpath); if (IS_ERR(sl->newpath)) { putname(sl->oldpath); return PTR_ERR(sl->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } int io_linkat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); const char __user *oldf, *newf; if (sqe->buf_index || sqe->splice_fd_in) return -EINVAL; if (unlikely(req->flags & REQ_F_FIXED_FILE)) return -EBADF; lnk->old_dfd = READ_ONCE(sqe->fd); lnk->new_dfd = READ_ONCE(sqe->len); oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); lnk->flags = READ_ONCE(sqe->hardlink_flags); lnk->oldpath = getname_uflags(oldf, lnk->flags); if (IS_ERR(lnk->oldpath)) return PTR_ERR(lnk->oldpath); lnk->newpath = getname(newf); if (IS_ERR(lnk->newpath)) { putname(lnk->oldpath); return PTR_ERR(lnk->newpath); } req->flags |= REQ_F_NEED_CLEANUP; req->flags |= REQ_F_FORCE_ASYNC; return 0; } int io_linkat(struct io_kiocb *req, unsigned int issue_flags) { struct io_link *lnk = io_kiocb_to_cmd(req, struct io_link); int ret; WARN_ON_ONCE(issue_flags & IO_URING_F_NONBLOCK); ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, lnk->newpath, lnk->flags); req->flags &= ~REQ_F_NEED_CLEANUP; io_req_set_res(req, ret, 0); return IOU_OK; } void io_link_cleanup(struct io_kiocb *req) { struct io_link *sl = io_kiocb_to_cmd(req, struct io_link); putname(sl->oldpath); putname(sl->newpath); }
2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 // SPDX-License-Identifier: GPL-2.0-or-later /* cx25840 - Conexant CX25840 audio/video decoder driver * * Copyright (C) 2004 Ulf Eklund * * Based on the saa7115 driver and on the first version of Chris Kennedy's * cx25840 driver. * * Changes by Tyler Trafford <tatrafford@comcast.net> * - cleanup/rewrite for V4L2 API (2005) * * VBI support by Hans Verkuil <hverkuil@xs4all.nl>. * * NTSC sliced VBI support by Christopher Neufeld <television@cneufeld.ca> * with additional fixes by Hans Verkuil <hverkuil@xs4all.nl>. * * CX23885 support by Steven Toth <stoth@linuxtv.org>. * * CX2388[578] IRQ handling, IO Pin mux configuration and other small fixes are * Copyright (C) 2010 Andy Walls <awalls@md.metrocast.net> * * CX23888 DIF support for the HVR1850 * Copyright (C) 2011 Steven Toth <stoth@kernellabs.com> * * CX2584x pin to pad mapping and output format configuration support are * Copyright (C) 2011 Maciej S. Szmigiero <mail@maciej.szmigiero.name> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/videodev2.h> #include <linux/i2c.h> #include <linux/delay.h> #include <linux/math64.h> #include <media/v4l2-common.h> #include <media/drv-intf/cx25840.h> #include "cx25840-core.h" MODULE_DESCRIPTION("Conexant CX25840 audio/video decoder driver"); MODULE_AUTHOR("Ulf Eklund, Chris Kennedy, Hans Verkuil, Tyler Trafford"); MODULE_LICENSE("GPL"); #define CX25840_VID_INT_STAT_REG 0x410 #define CX25840_VID_INT_STAT_BITS 0x0000ffff #define CX25840_VID_INT_MASK_BITS 0xffff0000 #define CX25840_VID_INT_MASK_SHFT 16 #define CX25840_VID_INT_MASK_REG 0x412 #define CX23885_AUD_MC_INT_MASK_REG 0x80c #define CX23885_AUD_MC_INT_STAT_BITS 0xffff0000 #define CX23885_AUD_MC_INT_CTRL_BITS 0x0000ffff #define CX23885_AUD_MC_INT_STAT_SHFT 16 #define CX25840_AUD_INT_CTRL_REG 0x812 #define CX25840_AUD_INT_STAT_REG 0x813 #define CX23885_PIN_CTRL_IRQ_REG 0x123 #define CX23885_PIN_CTRL_IRQ_IR_STAT 0x40 #define CX23885_PIN_CTRL_IRQ_AUD_STAT 0x20 #define CX23885_PIN_CTRL_IRQ_VID_STAT 0x10 #define CX25840_IR_STATS_REG 0x210 #define CX25840_IR_IRQEN_REG 0x214 static int cx25840_debug; module_param_named(debug, cx25840_debug, int, 0644); MODULE_PARM_DESC(debug, "Debugging messages [0=Off (default) 1=On]"); /* ----------------------------------------------------------------------- */ static void cx23888_std_setup(struct i2c_client *client); int cx25840_write(struct i2c_client *client, u16 addr, u8 value) { u8 buffer[3]; buffer[0] = addr >> 8; buffer[1] = addr & 0xff; buffer[2] = value; return i2c_master_send(client, buffer, 3); } int cx25840_write4(struct i2c_client *client, u16 addr, u32 value) { u8 buffer[6]; buffer[0] = addr >> 8; buffer[1] = addr & 0xff; buffer[2] = value & 0xff; buffer[3] = (value >> 8) & 0xff; buffer[4] = (value >> 16) & 0xff; buffer[5] = value >> 24; return i2c_master_send(client, buffer, 6); } u8 cx25840_read(struct i2c_client *client, u16 addr) { struct i2c_msg msgs[2]; u8 tx_buf[2], rx_buf[1]; /* Write register address */ tx_buf[0] = addr >> 8; tx_buf[1] = addr & 0xff; msgs[0].addr = client->addr; msgs[0].flags = 0; msgs[0].len = 2; msgs[0].buf = (char *)tx_buf; /* Read data from register */ msgs[1].addr = client->addr; msgs[1].flags = I2C_M_RD; msgs[1].len = 1; msgs[1].buf = (char *)rx_buf; if (i2c_transfer(client->adapter, msgs, 2) < 2) return 0; return rx_buf[0]; } u32 cx25840_read4(struct i2c_client *client, u16 addr) { struct i2c_msg msgs[2]; u8 tx_buf[2], rx_buf[4]; /* Write register address */ tx_buf[0] = addr >> 8; tx_buf[1] = addr & 0xff; msgs[0].addr = client->addr; msgs[0].flags = 0; msgs[0].len = 2; msgs[0].buf = (char *)tx_buf; /* Read data from registers */ msgs[1].addr = client->addr; msgs[1].flags = I2C_M_RD; msgs[1].len = 4; msgs[1].buf = (char *)rx_buf; if (i2c_transfer(client->adapter, msgs, 2) < 2) return 0; return (rx_buf[3] << 24) | (rx_buf[2] << 16) | (rx_buf[1] << 8) | rx_buf[0]; } int cx25840_and_or(struct i2c_client *client, u16 addr, unsigned int and_mask, u8 or_value) { return cx25840_write(client, addr, (cx25840_read(client, addr) & and_mask) | or_value); } int cx25840_and_or4(struct i2c_client *client, u16 addr, u32 and_mask, u32 or_value) { return cx25840_write4(client, addr, (cx25840_read4(client, addr) & and_mask) | or_value); } /* ----------------------------------------------------------------------- */ static int set_input(struct i2c_client *client, enum cx25840_video_input vid_input, enum cx25840_audio_input aud_input); /* ----------------------------------------------------------------------- */ static int cx23885_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *p) { struct i2c_client *client = v4l2_get_subdevdata(sd); int i; u32 pin_ctrl; u8 gpio_oe, gpio_data, strength; pin_ctrl = cx25840_read4(client, 0x120); gpio_oe = cx25840_read(client, 0x160); gpio_data = cx25840_read(client, 0x164); for (i = 0; i < n; i++) { strength = p[i].strength; if (strength > CX25840_PIN_DRIVE_FAST) strength = CX25840_PIN_DRIVE_FAST; switch (p[i].pin) { case CX23885_PIN_IRQ_N_GPIO16: if (p[i].function != CX23885_PAD_IRQ_N) { /* GPIO16 */ pin_ctrl &= ~(0x1 << 25); } else { /* IRQ_N */ if (p[i].flags & (BIT(V4L2_SUBDEV_IO_PIN_DISABLE) | BIT(V4L2_SUBDEV_IO_PIN_INPUT))) { pin_ctrl &= ~(0x1 << 25); } else { pin_ctrl |= (0x1 << 25); } if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)) { pin_ctrl &= ~(0x1 << 24); } else { pin_ctrl |= (0x1 << 24); } } break; case CX23885_PIN_IR_RX_GPIO19: if (p[i].function != CX23885_PAD_GPIO19) { /* IR_RX */ gpio_oe |= (0x1 << 0); pin_ctrl &= ~(0x3 << 18); pin_ctrl |= (strength << 18); } else { /* GPIO19 */ gpio_oe &= ~(0x1 << 0); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 0); gpio_data |= ((p[i].value & 0x1) << 0); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_IR_TX_GPIO20: if (p[i].function != CX23885_PAD_GPIO20) { /* IR_TX */ gpio_oe |= (0x1 << 1); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pin_ctrl &= ~(0x1 << 10); else pin_ctrl |= (0x1 << 10); pin_ctrl &= ~(0x3 << 18); pin_ctrl |= (strength << 18); } else { /* GPIO20 */ gpio_oe &= ~(0x1 << 1); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 1); gpio_data |= ((p[i].value & 0x1) << 1); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_SDAT_GPIO21: if (p[i].function != CX23885_PAD_GPIO21) { /* I2S_SDAT */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 2); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO21 */ gpio_oe &= ~(0x1 << 2); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 2); gpio_data |= ((p[i].value & 0x1) << 2); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_WCLK_GPIO22: if (p[i].function != CX23885_PAD_GPIO22) { /* I2S_WCLK */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 3); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO22 */ gpio_oe &= ~(0x1 << 3); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 3); gpio_data |= ((p[i].value & 0x1) << 3); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; case CX23885_PIN_I2S_BCLK_GPIO23: if (p[i].function != CX23885_PAD_GPIO23) { /* I2S_BCLK */ /* TODO: Input or Output config */ gpio_oe |= (0x1 << 4); pin_ctrl &= ~(0x3 << 22); pin_ctrl |= (strength << 22); } else { /* GPIO23 */ gpio_oe &= ~(0x1 << 4); if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_SET_VALUE)) { gpio_data &= ~(0x1 << 4); gpio_data |= ((p[i].value & 0x1) << 4); } pin_ctrl &= ~(0x3 << 12); pin_ctrl |= (strength << 12); } break; } } cx25840_write(client, 0x164, gpio_data); cx25840_write(client, 0x160, gpio_oe); cx25840_write4(client, 0x120, pin_ctrl); return 0; } static u8 cx25840_function_to_pad(struct i2c_client *client, u8 function) { if (function > CX25840_PAD_VRESET) { v4l_err(client, "invalid function %u, assuming default\n", (unsigned int)function); return 0; } return function; } static void cx25840_set_invert(u8 *pinctrl3, u8 *voutctrl4, u8 function, u8 pin, bool invert) { switch (function) { case CX25840_PAD_IRQ_N: if (invert) *pinctrl3 &= ~2; else *pinctrl3 |= 2; break; case CX25840_PAD_ACTIVE: if (invert) *voutctrl4 |= BIT(2); else *voutctrl4 &= ~BIT(2); break; case CX25840_PAD_VACTIVE: if (invert) *voutctrl4 |= BIT(5); else *voutctrl4 &= ~BIT(5); break; case CX25840_PAD_CBFLAG: if (invert) *voutctrl4 |= BIT(4); else *voutctrl4 &= ~BIT(4); break; case CX25840_PAD_VRESET: if (invert) *voutctrl4 |= BIT(0); else *voutctrl4 &= ~BIT(0); break; } if (function != CX25840_PAD_DEFAULT) return; switch (pin) { case CX25840_PIN_DVALID_PRGM0: if (invert) *voutctrl4 |= BIT(6); else *voutctrl4 &= ~BIT(6); break; case CX25840_PIN_HRESET_PRGM2: if (invert) *voutctrl4 |= BIT(1); else *voutctrl4 &= ~BIT(1); break; } } static int cx25840_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *p) { struct i2c_client *client = v4l2_get_subdevdata(sd); unsigned int i; u8 pinctrl[6], pinconf[10], voutctrl4; for (i = 0; i < 6; i++) pinctrl[i] = cx25840_read(client, 0x114 + i); for (i = 0; i < 10; i++) pinconf[i] = cx25840_read(client, 0x11c + i); voutctrl4 = cx25840_read(client, 0x407); for (i = 0; i < n; i++) { u8 strength = p[i].strength; if (strength != CX25840_PIN_DRIVE_SLOW && strength != CX25840_PIN_DRIVE_MEDIUM && strength != CX25840_PIN_DRIVE_FAST) { v4l_err(client, "invalid drive speed for pin %u (%u), assuming fast\n", (unsigned int)p[i].pin, (unsigned int)strength); strength = CX25840_PIN_DRIVE_FAST; } switch (p[i].pin) { case CX25840_PIN_DVALID_PRGM0: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[0] &= ~BIT(6); else pinctrl[0] |= BIT(6); pinconf[3] &= 0xf0; pinconf[3] |= cx25840_function_to_pad(client, p[i].function); cx25840_set_invert(&pinctrl[3], &voutctrl4, p[i].function, CX25840_PIN_DVALID_PRGM0, p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)); pinctrl[4] &= ~(3 << 2); /* CX25840_PIN_DRIVE_MEDIUM */ switch (strength) { case CX25840_PIN_DRIVE_SLOW: pinctrl[4] |= 1 << 2; break; case CX25840_PIN_DRIVE_FAST: pinctrl[4] |= 2 << 2; break; } break; case CX25840_PIN_HRESET_PRGM2: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[1] &= ~BIT(0); else pinctrl[1] |= BIT(0); pinconf[4] &= 0xf0; pinconf[4] |= cx25840_function_to_pad(client, p[i].function); cx25840_set_invert(&pinctrl[3], &voutctrl4, p[i].function, CX25840_PIN_HRESET_PRGM2, p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_ACTIVE_LOW)); pinctrl[4] &= ~(3 << 2); /* CX25840_PIN_DRIVE_MEDIUM */ switch (strength) { case CX25840_PIN_DRIVE_SLOW: pinctrl[4] |= 1 << 2; break; case CX25840_PIN_DRIVE_FAST: pinctrl[4] |= 2 << 2; break; } break; case CX25840_PIN_PLL_CLK_PRGM7: if (p[i].flags & BIT(V4L2_SUBDEV_IO_PIN_DISABLE)) pinctrl[2] &= ~BIT(2); else pinctrl[2] |= BIT(2); switch (p[i].function) { case CX25840_PAD_XTI_X5_DLL: pinconf[6] = 0; break; case CX25840_PAD_AUX_PLL: pinconf[6] = 1; break; case CX25840_PAD_VID_PLL: pinconf[6] = 5; break; case CX25840_PAD_XTI: pinconf[6] = 2; break; default: pinconf[6] = 3; pinconf[6] |= cx25840_function_to_pad(client, p[i].function) << 4; } break; default: v4l_err(client, "invalid or unsupported pin %u\n", (unsigned int)p[i].pin); break; } } cx25840_write(client, 0x407, voutctrl4); for (i = 0; i < 6; i++) cx25840_write(client, 0x114 + i, pinctrl[i]); for (i = 0; i < 10; i++) cx25840_write(client, 0x11c + i, pinconf[i]); return 0; } static int common_s_io_pin_config(struct v4l2_subdev *sd, size_t n, struct v4l2_subdev_io_pin_config *pincfg) { struct cx25840_state *state = to_state(sd); if (is_cx2388x(state)) return cx23885_s_io_pin_config(sd, n, pincfg); else if (is_cx2584x(state)) return cx25840_s_io_pin_config(sd, n, pincfg); return 0; } /* ----------------------------------------------------------------------- */ static void init_dll1(struct i2c_client *client) { /* * This is the Hauppauge sequence used to * initialize the Delay Lock Loop 1 (ADC DLL). */ cx25840_write(client, 0x159, 0x23); cx25840_write(client, 0x15a, 0x87); cx25840_write(client, 0x15b, 0x06); udelay(10); cx25840_write(client, 0x159, 0xe1); udelay(10); cx25840_write(client, 0x15a, 0x86); cx25840_write(client, 0x159, 0xe0); cx25840_write(client, 0x159, 0xe1); cx25840_write(client, 0x15b, 0x10); } static void init_dll2(struct i2c_client *client) { /* * This is the Hauppauge sequence used to * initialize the Delay Lock Loop 2 (ADC DLL). */ cx25840_write(client, 0x15d, 0xe3); cx25840_write(client, 0x15e, 0x86); cx25840_write(client, 0x15f, 0x06); udelay(10); cx25840_write(client, 0x15d, 0xe1); cx25840_write(client, 0x15d, 0xe0); cx25840_write(client, 0x15d, 0xe1); } static void cx25836_initialize(struct i2c_client *client) { /* *reset configuration is described on page 3-77 * of the CX25836 datasheet */ /* 2. */ cx25840_and_or(client, 0x000, ~0x01, 0x01); cx25840_and_or(client, 0x000, ~0x01, 0x00); /* 3a. */ cx25840_and_or(client, 0x15a, ~0x70, 0x00); /* 3b. */ cx25840_and_or(client, 0x15b, ~0x1e, 0x06); /* 3c. */ cx25840_and_or(client, 0x159, ~0x02, 0x02); /* 3d. */ udelay(10); /* 3e. */ cx25840_and_or(client, 0x159, ~0x02, 0x00); /* 3f. */ cx25840_and_or(client, 0x159, ~0xc0, 0xc0); /* 3g. */ cx25840_and_or(client, 0x159, ~0x01, 0x00); cx25840_and_or(client, 0x159, ~0x01, 0x01); /* 3h. */ cx25840_and_or(client, 0x15b, ~0x1e, 0x10); } static void cx25840_work_handler(struct work_struct *work) { struct cx25840_state *state = container_of(work, struct cx25840_state, fw_work); cx25840_loadfw(state->c); wake_up(&state->fw_wait); } #define CX25840_VCONFIG_SET_BIT(state, opt_msk, voc, idx, bit, oneval) \ do { \ if ((state)->vid_config & (opt_msk)) { \ if (((state)->vid_config & (opt_msk)) == \ (oneval)) \ (voc)[idx] |= BIT(bit); \ else \ (voc)[idx] &= ~BIT(bit); \ } \ } while (0) /* apply current vconfig to hardware regs */ static void cx25840_vconfig_apply(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 voutctrl[3]; unsigned int i; for (i = 0; i < 3; i++) voutctrl[i] = cx25840_read(client, 0x404 + i); if (state->vid_config & CX25840_VCONFIG_FMT_MASK) voutctrl[0] &= ~3; switch (state->vid_config & CX25840_VCONFIG_FMT_MASK) { case CX25840_VCONFIG_FMT_BT656: voutctrl[0] |= 1; break; case CX25840_VCONFIG_FMT_VIP11: voutctrl[0] |= 2; break; case CX25840_VCONFIG_FMT_VIP2: voutctrl[0] |= 3; break; case CX25840_VCONFIG_FMT_BT601: /* zero */ default: break; } CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_RES_MASK, voutctrl, 0, 2, CX25840_VCONFIG_RES_10BIT); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VBIRAW_MASK, voutctrl, 0, 3, CX25840_VCONFIG_VBIRAW_ENABLED); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_ANCDATA_MASK, voutctrl, 0, 4, CX25840_VCONFIG_ANCDATA_ENABLED); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_TASKBIT_MASK, voutctrl, 0, 5, CX25840_VCONFIG_TASKBIT_ONE); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_ACTIVE_MASK, voutctrl, 1, 2, CX25840_VCONFIG_ACTIVE_HORIZONTAL); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VALID_MASK, voutctrl, 1, 3, CX25840_VCONFIG_VALID_ANDACTIVE); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_HRESETW_MASK, voutctrl, 1, 4, CX25840_VCONFIG_HRESETW_PIXCLK); if (state->vid_config & CX25840_VCONFIG_CLKGATE_MASK) voutctrl[1] &= ~(3 << 6); switch (state->vid_config & CX25840_VCONFIG_CLKGATE_MASK) { case CX25840_VCONFIG_CLKGATE_VALID: voutctrl[1] |= 2; break; case CX25840_VCONFIG_CLKGATE_VALIDACTIVE: voutctrl[1] |= 3; break; case CX25840_VCONFIG_CLKGATE_NONE: /* zero */ default: break; } CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_DCMODE_MASK, voutctrl, 2, 0, CX25840_VCONFIG_DCMODE_BYTES); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_IDID0S_MASK, voutctrl, 2, 1, CX25840_VCONFIG_IDID0S_LINECNT); CX25840_VCONFIG_SET_BIT(state, CX25840_VCONFIG_VIPCLAMP_MASK, voutctrl, 2, 4, CX25840_VCONFIG_VIPCLAMP_ENABLED); for (i = 0; i < 3; i++) cx25840_write(client, 0x404 + i, voutctrl[i]); } static void cx25840_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); struct workqueue_struct *q; /* datasheet startup in numbered steps, refer to page 3-77 */ /* 2. */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* * The default of this register should be 4, but I get 0 instead. * Set this register to 4 manually. */ cx25840_write(client, 0x000, 0x04); /* 3. */ init_dll1(client); init_dll2(client); cx25840_write(client, 0x136, 0x0a); /* 4. */ cx25840_write(client, 0x13c, 0x01); cx25840_write(client, 0x13c, 0x00); /* 5. */ /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } /* 6. */ cx25840_write(client, 0x115, 0x8c); cx25840_write(client, 0x116, 0x07); cx25840_write(client, 0x118, 0x02); /* 7. */ cx25840_write(client, 0x4a5, 0x80); cx25840_write(client, 0x4a5, 0x00); cx25840_write(client, 0x402, 0x00); /* 8. */ cx25840_and_or(client, 0x401, ~0x18, 0); cx25840_and_or(client, 0x4a2, ~0x10, 0x10); /* steps 8c and 8d are done in change_input() */ /* 10. */ cx25840_write(client, 0x8d3, 0x1f); cx25840_write(client, 0x8e3, 0x03); cx25840_std_setup(client); /* trial and error says these are needed to get audio */ cx25840_write(client, 0x914, 0xa0); cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); /* stereo preferred */ cx25840_write(client, 0x809, 0x04); /* AC97 shift */ cx25840_write(client, 0x8cf, 0x0f); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); if (state->generic_mode) cx25840_vconfig_apply(client); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); } static void cx23885_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u32 clk_freq = 0; struct workqueue_struct *q; /* cx23885 sets hostdata to clk_freq pointer */ if (v4l2_get_subdev_hostdata(&state->sd)) clk_freq = *((u32 *)v4l2_get_subdev_hostdata(&state->sd)); /* * Come out of digital power down * The CX23888, at least, needs this, otherwise registers aside from * 0x0-0x2 can't be read or written. */ cx25840_write(client, 0x000, 0); /* Internal Reset */ cx25840_and_or(client, 0x102, ~0x01, 0x01); cx25840_and_or(client, 0x102, ~0x01, 0x00); /* Stop microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* DIF in reset? */ cx25840_write(client, 0x398, 0); /* * Trust the default xtal, no division * '885: 28.636363... MHz * '887: 25.000000 MHz * '888: 50.000000 MHz */ cx25840_write(client, 0x2, 0x76); /* Power up all the PLL's and DLL */ cx25840_write(client, 0x1, 0x40); /* Sys PLL */ switch (state->id) { case CX23888_AV: /* * 50.0 MHz * (0xb + 0xe8ba26/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ if (clk_freq == 25000000) { /* 888/ImpactVCBe or 25Mhz xtal */ ; /* nothing to do */ } else { /* HVR1850 or 50MHz xtal */ cx25840_write(client, 0x2, 0x71); } cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); cx25840_write4(client, 0x404, 0x0010253e); cx25840_write4(client, 0x42c, 0x42600000); cx25840_write4(client, 0x44c, 0x161f1000); break; case CX23887_AV: /* * 25.0 MHz * (0x16 + 0x1d1744c/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ cx25840_write4(client, 0x11c, 0x01d1744c); cx25840_write4(client, 0x118, 0x00000416); break; case CX23885_AV: default: /* * 28.636363 MHz * (0x14 + 0x0/0x2000000)/4 = 5 * 28.636363 MHz * 572.73 MHz before post divide */ cx25840_write4(client, 0x11c, 0x00000000); cx25840_write4(client, 0x118, 0x00000414); break; } /* Disable DIF bypass */ cx25840_write4(client, 0x33c, 0x00000001); /* DIF Src phase inc */ cx25840_write4(client, 0x340, 0x0df7df83); /* * Vid PLL * Setup for a BT.656 pixel clock of 13.5 Mpixels/second * * 28.636363 MHz * (0xf + 0x02be2c9/0x2000000)/4 = 8 * 13.5 MHz * 432.0 MHz before post divide */ /* HVR1850 */ switch (state->id) { case CX23888_AV: if (clk_freq == 25000000) { /* 888/ImpactVCBe or 25MHz xtal */ cx25840_write4(client, 0x10c, 0x01b6db7b); cx25840_write4(client, 0x108, 0x00000512); } else { /* 888/HVR1250 or 50MHz xtal */ cx25840_write4(client, 0x10c, 0x13333333); cx25840_write4(client, 0x108, 0x00000515); } break; default: cx25840_write4(client, 0x10c, 0x002be2c9); cx25840_write4(client, 0x108, 0x0000040f); } /* Luma */ cx25840_write4(client, 0x414, 0x00107d12); /* Chroma */ if (is_cx23888(state)) cx25840_write4(client, 0x418, 0x1d008282); else cx25840_write4(client, 0x420, 0x3d008282); /* * Aux PLL * Initial setup for audio sample clock: * 48 ksps, 16 bits/sample, x160 multiplier = 122.88 MHz * Initial I2S output/master clock(?): * 48 ksps, 16 bits/sample, x16 multiplier = 12.288 MHz */ switch (state->id) { case CX23888_AV: /* * 50.0 MHz * (0x7 + 0x0bedfa4/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ /* HVR1850 or 50MHz xtal or 25MHz xtal */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; case CX23887_AV: /* * 25.0 MHz * (0xe + 0x17dbf48/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ cx25840_write4(client, 0x114, 0x017dbf48); cx25840_write4(client, 0x110, 0x000a030e); break; case CX23885_AV: default: /* * 28.636363 MHz * (0xc + 0x1bf0c9e/0x2000000)/3 = 122.88 MHz * 368.64 MHz before post divide * 122.88 MHz / 0xa = 12.288 MHz */ cx25840_write4(client, 0x114, 0x01bf0c9e); cx25840_write4(client, 0x110, 0x000a030c); break; } /* ADC2 input select */ cx25840_write(client, 0x102, 0x10); /* VIN1 & VIN5 */ cx25840_write(client, 0x103, 0x11); /* Enable format auto detect */ cx25840_write(client, 0x400, 0); /* Fast subchroma lock */ /* White crush, Chroma AGC & Chroma Killer enabled */ cx25840_write(client, 0x401, 0xe8); /* Select AFE clock pad output source */ cx25840_write(client, 0x144, 0x05); /* Drive GPIO2 direction and values for HVR1700 * where an onboard mux selects the output of demodulator * vs the 417. Failure to set this results in no DTV. * It's safe to set this across all Hauppauge boards * currently, regardless of the board type. */ cx25840_write(client, 0x160, 0x1d); cx25840_write(client, 0x164, 0x00); /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } /* * Call the cx23888 specific std setup func, we no longer rely on * the generic cx24840 func. */ if (is_cx23888(state)) cx23888_std_setup(client); else cx25840_std_setup(client); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); /* Disable and clear video interrupts - we don't use them */ cx25840_write4(client, CX25840_VID_INT_STAT_REG, 0xffffffff); /* Disable and clear audio interrupts - we don't use them */ cx25840_write(client, CX25840_AUD_INT_CTRL_REG, 0xff); cx25840_write(client, CX25840_AUD_INT_STAT_REG, 0xff); /* CC raw enable */ /* * - VIP 1.1 control codes - 10bit, blue field enable. * - enable raw data during vertical blanking. * - enable ancillary Data insertion for 656 or VIP. */ cx25840_write4(client, 0x404, 0x0010253e); /* CC on - VBI_LINE_CTRL3, FLD_VBI_MD_LINE12 */ cx25840_write(client, state->vbi_regs_offset + 0x42f, 0x66); /* HVR-1250 / HVR1850 DIF related */ /* Power everything up */ cx25840_write4(client, 0x130, 0x0); /* SRC_COMB_CFG */ if (is_cx23888(state)) cx25840_write4(client, 0x454, 0x6628021F); else cx25840_write4(client, 0x478, 0x6628021F); /* AFE_CLK_OUT_CTRL - Select the clock output source as output */ cx25840_write4(client, 0x144, 0x5); /* I2C_OUT_CTL - I2S output configuration as * Master, Sony, Left justified, left sample on WS=1 */ cx25840_write4(client, 0x918, 0x1a0); /* AFE_DIAG_CTRL1 */ cx25840_write4(client, 0x134, 0x000a1800); /* AFE_DIAG_CTRL3 - Inverted Polarity for Audio and Video */ cx25840_write4(client, 0x13c, 0x00310000); } /* ----------------------------------------------------------------------- */ static void cx231xx_initialize(struct i2c_client *client) { DEFINE_WAIT(wait); struct cx25840_state *state = to_state(i2c_get_clientdata(client)); struct workqueue_struct *q; /* Internal Reset */ cx25840_and_or(client, 0x102, ~0x01, 0x01); cx25840_and_or(client, 0x102, ~0x01, 0x00); /* Stop microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x00); /* DIF in reset? */ cx25840_write(client, 0x398, 0); /* Trust the default xtal, no division */ /* This changes for the cx23888 products */ cx25840_write(client, 0x2, 0x76); /* Bring down the regulator for AUX clk */ cx25840_write(client, 0x1, 0x40); /* Disable DIF bypass */ cx25840_write4(client, 0x33c, 0x00000001); /* DIF Src phase inc */ cx25840_write4(client, 0x340, 0x0df7df83); /* Luma */ cx25840_write4(client, 0x414, 0x00107d12); /* Chroma */ cx25840_write4(client, 0x420, 0x3d008282); /* ADC2 input select */ cx25840_write(client, 0x102, 0x10); /* VIN1 & VIN5 */ cx25840_write(client, 0x103, 0x11); /* Enable format auto detect */ cx25840_write(client, 0x400, 0); /* Fast subchroma lock */ /* White crush, Chroma AGC & Chroma Killer enabled */ cx25840_write(client, 0x401, 0xe8); /* * Do the firmware load in a work handler to prevent. * Otherwise the kernel is blocked waiting for the * bit-banging i2c interface to finish uploading the * firmware. */ INIT_WORK(&state->fw_work, cx25840_work_handler); init_waitqueue_head(&state->fw_wait); q = create_singlethread_workqueue("cx25840_fw"); if (q) { prepare_to_wait(&state->fw_wait, &wait, TASK_UNINTERRUPTIBLE); queue_work(q, &state->fw_work); schedule(); finish_wait(&state->fw_wait, &wait); destroy_workqueue(q); } cx25840_std_setup(client); /* (re)set input */ set_input(client, state->vid_input, state->aud_input); /* start microcontroller */ cx25840_and_or(client, 0x803, ~0x10, 0x10); /* CC raw enable */ cx25840_write(client, 0x404, 0x0b); /* CC on */ cx25840_write(client, 0x42f, 0x66); cx25840_write4(client, 0x474, 0x1e1e601a); } /* ----------------------------------------------------------------------- */ void cx25840_std_setup(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; int hblank, hactive, burst, vblank, vactive, sc; int vblank656, src_decimation; int luma_lpf, uv_lpf, comb; u32 pll_int, pll_frac, pll_post; /* datasheet startup, step 8d */ if (std & ~V4L2_STD_NTSC) cx25840_write(client, 0x49f, 0x11); else cx25840_write(client, 0x49f, 0x14); /* generic mode uses the values that the chip autoconfig would set */ if (std & V4L2_STD_625_50) { hblank = 132; hactive = 720; burst = 93; if (state->generic_mode) { vblank = 34; vactive = 576; vblank656 = 38; } else { vblank = 36; vactive = 580; vblank656 = 40; } src_decimation = 0x21f; luma_lpf = 2; if (std & V4L2_STD_SECAM) { uv_lpf = 0; comb = 0; sc = 0x0a425f; } else if (std == V4L2_STD_PAL_Nc) { if (state->generic_mode) { burst = 95; luma_lpf = 1; } uv_lpf = 1; comb = 0x20; sc = 556453; } else { uv_lpf = 1; comb = 0x20; sc = 688739; } } else { hactive = 720; hblank = 122; vactive = 487; luma_lpf = 1; uv_lpf = 1; if (state->generic_mode) { vblank = 20; vblank656 = 24; } src_decimation = 0x21f; if (std == V4L2_STD_PAL_60) { if (!state->generic_mode) { vblank = 26; vblank656 = 26; burst = 0x5b; } else { burst = 0x59; } luma_lpf = 2; comb = 0x20; sc = 688739; } else if (std == V4L2_STD_PAL_M) { vblank = 20; vblank656 = 24; burst = 0x61; comb = 0x20; sc = 555452; } else { if (!state->generic_mode) { vblank = 26; vblank656 = 26; } burst = 0x5b; comb = 0x66; sc = 556063; } } /* DEBUG: Displays configured PLL frequency */ if (!is_cx231xx(state)) { pll_int = cx25840_read(client, 0x108); pll_frac = cx25840_read4(client, 0x10c) & 0x1ffffff; pll_post = cx25840_read(client, 0x109); v4l_dbg(1, cx25840_debug, client, "PLL regs = int: %u, frac: %u, post: %u\n", pll_int, pll_frac, pll_post); if (pll_post) { int fin, fsc; int pll = (28636363L * ((((u64)pll_int) << 25L) + pll_frac)) >> 25L; pll /= pll_post; v4l_dbg(1, cx25840_debug, client, "PLL = %d.%06d MHz\n", pll / 1000000, pll % 1000000); v4l_dbg(1, cx25840_debug, client, "PLL/8 = %d.%06d MHz\n", pll / 8000000, (pll / 8) % 1000000); fin = ((u64)src_decimation * pll) >> 12; v4l_dbg(1, cx25840_debug, client, "ADC Sampling freq = %d.%06d MHz\n", fin / 1000000, fin % 1000000); fsc = (((u64)sc) * pll) >> 24L; v4l_dbg(1, cx25840_debug, client, "Chroma sub-carrier freq = %d.%06d MHz\n", fsc / 1000000, fsc % 1000000); v4l_dbg(1, cx25840_debug, client, "hblank %i, hactive %i, vblank %i, vactive %i, vblank656 %i, src_dec %i, burst 0x%02x, luma_lpf %i, uv_lpf %i, comb 0x%02x, sc 0x%06x\n", hblank, hactive, vblank, vactive, vblank656, src_decimation, burst, luma_lpf, uv_lpf, comb, sc); } } /* Sets horizontal blanking delay and active lines */ cx25840_write(client, 0x470, hblank); cx25840_write(client, 0x471, (((hblank >> 8) & 0x3) | (hactive << 4)) & 0xff); cx25840_write(client, 0x472, hactive >> 4); /* Sets burst gate delay */ cx25840_write(client, 0x473, burst); /* Sets vertical blanking delay and active duration */ cx25840_write(client, 0x474, vblank); cx25840_write(client, 0x475, (((vblank >> 8) & 0x3) | (vactive << 4)) & 0xff); cx25840_write(client, 0x476, vactive >> 4); cx25840_write(client, 0x477, vblank656); /* Sets src decimation rate */ cx25840_write(client, 0x478, src_decimation & 0xff); cx25840_write(client, 0x479, (src_decimation >> 8) & 0xff); /* Sets Luma and UV Low pass filters */ cx25840_write(client, 0x47a, luma_lpf << 6 | ((uv_lpf << 4) & 0x30)); /* Enables comb filters */ cx25840_write(client, 0x47b, comb); /* Sets SC Step*/ cx25840_write(client, 0x47c, sc); cx25840_write(client, 0x47d, (sc >> 8) & 0xff); cx25840_write(client, 0x47e, (sc >> 16) & 0xff); /* Sets VBI parameters */ if (std & V4L2_STD_625_50) { cx25840_write(client, 0x47f, 0x01); state->vbi_line_offset = 5; } else { cx25840_write(client, 0x47f, 0x00); state->vbi_line_offset = 8; } } /* ----------------------------------------------------------------------- */ static void input_change(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; /* Follow step 8c and 8d of section 3.16 in the cx25840 datasheet */ if (std & V4L2_STD_SECAM) { cx25840_write(client, 0x402, 0); } else { cx25840_write(client, 0x402, 0x04); cx25840_write(client, 0x49f, (std & V4L2_STD_NTSC) ? 0x14 : 0x11); } cx25840_and_or(client, 0x401, ~0x60, 0); cx25840_and_or(client, 0x401, ~0x60, 0x60); /* Don't write into audio registers on cx2583x chips */ if (is_cx2583x(state)) return; cx25840_and_or(client, 0x810, ~0x01, 1); if (state->radio) { cx25840_write(client, 0x808, 0xf9); cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_525_60) { /* * Certain Hauppauge PVR150 models have a hardware bug * that causes audio to drop out. For these models the * audio standard must be set explicitly. * To be precise: it affects cards with tuner models * 85, 99 and 112 (model numbers from tveeprom). */ int hw_fix = state->pvr150_workaround; if (std == V4L2_STD_NTSC_M_JP) { /* Japan uses EIAJ audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x2f : 0xf7); } else if (std == V4L2_STD_NTSC_M_KR) { /* South Korea uses A2 audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x3f : 0xf8); } else { /* Others use the BTSC audio standard */ cx25840_write(client, 0x808, hw_fix ? 0x1f : 0xf6); } cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_PAL) { /* Autodetect audio standard and audio system */ cx25840_write(client, 0x808, 0xff); /* * Since system PAL-L is pretty much non-existent and * not used by any public broadcast network, force * 6.5 MHz carrier to be interpreted as System DK, * this avoids DK audio detection instability */ cx25840_write(client, 0x80b, 0x00); } else if (std & V4L2_STD_SECAM) { /* Autodetect audio standard and audio system */ cx25840_write(client, 0x808, 0xff); /* * If only one of SECAM-DK / SECAM-L is required, then force * 6.5MHz carrier, else autodetect it */ if ((std & V4L2_STD_SECAM_DK) && !(std & (V4L2_STD_SECAM_L | V4L2_STD_SECAM_LC))) { /* 6.5 MHz carrier to be interpreted as System DK */ cx25840_write(client, 0x80b, 0x00); } else if (!(std & V4L2_STD_SECAM_DK) && (std & (V4L2_STD_SECAM_L | V4L2_STD_SECAM_LC))) { /* 6.5 MHz carrier to be interpreted as System L */ cx25840_write(client, 0x80b, 0x08); } else { /* 6.5 MHz carrier to be autodetected */ cx25840_write(client, 0x80b, 0x10); } } cx25840_and_or(client, 0x810, ~0x01, 0); } static int set_input(struct i2c_client *client, enum cx25840_video_input vid_input, enum cx25840_audio_input aud_input) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 is_composite = (vid_input >= CX25840_COMPOSITE1 && vid_input <= CX25840_COMPOSITE8); u8 is_component = (vid_input & CX25840_COMPONENT_ON) == CX25840_COMPONENT_ON; u8 is_dif = (vid_input & CX25840_DIF_ON) == CX25840_DIF_ON; u8 is_svideo = (vid_input & CX25840_SVIDEO_ON) == CX25840_SVIDEO_ON; int luma = vid_input & 0xf0; int chroma = vid_input & 0xf00; u8 reg; u32 val; v4l_dbg(1, cx25840_debug, client, "decoder set video input %d, audio input %d\n", vid_input, aud_input); if (vid_input >= CX25840_VIN1_CH1) { v4l_dbg(1, cx25840_debug, client, "vid_input 0x%x\n", vid_input); reg = vid_input & 0xff; is_composite = !is_component && ((vid_input & CX25840_SVIDEO_ON) != CX25840_SVIDEO_ON); v4l_dbg(1, cx25840_debug, client, "mux cfg 0x%x comp=%d\n", reg, is_composite); } else if (is_composite) { reg = 0xf0 + (vid_input - CX25840_COMPOSITE1); } else { if ((vid_input & ~0xff0) || luma < CX25840_SVIDEO_LUMA1 || luma > CX25840_SVIDEO_LUMA8 || chroma < CX25840_SVIDEO_CHROMA4 || chroma > CX25840_SVIDEO_CHROMA8) { v4l_err(client, "0x%04x is not a valid video input!\n", vid_input); return -EINVAL; } reg = 0xf0 + ((luma - CX25840_SVIDEO_LUMA1) >> 4); if (chroma >= CX25840_SVIDEO_CHROMA7) { reg &= 0x3f; reg |= (chroma - CX25840_SVIDEO_CHROMA7) >> 2; } else { reg &= 0xcf; reg |= (chroma - CX25840_SVIDEO_CHROMA4) >> 4; } } /* The caller has previously prepared the correct routing * configuration in reg (for the cx23885) so we have no * need to attempt to flip bits for earlier av decoders. */ if (!is_cx2388x(state) && !is_cx231xx(state)) { switch (aud_input) { case CX25840_AUDIO_SERIAL: /* do nothing, use serial audio input */ break; case CX25840_AUDIO4: reg &= ~0x30; break; case CX25840_AUDIO5: reg &= ~0x30; reg |= 0x10; break; case CX25840_AUDIO6: reg &= ~0x30; reg |= 0x20; break; case CX25840_AUDIO7: reg &= ~0xc0; break; case CX25840_AUDIO8: reg &= ~0xc0; reg |= 0x40; break; default: v4l_err(client, "0x%04x is not a valid audio input!\n", aud_input); return -EINVAL; } } cx25840_write(client, 0x103, reg); /* Set INPUT_MODE to Composite, S-Video or Component */ if (is_component) cx25840_and_or(client, 0x401, ~0x6, 0x6); else cx25840_and_or(client, 0x401, ~0x6, is_composite ? 0 : 0x02); if (is_cx2388x(state)) { /* Enable or disable the DIF for tuner use */ if (is_dif) { cx25840_and_or(client, 0x102, ~0x80, 0x80); /* Set of defaults for NTSC and PAL */ cx25840_write4(client, 0x31c, 0xc2262600); cx25840_write4(client, 0x320, 0xc2262600); /* 18271 IF - Nobody else yet uses a different * tuner with the DIF, so these are reasonable * assumptions (HVR1250 and HVR1850 specific). */ cx25840_write4(client, 0x318, 0xda262600); cx25840_write4(client, 0x33c, 0x2a24c800); cx25840_write4(client, 0x104, 0x0704dd00); } else { cx25840_write4(client, 0x300, 0x015c28f5); cx25840_and_or(client, 0x102, ~0x80, 0); cx25840_write4(client, 0x340, 0xdf7df83); cx25840_write4(client, 0x104, 0x0704dd80); cx25840_write4(client, 0x314, 0x22400600); cx25840_write4(client, 0x318, 0x40002600); cx25840_write4(client, 0x324, 0x40002600); cx25840_write4(client, 0x32c, 0x0250e620); cx25840_write4(client, 0x39c, 0x01FF0B00); cx25840_write4(client, 0x410, 0xffff0dbf); cx25840_write4(client, 0x414, 0x00137d03); if (is_cx23888(state)) { /* 888 MISC_TIM_CTRL */ cx25840_write4(client, 0x42c, 0x42600000); /* 888 FIELD_COUNT */ cx25840_write4(client, 0x430, 0x0000039b); /* 888 VSCALE_CTRL */ cx25840_write4(client, 0x438, 0x00000000); /* 888 DFE_CTRL1 */ cx25840_write4(client, 0x440, 0xF8E3E824); /* 888 DFE_CTRL2 */ cx25840_write4(client, 0x444, 0x401040dc); /* 888 DFE_CTRL3 */ cx25840_write4(client, 0x448, 0xcd3f02a0); /* 888 PLL_CTRL */ cx25840_write4(client, 0x44c, 0x161f1000); /* 888 HTL_CTRL */ cx25840_write4(client, 0x450, 0x00000802); } cx25840_write4(client, 0x91c, 0x01000000); cx25840_write4(client, 0x8e0, 0x03063870); cx25840_write4(client, 0x8d4, 0x7FFF0024); cx25840_write4(client, 0x8d0, 0x00063073); cx25840_write4(client, 0x8c8, 0x00010000); cx25840_write4(client, 0x8cc, 0x00080023); /* DIF BYPASS */ cx25840_write4(client, 0x33c, 0x2a04c800); } /* Reset the DIF */ cx25840_write4(client, 0x398, 0); } if (!is_cx2388x(state) && !is_cx231xx(state)) { /* Set CH_SEL_ADC2 to 1 if input comes from CH3 */ cx25840_and_or(client, 0x102, ~0x2, (reg & 0x80) == 0 ? 2 : 0); /* Set DUAL_MODE_ADC2 to 1 if input comes from both CH2&CH3 */ if ((reg & 0xc0) != 0xc0 && (reg & 0x30) != 0x30) cx25840_and_or(client, 0x102, ~0x4, 4); else cx25840_and_or(client, 0x102, ~0x4, 0); } else { /* Set DUAL_MODE_ADC2 to 1 if component*/ cx25840_and_or(client, 0x102, ~0x4, is_component ? 0x4 : 0x0); if (is_composite) { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); } else if (!is_component) { /* S-Video */ if (chroma >= CX25840_SVIDEO_CHROMA7) { /* ADC2 input select channel 3 */ cx25840_and_or(client, 0x102, ~0x2, 2); } else { /* ADC2 input select channel 2 */ cx25840_and_or(client, 0x102, ~0x2, 0); } } /* cx23885 / SVIDEO */ if (is_cx2388x(state) && is_svideo) { #define AFE_CTRL (0x104) #define MODE_CTRL (0x400) cx25840_and_or(client, 0x102, ~0x2, 0x2); val = cx25840_read4(client, MODE_CTRL); val &= 0xFFFFF9FF; /* YC */ val |= 0x00000200; val &= ~0x2000; cx25840_write4(client, MODE_CTRL, val); val = cx25840_read4(client, AFE_CTRL); /* Chroma in select */ val |= 0x00001000; val &= 0xfffffe7f; /* Clear VGA_SEL_CH2 and VGA_SEL_CH3 (bits 7 and 8). * This sets them to use video rather than audio. * Only one of the two will be in use. */ cx25840_write4(client, AFE_CTRL, val); } else { cx25840_and_or(client, 0x102, ~0x2, 0); } } state->vid_input = vid_input; state->aud_input = aud_input; cx25840_audio_set_path(client); input_change(client); if (is_cx2388x(state)) { /* Audio channel 1 src : Parallel 1 */ cx25840_write(client, 0x124, 0x03); /* Select AFE clock pad output source */ cx25840_write(client, 0x144, 0x05); /* I2S_IN_CTL: I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 */ cx25840_write(client, 0x914, 0xa0); /* I2S_OUT_CTL: * I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 * I2S_OUT_MASTER_MODE = Master */ cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); } else if (is_cx231xx(state)) { /* Audio channel 1 src : Parallel 1 */ cx25840_write(client, 0x124, 0x03); /* I2S_IN_CTL: I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 */ cx25840_write(client, 0x914, 0xa0); /* I2S_OUT_CTL: * I2S_IN_SONY_MODE, LEFT SAMPLE on WS=1 * I2S_OUT_MASTER_MODE = Master */ cx25840_write(client, 0x918, 0xa0); cx25840_write(client, 0x919, 0x01); } if (is_cx2388x(state) && ((aud_input == CX25840_AUDIO7) || (aud_input == CX25840_AUDIO6))) { /* Configure audio from LR1 or LR2 input */ cx25840_write4(client, 0x910, 0); cx25840_write4(client, 0x8d0, 0x63073); } else if (is_cx2388x(state) && (aud_input == CX25840_AUDIO8)) { /* Configure audio from tuner/sif input */ cx25840_write4(client, 0x910, 0x12b000c9); cx25840_write4(client, 0x8d0, 0x1f063870); } if (is_cx23888(state)) { /* * HVR1850 * * AUD_IO_CTRL - I2S Input, Parallel1 * - Channel 1 src - Parallel1 (Merlin out) * - Channel 2 src - Parallel2 (Merlin out) * - Channel 3 src - Parallel3 (Merlin AC97 out) * - I2S source and dir - Merlin, output */ cx25840_write4(client, 0x124, 0x100); if (!is_dif) { /* * Stop microcontroller if we don't need it * to avoid audio popping on svideo/composite use. */ cx25840_and_or(client, 0x803, ~0x10, 0x00); } } return 0; } /* ----------------------------------------------------------------------- */ static int set_v4lstd(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 fmt = 0; /* zero is autodetect */ u8 pal_m = 0; /* First tests should be against specific std */ if (state->std == V4L2_STD_NTSC_M_JP) { fmt = 0x2; } else if (state->std == V4L2_STD_NTSC_443) { fmt = 0x3; } else if (state->std == V4L2_STD_PAL_M) { pal_m = 1; fmt = 0x5; } else if (state->std == V4L2_STD_PAL_N) { fmt = 0x6; } else if (state->std == V4L2_STD_PAL_Nc) { fmt = 0x7; } else if (state->std == V4L2_STD_PAL_60) { fmt = 0x8; } else { /* Then, test against generic ones */ if (state->std & V4L2_STD_NTSC) fmt = 0x1; else if (state->std & V4L2_STD_PAL) fmt = 0x4; else if (state->std & V4L2_STD_SECAM) fmt = 0xc; } v4l_dbg(1, cx25840_debug, client, "changing video std to fmt %i\n", fmt); /* * Follow step 9 of section 3.16 in the cx25840 datasheet. * Without this PAL may display a vertical ghosting effect. * This happens for example with the Yuan MPC622. */ if (fmt >= 4 && fmt < 8) { /* Set format to NTSC-M */ cx25840_and_or(client, 0x400, ~0xf, 1); /* Turn off LCOMB */ cx25840_and_or(client, 0x47b, ~6, 0); } cx25840_and_or(client, 0x400, ~0xf, fmt); cx25840_and_or(client, 0x403, ~0x3, pal_m); if (is_cx23888(state)) cx23888_std_setup(client); else cx25840_std_setup(client); if (!is_cx2583x(state)) input_change(client); return 0; } /* ----------------------------------------------------------------------- */ static int cx25840_s_ctrl(struct v4l2_ctrl *ctrl) { struct v4l2_subdev *sd = to_sd(ctrl); struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); switch (ctrl->id) { case V4L2_CID_BRIGHTNESS: cx25840_write(client, 0x414, ctrl->val - 128); break; case V4L2_CID_CONTRAST: cx25840_write(client, 0x415, ctrl->val << 1); break; case V4L2_CID_SATURATION: if (is_cx23888(state)) { cx25840_write(client, 0x418, ctrl->val << 1); cx25840_write(client, 0x419, ctrl->val << 1); } else { cx25840_write(client, 0x420, ctrl->val << 1); cx25840_write(client, 0x421, ctrl->val << 1); } break; case V4L2_CID_HUE: if (is_cx23888(state)) cx25840_write(client, 0x41a, ctrl->val); else cx25840_write(client, 0x422, ctrl->val); break; default: return -EINVAL; } return 0; } /* ----------------------------------------------------------------------- */ static int cx25840_set_fmt(struct v4l2_subdev *sd, struct v4l2_subdev_state *sd_state, struct v4l2_subdev_format *format) { struct v4l2_mbus_framefmt *fmt = &format->format; struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u32 hsc, vsc, v_src, h_src, v_add; int filter; int is_50hz = !(state->std & V4L2_STD_525_60); if (format->pad || fmt->code != MEDIA_BUS_FMT_FIXED) return -EINVAL; fmt->field = V4L2_FIELD_INTERLACED; fmt->colorspace = V4L2_COLORSPACE_SMPTE170M; if (is_cx23888(state)) { v_src = (cx25840_read(client, 0x42a) & 0x3f) << 4; v_src |= (cx25840_read(client, 0x429) & 0xf0) >> 4; } else { v_src = (cx25840_read(client, 0x476) & 0x3f) << 4; v_src |= (cx25840_read(client, 0x475) & 0xf0) >> 4; } if (is_cx23888(state)) { h_src = (cx25840_read(client, 0x426) & 0x3f) << 4; h_src |= (cx25840_read(client, 0x425) & 0xf0) >> 4; } else { h_src = (cx25840_read(client, 0x472) & 0x3f) << 4; h_src |= (cx25840_read(client, 0x471) & 0xf0) >> 4; } if (!state->generic_mode) { v_add = is_50hz ? 4 : 7; /* * cx23888 in 525-line mode is programmed for 486 active lines * while other chips use 487 active lines. * * See reg 0x428 bits [21:12] in cx23888_std_setup() vs * vactive in cx25840_std_setup(). */ if (is_cx23888(state) && !is_50hz) v_add--; } else { v_add = 0; } if (h_src == 0 || v_src <= v_add) { v4l_err(client, "chip reported picture size (%u x %u) is far too small\n", (unsigned int)h_src, (unsigned int)v_src); /* * that's the best we can do since the output picture * size is completely unknown in this case */ return -EINVAL; } fmt->width = clamp(fmt->width, (h_src + 15) / 16, h_src); if (v_add * 8 >= v_src) fmt->height = clamp(fmt->height, (u32)1, v_src - v_add); else fmt->height = clamp(fmt->height, (v_src - v_add * 8 + 7) / 8, v_src - v_add); if (format->which == V4L2_SUBDEV_FORMAT_TRY) return 0; hsc = (h_src * (1 << 20)) / fmt->width - (1 << 20); vsc = (1 << 16) - (v_src * (1 << 9) / (fmt->height + v_add) - (1 << 9)); vsc &= 0x1fff; if (fmt->width >= 385) filter = 0; else if (fmt->width > 192) filter = 1; else if (fmt->width > 96) filter = 2; else filter = 3; v4l_dbg(1, cx25840_debug, client, "decoder set size %u x %u with scale %x x %x\n", (unsigned int)fmt->width, (unsigned int)fmt->height, (unsigned int)hsc, (unsigned int)vsc); /* HSCALE=hsc */ if (is_cx23888(state)) { cx25840_write4(client, 0x434, hsc | (1 << 24)); /* VSCALE=vsc VS_INTRLACE=1 VFILT=filter */ cx25840_write4(client, 0x438, vsc | (1 << 19) | (filter << 16)); } else { cx25840_write(client, 0x418, hsc & 0xff); cx25840_write(client, 0x419, (hsc >> 8) & 0xff); cx25840_write(client, 0x41a, hsc >> 16); /* VSCALE=vsc */ cx25840_write(client, 0x41c, vsc & 0xff); cx25840_write(client, 0x41d, vsc >> 8); /* VS_INTRLACE=1 VFILT=filter */ cx25840_write(client, 0x41e, 0x8 | filter); } return 0; } /* ----------------------------------------------------------------------- */ static void log_video_status(struct i2c_client *client) { static const char *const fmt_strs[] = { "0x0", "NTSC-M", "NTSC-J", "NTSC-4.43", "PAL-BDGHI", "PAL-M", "PAL-N", "PAL-Nc", "PAL-60", "0x9", "0xA", "0xB", "SECAM", "0xD", "0xE", "0xF" }; struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 vidfmt_sel = cx25840_read(client, 0x400) & 0xf; u8 gen_stat1 = cx25840_read(client, 0x40d); u8 gen_stat2 = cx25840_read(client, 0x40e); int vid_input = state->vid_input; v4l_info(client, "Video signal: %spresent\n", (gen_stat2 & 0x20) ? "" : "not "); v4l_info(client, "Detected format: %s\n", fmt_strs[gen_stat1 & 0xf]); v4l_info(client, "Specified standard: %s\n", vidfmt_sel ? fmt_strs[vidfmt_sel] : "automatic detection"); if (vid_input >= CX25840_COMPOSITE1 && vid_input <= CX25840_COMPOSITE8) { v4l_info(client, "Specified video input: Composite %d\n", vid_input - CX25840_COMPOSITE1 + 1); } else { v4l_info(client, "Specified video input: S-Video (Luma In%d, Chroma In%d)\n", (vid_input & 0xf0) >> 4, (vid_input & 0xf00) >> 8); } v4l_info(client, "Specified audioclock freq: %d Hz\n", state->audclk_freq); } /* ----------------------------------------------------------------------- */ static void log_audio_status(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); u8 download_ctl = cx25840_read(client, 0x803); u8 mod_det_stat0 = cx25840_read(client, 0x804); u8 mod_det_stat1 = cx25840_read(client, 0x805); u8 audio_config = cx25840_read(client, 0x808); u8 pref_mode = cx25840_read(client, 0x809); u8 afc0 = cx25840_read(client, 0x80b); u8 mute_ctl = cx25840_read(client, 0x8d3); int aud_input = state->aud_input; char *p; switch (mod_det_stat0) { case 0x00: p = "mono"; break; case 0x01: p = "stereo"; break; case 0x02: p = "dual"; break; case 0x04: p = "tri"; break; case 0x10: p = "mono with SAP"; break; case 0x11: p = "stereo with SAP"; break; case 0x12: p = "dual with SAP"; break; case 0x14: p = "tri with SAP"; break; case 0xfe: p = "forced mode"; break; default: p = "not defined"; } v4l_info(client, "Detected audio mode: %s\n", p); switch (mod_det_stat1) { case 0x00: p = "not defined"; break; case 0x01: p = "EIAJ"; break; case 0x02: p = "A2-M"; break; case 0x03: p = "A2-BG"; break; case 0x04: p = "A2-DK1"; break; case 0x05: p = "A2-DK2"; break; case 0x06: p = "A2-DK3"; break; case 0x07: p = "A1 (6.0 MHz FM Mono)"; break; case 0x08: p = "AM-L"; break; case 0x09: p = "NICAM-BG"; break; case 0x0a: p = "NICAM-DK"; break; case 0x0b: p = "NICAM-I"; break; case 0x0c: p = "NICAM-L"; break; case 0x0d: p = "BTSC/EIAJ/A2-M Mono (4.5 MHz FMMono)"; break; case 0x0e: p = "IF FM Radio"; break; case 0x0f: p = "BTSC"; break; case 0x10: p = "high-deviation FM"; break; case 0x11: p = "very high-deviation FM"; break; case 0xfd: p = "unknown audio standard"; break; case 0xfe: p = "forced audio standard"; break; case 0xff: p = "no detected audio standard"; break; default: p = "not defined"; } v4l_info(client, "Detected audio standard: %s\n", p); v4l_info(client, "Audio microcontroller: %s\n", (download_ctl & 0x10) ? ((mute_ctl & 0x2) ? "detecting" : "running") : "stopped"); switch (audio_config >> 4) { case 0x00: p = "undefined"; break; case 0x01: p = "BTSC"; break; case 0x02: p = "EIAJ"; break; case 0x03: p = "A2-M"; break; case 0x04: p = "A2-BG"; break; case 0x05: p = "A2-DK1"; break; case 0x06: p = "A2-DK2"; break; case 0x07: p = "A2-DK3"; break; case 0x08: p = "A1 (6.0 MHz FM Mono)"; break; case 0x09: p = "AM-L"; break; case 0x0a: p = "NICAM-BG"; break; case 0x0b: p = "NICAM-DK"; break; case 0x0c: p = "NICAM-I"; break; case 0x0d: p = "NICAM-L"; break; case 0x0e: p = "FM radio"; break; case 0x0f: p = "automatic detection"; break; default: p = "undefined"; } v4l_info(client, "Configured audio standard: %s\n", p); if ((audio_config >> 4) < 0xF) { switch (audio_config & 0xF) { case 0x00: p = "MONO1 (LANGUAGE A/Mono L+R channel for BTSC, EIAJ, A2)"; break; case 0x01: p = "MONO2 (LANGUAGE B)"; break; case 0x02: p = "MONO3 (STEREO forced MONO)"; break; case 0x03: p = "MONO4 (NICAM ANALOG-Language C/Analog Fallback)"; break; case 0x04: p = "STEREO"; break; case 0x05: p = "DUAL1 (AB)"; break; case 0x06: p = "DUAL2 (AC) (FM)"; break; case 0x07: p = "DUAL3 (BC) (FM)"; break; case 0x08: p = "DUAL4 (AC) (AM)"; break; case 0x09: p = "DUAL5 (BC) (AM)"; break; case 0x0a: p = "SAP"; break; default: p = "undefined"; } v4l_info(client, "Configured audio mode: %s\n", p); } else { switch (audio_config & 0xF) { case 0x00: p = "BG"; break; case 0x01: p = "DK1"; break; case 0x02: p = "DK2"; break; case 0x03: p = "DK3"; break; case 0x04: p = "I"; break; case 0x05: p = "L"; break; case 0x06: p = "BTSC"; break; case 0x07: p = "EIAJ"; break; case 0x08: p = "A2-M"; break; case 0x09: p = "FM Radio"; break; case 0x0f: p = "automatic standard and mode detection"; break; default: p = "undefined"; } v4l_info(client, "Configured audio system: %s\n", p); } if (aud_input) { v4l_info(client, "Specified audio input: Tuner (In%d)\n", aud_input); } else { v4l_info(client, "Specified audio input: External\n"); } switch (pref_mode & 0xf) { case 0: p = "mono/language A"; break; case 1: p = "language B"; break; case 2: p = "language C"; break; case 3: p = "analog fallback"; break; case 4: p = "stereo"; break; case 5: p = "language AC"; break; case 6: p = "language BC"; break; case 7: p = "language AB"; break; default: p = "undefined"; } v4l_info(client, "Preferred audio mode: %s\n", p); if ((audio_config & 0xf) == 0xf) { switch ((afc0 >> 3) & 0x3) { case 0: p = "system DK"; break; case 1: p = "system L"; break; case 2: p = "autodetect"; break; default: p = "undefined"; } v4l_info(client, "Selected 65 MHz format: %s\n", p); switch (afc0 & 0x7) { case 0: p = "chroma"; break; case 1: p = "BTSC"; break; case 2: p = "EIAJ"; break; case 3: p = "A2-M"; break; case 4: p = "autodetect"; break; default: p = "undefined"; } v4l_info(client, "Selected 45 MHz format: %s\n", p); } } #define CX25840_VCONFIG_OPTION(state, cfg_in, opt_msk) \ do { \ if ((cfg_in) & (opt_msk)) { \ (state)->vid_config &= ~(opt_msk); \ (state)->vid_config |= (cfg_in) & (opt_msk); \ } \ } while (0) /* apply incoming options to the current vconfig */ static void cx25840_vconfig_add(struct cx25840_state *state, u32 cfg_in) { CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_FMT_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_RES_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_VBIRAW_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_ANCDATA_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_TASKBIT_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_ACTIVE_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_VALID_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_HRESETW_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_CLKGATE_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_DCMODE_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_IDID0S_MASK); CX25840_VCONFIG_OPTION(state, cfg_in, CX25840_VCONFIG_VIPCLAMP_MASK); } /* ----------------------------------------------------------------------- */ /* * Initializes the device in the generic mode. * For cx2584x chips also adds additional video output settings provided * in @val parameter (CX25840_VCONFIG_*). * * The generic mode disables some of the ivtv-related hacks in this driver. * For cx2584x chips it also enables setting video output configuration while * setting it according to datasheet defaults by default. */ static int cx25840_init(struct v4l2_subdev *sd, u32 val) { struct cx25840_state *state = to_state(sd); state->generic_mode = true; if (is_cx2584x(state)) { /* set datasheet video output defaults */ state->vid_config = CX25840_VCONFIG_FMT_BT656 | CX25840_VCONFIG_RES_8BIT | CX25840_VCONFIG_VBIRAW_DISABLED | CX25840_VCONFIG_ANCDATA_ENABLED | CX25840_VCONFIG_TASKBIT_ONE | CX25840_VCONFIG_ACTIVE_HORIZONTAL | CX25840_VCONFIG_VALID_NORMAL | CX25840_VCONFIG_HRESETW_NORMAL | CX25840_VCONFIG_CLKGATE_NONE | CX25840_VCONFIG_DCMODE_DWORDS | CX25840_VCONFIG_IDID0S_NORMAL | CX25840_VCONFIG_VIPCLAMP_DISABLED; /* add additional settings */ cx25840_vconfig_add(state, val); } else { /* TODO: generic mode needs to be developed for other chips */ WARN_ON(1); } return 0; } static int cx25840_reset(struct v4l2_subdev *sd, u32 val) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (is_cx2583x(state)) cx25836_initialize(client); else if (is_cx2388x(state)) cx23885_initialize(client); else if (is_cx231xx(state)) cx231xx_initialize(client); else cx25840_initialize(client); state->is_initialized = 1; return 0; } /* * This load_fw operation must be called to load the driver's firmware. * This will load the firmware on the first invocation (further ones are NOP). * Without this the audio standard detection will fail and you will * only get mono. * Alternatively, you can call the reset operation instead of this one. * * Since loading the firmware is often problematic when the driver is * compiled into the kernel I recommend postponing calling this function * until the first open of the video device. Another reason for * postponing it is that loading this firmware takes a long time (seconds) * due to the slow i2c bus speed. So it will speed up the boot process if * you can avoid loading the fw as long as the video device isn't used. */ static int cx25840_load_fw(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); if (!state->is_initialized) { /* initialize and load firmware */ cx25840_reset(sd, 0); } return 0; } #ifdef CONFIG_VIDEO_ADV_DEBUG static int cx25840_g_register(struct v4l2_subdev *sd, struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); reg->size = 1; reg->val = cx25840_read(client, reg->reg & 0x0fff); return 0; } static int cx25840_s_register(struct v4l2_subdev *sd, const struct v4l2_dbg_register *reg) { struct i2c_client *client = v4l2_get_subdevdata(sd); cx25840_write(client, reg->reg & 0x0fff, reg->val & 0xff); return 0; } #endif static int cx25840_s_audio_stream(struct v4l2_subdev *sd, int enable) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u8 v; if (is_cx2583x(state) || is_cx2388x(state) || is_cx231xx(state)) return 0; v4l_dbg(1, cx25840_debug, client, "%s audio output\n", enable ? "enable" : "disable"); if (enable) { v = cx25840_read(client, 0x115) | 0x80; cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) | 0x03; cx25840_write(client, 0x116, v); } else { v = cx25840_read(client, 0x115) & ~(0x80); cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) & ~(0x03); cx25840_write(client, 0x116, v); } return 0; } static int cx25840_s_stream(struct v4l2_subdev *sd, int enable) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u8 v; v4l_dbg(1, cx25840_debug, client, "%s video output\n", enable ? "enable" : "disable"); /* * It's not clear what should be done for these devices. * The original code used the same addresses as for the cx25840, but * those addresses do something else entirely on the cx2388x and * cx231xx. Since it never did anything in the first place, just do * nothing. */ if (is_cx2388x(state) || is_cx231xx(state)) return 0; if (enable) { v = cx25840_read(client, 0x115) | 0x0c; cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) | 0x04; cx25840_write(client, 0x116, v); } else { v = cx25840_read(client, 0x115) & ~(0x0c); cx25840_write(client, 0x115, v); v = cx25840_read(client, 0x116) & ~(0x04); cx25840_write(client, 0x116, v); } return 0; } /* Query the current detected video format */ static int cx25840_querystd(struct v4l2_subdev *sd, v4l2_std_id *std) { struct i2c_client *client = v4l2_get_subdevdata(sd); static const v4l2_std_id stds[] = { /* 0000 */ V4L2_STD_UNKNOWN, /* 0001 */ V4L2_STD_NTSC_M, /* 0010 */ V4L2_STD_NTSC_M_JP, /* 0011 */ V4L2_STD_NTSC_443, /* 0100 */ V4L2_STD_PAL, /* 0101 */ V4L2_STD_PAL_M, /* 0110 */ V4L2_STD_PAL_N, /* 0111 */ V4L2_STD_PAL_Nc, /* 1000 */ V4L2_STD_PAL_60, /* 1001 */ V4L2_STD_UNKNOWN, /* 1010 */ V4L2_STD_UNKNOWN, /* 1011 */ V4L2_STD_UNKNOWN, /* 1100 */ V4L2_STD_SECAM, /* 1101 */ V4L2_STD_UNKNOWN, /* 1110 */ V4L2_STD_UNKNOWN, /* 1111 */ V4L2_STD_UNKNOWN }; u32 fmt = (cx25840_read4(client, 0x40c) >> 8) & 0xf; *std = stds[fmt]; v4l_dbg(1, cx25840_debug, client, "querystd fmt = %x, v4l2_std_id = 0x%x\n", fmt, (unsigned int)stds[fmt]); return 0; } static int cx25840_g_input_status(struct v4l2_subdev *sd, u32 *status) { struct i2c_client *client = v4l2_get_subdevdata(sd); /* * A limited function that checks for signal status and returns * the state. */ /* Check for status of Horizontal lock (SRC lock isn't reliable) */ if ((cx25840_read4(client, 0x40c) & 0x00010000) == 0) *status |= V4L2_IN_ST_NO_SIGNAL; return 0; } static int cx25840_g_std(struct v4l2_subdev *sd, v4l2_std_id *std) { struct cx25840_state *state = to_state(sd); *std = state->std; return 0; } static int cx25840_s_std(struct v4l2_subdev *sd, v4l2_std_id std) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (state->radio == 0 && state->std == std) return 0; state->radio = 0; state->std = std; return set_v4lstd(client); } static int cx25840_s_radio(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); state->radio = 1; return 0; } static int cx25840_s_video_routing(struct v4l2_subdev *sd, u32 input, u32 output, u32 config) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (is_cx23888(state)) cx23888_std_setup(client); if (is_cx2584x(state) && state->generic_mode && config) { cx25840_vconfig_add(state, config); cx25840_vconfig_apply(client); } return set_input(client, input, state->aud_input); } static int cx25840_s_audio_routing(struct v4l2_subdev *sd, u32 input, u32 output, u32 config) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (is_cx23888(state)) cx23888_std_setup(client); return set_input(client, state->vid_input, input); } static int cx25840_s_frequency(struct v4l2_subdev *sd, const struct v4l2_frequency *freq) { struct i2c_client *client = v4l2_get_subdevdata(sd); input_change(client); return 0; } static int cx25840_g_tuner(struct v4l2_subdev *sd, struct v4l2_tuner *vt) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); u8 vpres = cx25840_read(client, 0x40e) & 0x20; u8 mode; int val = 0; if (state->radio) return 0; vt->signal = vpres ? 0xffff : 0x0; if (is_cx2583x(state)) return 0; vt->capability |= V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_LANG1 | V4L2_TUNER_CAP_LANG2 | V4L2_TUNER_CAP_SAP; mode = cx25840_read(client, 0x804); /* get rxsubchans and audmode */ if ((mode & 0xf) == 1) val |= V4L2_TUNER_SUB_STEREO; else val |= V4L2_TUNER_SUB_MONO; if (mode == 2 || mode == 4) val = V4L2_TUNER_SUB_LANG1 | V4L2_TUNER_SUB_LANG2; if (mode & 0x10) val |= V4L2_TUNER_SUB_SAP; vt->rxsubchans = val; vt->audmode = state->audmode; return 0; } static int cx25840_s_tuner(struct v4l2_subdev *sd, const struct v4l2_tuner *vt) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); if (state->radio || is_cx2583x(state)) return 0; switch (vt->audmode) { case V4L2_TUNER_MODE_MONO: /* * mono -> mono * stereo -> mono * bilingual -> lang1 */ cx25840_and_or(client, 0x809, ~0xf, 0x00); break; case V4L2_TUNER_MODE_STEREO: case V4L2_TUNER_MODE_LANG1: /* * mono -> mono * stereo -> stereo * bilingual -> lang1 */ cx25840_and_or(client, 0x809, ~0xf, 0x04); break; case V4L2_TUNER_MODE_LANG1_LANG2: /* * mono -> mono * stereo -> stereo * bilingual -> lang1/lang2 */ cx25840_and_or(client, 0x809, ~0xf, 0x07); break; case V4L2_TUNER_MODE_LANG2: /* * mono -> mono * stereo -> stereo * bilingual -> lang2 */ cx25840_and_or(client, 0x809, ~0xf, 0x01); break; default: return -EINVAL; } state->audmode = vt->audmode; return 0; } static int cx25840_log_status(struct v4l2_subdev *sd) { struct cx25840_state *state = to_state(sd); struct i2c_client *client = v4l2_get_subdevdata(sd); log_video_status(client); if (!is_cx2583x(state)) log_audio_status(client); cx25840_ir_log_status(sd); v4l2_ctrl_handler_log_status(&state->hdl, sd->name); return 0; } static int cx23885_irq_handler(struct v4l2_subdev *sd, u32 status, bool *handled) { struct cx25840_state *state = to_state(sd); struct i2c_client *c = v4l2_get_subdevdata(sd); u8 irq_stat, aud_stat, aud_en, ir_stat, ir_en; u32 vid_stat, aud_mc_stat; bool block_handled; int ret = 0; irq_stat = cx25840_read(c, CX23885_PIN_CTRL_IRQ_REG); v4l_dbg(2, cx25840_debug, c, "AV Core IRQ status (entry): %s %s %s\n", irq_stat & CX23885_PIN_CTRL_IRQ_IR_STAT ? "ir" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_AUD_STAT ? "aud" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_VID_STAT ? "vid" : " "); if ((is_cx23885(state) || is_cx23887(state))) { ir_stat = cx25840_read(c, CX25840_IR_STATS_REG); ir_en = cx25840_read(c, CX25840_IR_IRQEN_REG); v4l_dbg(2, cx25840_debug, c, "AV Core ir IRQ status: %#04x disables: %#04x\n", ir_stat, ir_en); if (irq_stat & CX23885_PIN_CTRL_IRQ_IR_STAT) { block_handled = false; ret = cx25840_ir_irq_handler(sd, status, &block_handled); if (block_handled) *handled = true; } } aud_stat = cx25840_read(c, CX25840_AUD_INT_STAT_REG); aud_en = cx25840_read(c, CX25840_AUD_INT_CTRL_REG); v4l_dbg(2, cx25840_debug, c, "AV Core audio IRQ status: %#04x disables: %#04x\n", aud_stat, aud_en); aud_mc_stat = cx25840_read4(c, CX23885_AUD_MC_INT_MASK_REG); v4l_dbg(2, cx25840_debug, c, "AV Core audio MC IRQ status: %#06x enables: %#06x\n", aud_mc_stat >> CX23885_AUD_MC_INT_STAT_SHFT, aud_mc_stat & CX23885_AUD_MC_INT_CTRL_BITS); if (irq_stat & CX23885_PIN_CTRL_IRQ_AUD_STAT) { if (aud_stat) { cx25840_write(c, CX25840_AUD_INT_STAT_REG, aud_stat); *handled = true; } } vid_stat = cx25840_read4(c, CX25840_VID_INT_STAT_REG); v4l_dbg(2, cx25840_debug, c, "AV Core video IRQ status: %#06x disables: %#06x\n", vid_stat & CX25840_VID_INT_STAT_BITS, vid_stat >> CX25840_VID_INT_MASK_SHFT); if (irq_stat & CX23885_PIN_CTRL_IRQ_VID_STAT) { if (vid_stat & CX25840_VID_INT_STAT_BITS) { cx25840_write4(c, CX25840_VID_INT_STAT_REG, vid_stat); *handled = true; } } irq_stat = cx25840_read(c, CX23885_PIN_CTRL_IRQ_REG); v4l_dbg(2, cx25840_debug, c, "AV Core IRQ status (exit): %s %s %s\n", irq_stat & CX23885_PIN_CTRL_IRQ_IR_STAT ? "ir" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_AUD_STAT ? "aud" : " ", irq_stat & CX23885_PIN_CTRL_IRQ_VID_STAT ? "vid" : " "); return ret; } static int cx25840_irq_handler(struct v4l2_subdev *sd, u32 status, bool *handled) { struct cx25840_state *state = to_state(sd); *handled = false; /* Only support the CX2388[578] AV Core for now */ if (is_cx2388x(state)) return cx23885_irq_handler(sd, status, handled); return -ENODEV; } /* ----------------------------------------------------------------------- */ #define DIF_PLL_FREQ_WORD (0x300) #define DIF_BPF_COEFF01 (0x348) #define DIF_BPF_COEFF23 (0x34c) #define DIF_BPF_COEFF45 (0x350) #define DIF_BPF_COEFF67 (0x354) #define DIF_BPF_COEFF89 (0x358) #define DIF_BPF_COEFF1011 (0x35c) #define DIF_BPF_COEFF1213 (0x360) #define DIF_BPF_COEFF1415 (0x364) #define DIF_BPF_COEFF1617 (0x368) #define DIF_BPF_COEFF1819 (0x36c) #define DIF_BPF_COEFF2021 (0x370) #define DIF_BPF_COEFF2223 (0x374) #define DIF_BPF_COEFF2425 (0x378) #define DIF_BPF_COEFF2627 (0x37c) #define DIF_BPF_COEFF2829 (0x380) #define DIF_BPF_COEFF3031 (0x384) #define DIF_BPF_COEFF3233 (0x388) #define DIF_BPF_COEFF3435 (0x38c) #define DIF_BPF_COEFF36 (0x390) static const u32 ifhz_coeffs[][19] = { { // 3.0 MHz 0x00000002, 0x00080012, 0x001e0024, 0x001bfff8, 0xffb4ff50, 0xfed8fe68, 0xfe24fe34, 0xfebaffc7, 0x014d031f, 0x04f0065d, 0x07010688, 0x04c901d6, 0xfe00f9d3, 0xf600f342, 0xf235f337, 0xf64efb22, 0x0105070f, 0x0c460fce, 0x110d0000, }, { // 3.1 MHz 0x00000001, 0x00070012, 0x00220032, 0x00370026, 0xfff0ff91, 0xff0efe7c, 0xfe01fdcc, 0xfe0afedb, 0x00440224, 0x0434060c, 0x0738074e, 0x06090361, 0xff99fb39, 0xf6fef3b6, 0xf21af2a5, 0xf573fa33, 0x0034067d, 0x0bfb0fb9, 0x110d0000, }, { // 3.2 MHz 0x00000000, 0x0004000e, 0x00200038, 0x004c004f, 0x002fffdf, 0xff5cfeb6, 0xfe0dfd92, 0xfd7ffe03, 0xff36010a, 0x03410575, 0x072607d2, 0x071804d5, 0x0134fcb7, 0xf81ff451, 0xf223f22e, 0xf4a7f94b, 0xff6405e8, 0x0bae0fa4, 0x110d0000, }, { // 3.3 MHz 0x0000ffff, 0x00000008, 0x001a0036, 0x0056006d, 0x00670030, 0xffbdff10, 0xfe46fd8d, 0xfd25fd4f, 0xfe35ffe0, 0x0224049f, 0x06c9080e, 0x07ef0627, 0x02c9fe45, 0xf961f513, 0xf250f1d2, 0xf3ecf869, 0xfe930552, 0x0b5f0f8f, 0x110d0000, }, { // 3.4 MHz 0xfffffffe, 0xfffd0001, 0x000f002c, 0x0054007d, 0x0093007c, 0x0024ff82, 0xfea6fdbb, 0xfd03fcca, 0xfd51feb9, 0x00eb0392, 0x06270802, 0x08880750, 0x044dffdb, 0xfabdf5f8, 0xf2a0f193, 0xf342f78f, 0xfdc404b9, 0x0b0e0f78, 0x110d0000, }, { // 3.5 MHz 0xfffffffd, 0xfffafff9, 0x0002001b, 0x0046007d, 0x00ad00ba, 0x00870000, 0xff26fe1a, 0xfd1bfc7e, 0xfc99fda4, 0xffa5025c, 0x054507ad, 0x08dd0847, 0x05b80172, 0xfc2ef6ff, 0xf313f170, 0xf2abf6bd, 0xfcf6041f, 0x0abc0f61, 0x110d0000, }, { // 3.6 MHz 0xfffffffd, 0xfff8fff3, 0xfff50006, 0x002f006c, 0x00b200e3, 0x00dc007e, 0xffb9fea0, 0xfd6bfc71, 0xfc17fcb1, 0xfe65010b, 0x042d0713, 0x08ec0906, 0x07020302, 0xfdaff823, 0xf3a7f16a, 0xf228f5f5, 0xfc2a0384, 0x0a670f4a, 0x110d0000, }, { // 3.7 MHz 0x0000fffd, 0xfff7ffef, 0xffe9fff1, 0x0010004d, 0x00a100f2, 0x011a00f0, 0x0053ff44, 0xfdedfca2, 0xfbd3fbef, 0xfd39ffae, 0x02ea0638, 0x08b50987, 0x08230483, 0xff39f960, 0xf45bf180, 0xf1b8f537, 0xfb6102e7, 0x0a110f32, 0x110d0000, }, { // 3.8 MHz 0x0000fffe, 0xfff9ffee, 0xffe1ffdd, 0xfff00024, 0x007c00e5, 0x013a014a, 0x00e6fff8, 0xfe98fd0f, 0xfbd3fb67, 0xfc32fe54, 0x01880525, 0x083909c7, 0x091505ee, 0x00c7fab3, 0xf52df1b4, 0xf15df484, 0xfa9b0249, 0x09ba0f19, 0x110d0000, }, { // 3.9 MHz 0x00000000, 0xfffbfff0, 0xffdeffcf, 0xffd1fff6, 0x004800be, 0x01390184, 0x016300ac, 0xff5efdb1, 0xfc17fb23, 0xfb5cfd0d, 0x001703e4, 0x077b09c4, 0x09d2073c, 0x0251fc18, 0xf61cf203, 0xf118f3dc, 0xf9d801aa, 0x09600eff, 0x110d0000, }, { // 4.0 MHz 0x00000001, 0xfffefff4, 0xffe1ffc8, 0xffbaffca, 0x000b0082, 0x01170198, 0x01c10152, 0x0030fe7b, 0xfc99fb24, 0xfac3fbe9, 0xfea5027f, 0x0683097f, 0x0a560867, 0x03d2fd89, 0xf723f26f, 0xf0e8f341, 0xf919010a, 0x09060ee5, 0x110d0000, }, { // 4.1 MHz 0x00010002, 0x0002fffb, 0xffe8ffca, 0xffacffa4, 0xffcd0036, 0x00d70184, 0x01f601dc, 0x00ffff60, 0xfd51fb6d, 0xfa6efaf5, 0xfd410103, 0x055708f9, 0x0a9e0969, 0x0543ff02, 0xf842f2f5, 0xf0cef2b2, 0xf85e006b, 0x08aa0ecb, 0x110d0000, }, { // 4.2 MHz 0x00010003, 0x00050003, 0xfff3ffd3, 0xffaaff8b, 0xff95ffe5, 0x0080014a, 0x01fe023f, 0x01ba0050, 0xfe35fbf8, 0xfa62fa3b, 0xfbf9ff7e, 0x04010836, 0x0aa90a3d, 0x069f007f, 0xf975f395, 0xf0cbf231, 0xf7a9ffcb, 0x084c0eaf, 0x110d0000, }, { // 4.3 MHz 0x00010003, 0x0008000a, 0x0000ffe4, 0xffb4ff81, 0xff6aff96, 0x001c00f0, 0x01d70271, 0x0254013b, 0xff36fcbd, 0xfa9ff9c5, 0xfadbfdfe, 0x028c073b, 0x0a750adf, 0x07e101fa, 0xfab8f44e, 0xf0ddf1be, 0xf6f9ff2b, 0x07ed0e94, 0x110d0000, }, { // 4.4 MHz 0x00000003, 0x0009000f, 0x000efff8, 0xffc9ff87, 0xff52ff54, 0xffb5007e, 0x01860270, 0x02c00210, 0x0044fdb2, 0xfb22f997, 0xf9f2fc90, 0x0102060f, 0x0a050b4c, 0x0902036e, 0xfc0af51e, 0xf106f15a, 0xf64efe8b, 0x078d0e77, 0x110d0000, }, { // 4.5 MHz 0x00000002, 0x00080012, 0x0019000e, 0xffe5ff9e, 0xff4fff25, 0xff560000, 0x0112023b, 0x02f702c0, 0x014dfec8, 0xfbe5f9b3, 0xf947fb41, 0xff7004b9, 0x095a0b81, 0x0a0004d8, 0xfd65f603, 0xf144f104, 0xf5aafdec, 0x072b0e5a, 0x110d0000, }, { // 4.6 MHz 0x00000001, 0x00060012, 0x00200022, 0x0005ffc1, 0xff61ff10, 0xff09ff82, 0x008601d7, 0x02f50340, 0x0241fff0, 0xfcddfa19, 0xf8e2fa1e, 0xfde30343, 0x08790b7f, 0x0ad50631, 0xfec7f6fc, 0xf198f0bd, 0xf50dfd4e, 0x06c90e3d, 0x110d0000, }, { // 4.7 MHz 0x0000ffff, 0x0003000f, 0x00220030, 0x0025ffed, 0xff87ff15, 0xfed6ff10, 0xffed014c, 0x02b90386, 0x03110119, 0xfdfefac4, 0xf8c6f92f, 0xfc6701b7, 0x07670b44, 0x0b7e0776, 0x002df807, 0xf200f086, 0xf477fcb1, 0x06650e1e, 0x110d0000, }, { // 4.8 MHz 0xfffffffe, 0xffff0009, 0x001e0038, 0x003f001b, 0xffbcff36, 0xfec2feb6, 0xff5600a5, 0x0248038d, 0x03b00232, 0xff39fbab, 0xf8f4f87f, 0xfb060020, 0x062a0ad2, 0x0bf908a3, 0x0192f922, 0xf27df05e, 0xf3e8fc14, 0x06000e00, 0x110d0000, }, { // 4.9 MHz 0xfffffffd, 0xfffc0002, 0x00160037, 0x00510046, 0xfff9ff6d, 0xfed0fe7c, 0xfecefff0, 0x01aa0356, 0x0413032b, 0x007ffcc5, 0xf96cf812, 0xf9cefe87, 0x04c90a2c, 0x0c4309b4, 0x02f3fa4a, 0xf30ef046, 0xf361fb7a, 0x059b0de0, 0x110d0000, }, { // 5.0 MHz 0xfffffffd, 0xfff9fffa, 0x000a002d, 0x00570067, 0x0037ffb5, 0xfefffe68, 0xfe62ff3d, 0x00ec02e3, 0x043503f6, 0x01befe05, 0xfa27f7ee, 0xf8c6fcf8, 0x034c0954, 0x0c5c0aa4, 0x044cfb7e, 0xf3b1f03f, 0xf2e2fae1, 0x05340dc0, 0x110d0000, }, { // 5.1 MHz 0x0000fffd, 0xfff8fff4, 0xfffd001e, 0x0051007b, 0x006e0006, 0xff48fe7c, 0xfe1bfe9a, 0x001d023e, 0x04130488, 0x02e6ff5b, 0xfb1ef812, 0xf7f7fb7f, 0x01bc084e, 0x0c430b72, 0x059afcba, 0xf467f046, 0xf26cfa4a, 0x04cd0da0, 0x110d0000, }, { // 5.2 MHz 0x0000fffe, 0xfff8ffef, 0xfff00009, 0x003f007f, 0x00980056, 0xffa5feb6, 0xfe00fe15, 0xff4b0170, 0x03b004d7, 0x03e800b9, 0xfc48f87f, 0xf768fa23, 0x0022071f, 0x0bf90c1b, 0x06dafdfd, 0xf52df05e, 0xf1fef9b5, 0x04640d7f, 0x110d0000, }, { // 5.3 MHz 0x0000ffff, 0xfff9ffee, 0xffe6fff3, 0x00250072, 0x00af009c, 0x000cff10, 0xfe13fdb8, 0xfe870089, 0x031104e1, 0x04b8020f, 0xfd98f92f, 0xf71df8f0, 0xfe8805ce, 0x0b7e0c9c, 0x0808ff44, 0xf603f086, 0xf19af922, 0x03fb0d5e, 0x110d0000, }, { // 5.4 MHz 0x00000001, 0xfffcffef, 0xffe0ffe0, 0x00050056, 0x00b000d1, 0x0071ff82, 0xfe53fd8c, 0xfddfff99, 0x024104a3, 0x054a034d, 0xff01fa1e, 0xf717f7ed, 0xfcf50461, 0x0ad50cf4, 0x0921008d, 0xf6e7f0bd, 0xf13ff891, 0x03920d3b, 0x110d0000, }, { // 5.5 MHz 0x00010002, 0xfffffff3, 0xffdeffd1, 0xffe5002f, 0x009c00ed, 0x00cb0000, 0xfebafd94, 0xfd61feb0, 0x014d0422, 0x05970464, 0x0074fb41, 0xf759f721, 0xfb7502de, 0x0a000d21, 0x0a2201d4, 0xf7d9f104, 0xf0edf804, 0x03280d19, 0x110d0000, }, { // 5.6 MHz 0x00010003, 0x0003fffa, 0xffe3ffc9, 0xffc90002, 0x007500ef, 0x010e007e, 0xff3dfdcf, 0xfd16fddd, 0x00440365, 0x059b0548, 0x01e3fc90, 0xf7dff691, 0xfa0f014d, 0x09020d23, 0x0b0a0318, 0xf8d7f15a, 0xf0a5f779, 0x02bd0cf6, 0x110d0000, }, { // 5.7 MHz 0x00010003, 0x00060001, 0xffecffc9, 0xffb4ffd4, 0x004000d5, 0x013600f0, 0xffd3fe39, 0xfd04fd31, 0xff360277, 0x055605ef, 0x033efdfe, 0xf8a5f642, 0xf8cbffb6, 0x07e10cfb, 0x0bd50456, 0xf9dff1be, 0xf067f6f2, 0x02520cd2, 0x110d0000, }, { // 5.8 MHz 0x00000003, 0x00080009, 0xfff8ffd2, 0xffaaffac, 0x000200a3, 0x013c014a, 0x006dfec9, 0xfd2bfcb7, 0xfe350165, 0x04cb0651, 0x0477ff7e, 0xf9a5f635, 0xf7b1fe20, 0x069f0ca8, 0x0c81058b, 0xfaf0f231, 0xf033f66d, 0x01e60cae, 0x110d0000, }, { // 5.9 MHz 0x00000002, 0x0009000e, 0x0005ffe1, 0xffacff90, 0xffc5005f, 0x01210184, 0x00fcff72, 0xfd8afc77, 0xfd51003f, 0x04020669, 0x05830103, 0xfad7f66b, 0xf6c8fc93, 0x05430c2b, 0x0d0d06b5, 0xfc08f2b2, 0xf00af5ec, 0x017b0c89, 0x110d0000, }, { // 6.0 MHz 0x00000001, 0x00070012, 0x0012fff5, 0xffbaff82, 0xff8e000f, 0x00e80198, 0x01750028, 0xfe18fc75, 0xfc99ff15, 0x03050636, 0x0656027f, 0xfc32f6e2, 0xf614fb17, 0x03d20b87, 0x0d7707d2, 0xfd26f341, 0xefeaf56f, 0x010f0c64, 0x110d0000, }, { // 6.1 MHz 0xffff0000, 0x00050012, 0x001c000b, 0xffd1ff84, 0xff66ffbe, 0x00960184, 0x01cd00da, 0xfeccfcb2, 0xfc17fdf9, 0x01e005bc, 0x06e703e4, 0xfdabf798, 0xf599f9b3, 0x02510abd, 0x0dbf08df, 0xfe48f3dc, 0xefd5f4f6, 0x00a20c3e, 0x110d0000, }, { // 6.2 MHz 0xfffffffe, 0x0002000f, 0x0021001f, 0xfff0ff97, 0xff50ff74, 0x0034014a, 0x01fa0179, 0xff97fd2a, 0xfbd3fcfa, 0x00a304fe, 0x07310525, 0xff37f886, 0xf55cf86e, 0x00c709d0, 0x0de209db, 0xff6df484, 0xefcbf481, 0x00360c18, 0x110d0000, }, { // 6.3 MHz 0xfffffffd, 0xfffe000a, 0x0021002f, 0x0010ffb8, 0xff50ff3b, 0xffcc00f0, 0x01fa01fa, 0x0069fdd4, 0xfbd3fc26, 0xff5d0407, 0x07310638, 0x00c9f9a8, 0xf55cf74e, 0xff3908c3, 0x0de20ac3, 0x0093f537, 0xefcbf410, 0xffca0bf2, 0x110d0000, }, { // 6.4 MHz 0xfffffffd, 0xfffb0003, 0x001c0037, 0x002fffe2, 0xff66ff17, 0xff6a007e, 0x01cd0251, 0x0134fea5, 0xfc17fb8b, 0xfe2002e0, 0x06e70713, 0x0255faf5, 0xf599f658, 0xfdaf0799, 0x0dbf0b96, 0x01b8f5f5, 0xefd5f3a3, 0xff5e0bca, 0x110d0000, }, { // 6.5 MHz 0x0000fffd, 0xfff9fffb, 0x00120037, 0x00460010, 0xff8eff0f, 0xff180000, 0x01750276, 0x01e8ff8d, 0xfc99fb31, 0xfcfb0198, 0x065607ad, 0x03cefc64, 0xf614f592, 0xfc2e0656, 0x0d770c52, 0x02daf6bd, 0xefeaf33b, 0xfef10ba3, 0x110d0000, }, { // 6.6 MHz 0x0000fffe, 0xfff7fff5, 0x0005002f, 0x0054003c, 0xffc5ff22, 0xfedfff82, 0x00fc0267, 0x0276007e, 0xfd51fb1c, 0xfbfe003e, 0x05830802, 0x0529fdec, 0xf6c8f4fe, 0xfabd04ff, 0x0d0d0cf6, 0x03f8f78f, 0xf00af2d7, 0xfe850b7b, 0x110d0000, }, { // 6.7 MHz 0x0000ffff, 0xfff8fff0, 0xfff80020, 0x00560060, 0x0002ff4e, 0xfec4ff10, 0x006d0225, 0x02d50166, 0xfe35fb4e, 0xfb35fee1, 0x0477080e, 0x065bff82, 0xf7b1f4a0, 0xf9610397, 0x0c810d80, 0x0510f869, 0xf033f278, 0xfe1a0b52, 0x110d0000, }, { // 6.8 MHz 0x00010000, 0xfffaffee, 0xffec000c, 0x004c0078, 0x0040ff8e, 0xfecafeb6, 0xffd301b6, 0x02fc0235, 0xff36fbc5, 0xfaaafd90, 0x033e07d2, 0x075b011b, 0xf8cbf47a, 0xf81f0224, 0x0bd50def, 0x0621f94b, 0xf067f21e, 0xfdae0b29, 0x110d0000, }, { // 6.9 MHz 0x00010001, 0xfffdffef, 0xffe3fff6, 0x0037007f, 0x0075ffdc, 0xfef2fe7c, 0xff3d0122, 0x02ea02dd, 0x0044fc79, 0xfa65fc5d, 0x01e3074e, 0x082102ad, 0xfa0ff48c, 0xf6fe00a9, 0x0b0a0e43, 0x0729fa33, 0xf0a5f1c9, 0xfd430b00, 0x110d0000, }, { // 7.0 MHz 0x00010002, 0x0001fff3, 0xffdeffe2, 0x001b0076, 0x009c002d, 0xff35fe68, 0xfeba0076, 0x029f0352, 0x014dfd60, 0xfa69fb53, 0x00740688, 0x08a7042d, 0xfb75f4d6, 0xf600ff2d, 0x0a220e7a, 0x0827fb22, 0xf0edf17a, 0xfcd80ad6, 0x110d0000, }, { // 7.1 MHz 0x00000003, 0x0004fff9, 0xffe0ffd2, 0xfffb005e, 0x00b0007a, 0xff8ffe7c, 0xfe53ffc1, 0x0221038c, 0x0241fe6e, 0xfab6fa80, 0xff010587, 0x08e90590, 0xfcf5f556, 0xf52bfdb3, 0x09210e95, 0x0919fc15, 0xf13ff12f, 0xfc6e0aab, 0x110d0000, }, { // 7.2 MHz 0x00000003, 0x00070000, 0xffe6ffc9, 0xffdb0039, 0x00af00b8, 0xfff4feb6, 0xfe13ff10, 0x01790388, 0x0311ff92, 0xfb48f9ed, 0xfd980453, 0x08e306cd, 0xfe88f60a, 0xf482fc40, 0x08080e93, 0x09fdfd0c, 0xf19af0ea, 0xfc050a81, 0x110d0000, }, { // 7.3 MHz 0x00000002, 0x00080008, 0xfff0ffc9, 0xffc1000d, 0x009800e2, 0x005bff10, 0xfe00fe74, 0x00b50345, 0x03b000bc, 0xfc18f9a1, 0xfc4802f9, 0x089807dc, 0x0022f6f0, 0xf407fada, 0x06da0e74, 0x0ad3fe06, 0xf1fef0ab, 0xfb9c0a55, 0x110d0000, }, { // 7.4 MHz 0x00000001, 0x0008000e, 0xfffdffd0, 0xffafffdf, 0x006e00f2, 0x00b8ff82, 0xfe1bfdf8, 0xffe302c8, 0x041301dc, 0xfd1af99e, 0xfb1e0183, 0x080908b5, 0x01bcf801, 0xf3bdf985, 0x059a0e38, 0x0b99ff03, 0xf26cf071, 0xfb330a2a, 0x110d0000, }, { // 7.5 MHz 0xffff0000, 0x00070011, 0x000affdf, 0xffa9ffb5, 0x003700e6, 0x01010000, 0xfe62fda8, 0xff140219, 0x043502e1, 0xfe42f9e6, 0xfa270000, 0x073a0953, 0x034cf939, 0xf3a4f845, 0x044c0de1, 0x0c4f0000, 0xf2e2f03c, 0xfacc09fe, 0x110d0000, }, { // 7.6 MHz 0xffffffff, 0x00040012, 0x0016fff3, 0xffafff95, 0xfff900c0, 0x0130007e, 0xfecefd89, 0xfe560146, 0x041303bc, 0xff81fa76, 0xf96cfe7d, 0x063209b1, 0x04c9fa93, 0xf3bdf71e, 0x02f30d6e, 0x0cf200fd, 0xf361f00e, 0xfa6509d1, 0x110d0000, }, { // 7.7 MHz 0xfffffffe, 0x00010010, 0x001e0008, 0xffc1ff84, 0xffbc0084, 0x013e00f0, 0xff56fd9f, 0xfdb8005c, 0x03b00460, 0x00c7fb45, 0xf8f4fd07, 0x04fa09ce, 0x062afc07, 0xf407f614, 0x01920ce0, 0x0d8301fa, 0xf3e8efe5, 0xfa0009a4, 0x110d0000, }, { // 7.8 MHz 0x0000fffd, 0xfffd000b, 0x0022001d, 0xffdbff82, 0xff870039, 0x012a014a, 0xffedfde7, 0xfd47ff6b, 0x031104c6, 0x0202fc4c, 0xf8c6fbad, 0x039909a7, 0x0767fd8e, 0xf482f52b, 0x002d0c39, 0x0e0002f4, 0xf477efc2, 0xf99b0977, 0x110d0000, }, { // 7.9 MHz 0x0000fffd, 0xfffa0004, 0x0020002d, 0xfffbff91, 0xff61ffe8, 0x00f70184, 0x0086fe5c, 0xfd0bfe85, 0x024104e5, 0x0323fd7d, 0xf8e2fa79, 0x021d093f, 0x0879ff22, 0xf52bf465, 0xfec70b79, 0x0e6803eb, 0xf50defa5, 0xf937094a, 0x110d0000, }, { // 8.0 MHz 0x0000fffe, 0xfff8fffd, 0x00190036, 0x001bffaf, 0xff4fff99, 0x00aa0198, 0x0112fef3, 0xfd09fdb9, 0x014d04be, 0x041bfecc, 0xf947f978, 0x00900897, 0x095a00b9, 0xf600f3c5, 0xfd650aa3, 0x0ebc04de, 0xf5aaef8e, 0xf8d5091c, 0x110d0000, }, { // 8.1 MHz 0x0000ffff, 0xfff7fff6, 0x000e0038, 0x0037ffd7, 0xff52ff56, 0x004b0184, 0x0186ffa1, 0xfd40fd16, 0x00440452, 0x04de0029, 0xf9f2f8b2, 0xfefe07b5, 0x0a05024d, 0xf6fef34d, 0xfc0a09b8, 0x0efa05cd, 0xf64eef7d, 0xf87308ed, 0x110d0000, }, { // 8.2 MHz 0x00010000, 0xfff8fff0, 0x00000031, 0x004c0005, 0xff6aff27, 0xffe4014a, 0x01d70057, 0xfdacfca6, 0xff3603a7, 0x05610184, 0xfadbf82e, 0xfd74069f, 0x0a7503d6, 0xf81ff2ff, 0xfab808b9, 0x0f2306b5, 0xf6f9ef72, 0xf81308bf, 0x110d0000, }, { // 8.3 MHz 0x00010001, 0xfffbffee, 0xfff30022, 0x00560032, 0xff95ff10, 0xff8000f0, 0x01fe0106, 0xfe46fc71, 0xfe3502c7, 0x059e02ce, 0xfbf9f7f2, 0xfbff055b, 0x0aa9054c, 0xf961f2db, 0xf97507aa, 0x0f350797, 0xf7a9ef6d, 0xf7b40890, 0x110d0000, }, { // 8.4 MHz 0x00010002, 0xfffeffee, 0xffe8000f, 0x00540058, 0xffcdff14, 0xff29007e, 0x01f6019e, 0xff01fc7c, 0xfd5101bf, 0x059203f6, 0xfd41f7fe, 0xfaa903f3, 0x0a9e06a9, 0xfabdf2e2, 0xf842068b, 0x0f320871, 0xf85eef6e, 0xf7560860, 0x110d0000, }, { // 8.5 MHz 0x00000003, 0x0002fff2, 0xffe1fff9, 0x00460073, 0x000bff34, 0xfee90000, 0x01c10215, 0xffd0fcc5, 0xfc99009d, 0x053d04f1, 0xfea5f853, 0xf97d0270, 0x0a5607e4, 0xfc2ef314, 0xf723055f, 0x0f180943, 0xf919ef75, 0xf6fa0830, 0x110d0000, }, { // 8.6 MHz 0x00000003, 0x0005fff8, 0xffdeffe4, 0x002f007f, 0x0048ff6b, 0xfec7ff82, 0x0163025f, 0x00a2fd47, 0xfc17ff73, 0x04a405b2, 0x0017f8ed, 0xf88500dc, 0x09d208f9, 0xfdaff370, 0xf61c0429, 0x0ee80a0b, 0xf9d8ef82, 0xf6a00800, 0x110d0000, }, { // 8.7 MHz 0x00000003, 0x0007ffff, 0xffe1ffd4, 0x0010007a, 0x007cffb2, 0xfec6ff10, 0x00e60277, 0x0168fdf9, 0xfbd3fe50, 0x03ce0631, 0x0188f9c8, 0xf7c7ff43, 0x091509e3, 0xff39f3f6, 0xf52d02ea, 0x0ea30ac9, 0xfa9bef95, 0xf64607d0, 0x110d0000, }, { // 8.8 MHz 0x00000002, 0x00090007, 0xffe9ffca, 0xfff00065, 0x00a10003, 0xfee6feb6, 0x0053025b, 0x0213fed0, 0xfbd3fd46, 0x02c70668, 0x02eafadb, 0xf74bfdae, 0x08230a9c, 0x00c7f4a3, 0xf45b01a6, 0x0e480b7c, 0xfb61efae, 0xf5ef079f, 0x110d0000, }, { // 8.9 MHz 0xffff0000, 0x0008000d, 0xfff5ffc8, 0xffd10043, 0x00b20053, 0xff24fe7c, 0xffb9020c, 0x0295ffbb, 0xfc17fc64, 0x019b0654, 0x042dfc1c, 0xf714fc2a, 0x07020b21, 0x0251f575, 0xf3a7005e, 0x0dd80c24, 0xfc2aefcd, 0xf599076e, 0x110d0000, }, { // 9.0 MHz 0xffffffff, 0x00060011, 0x0002ffcf, 0xffba0018, 0x00ad009a, 0xff79fe68, 0xff260192, 0x02e500ab, 0xfc99fbb6, 0x005b05f7, 0x0545fd81, 0xf723fabf, 0x05b80b70, 0x03d2f669, 0xf313ff15, 0x0d550cbf, 0xfcf6eff2, 0xf544073d, 0x110d0000, }, { // 9.1 MHz 0xfffffffe, 0x00030012, 0x000fffdd, 0xffacffea, 0x009300cf, 0xffdcfe7c, 0xfea600f7, 0x02fd0190, 0xfd51fb46, 0xff150554, 0x0627fefd, 0xf778f978, 0x044d0b87, 0x0543f77d, 0xf2a0fdcf, 0x0cbe0d4e, 0xfdc4f01d, 0xf4f2070b, 0x110d0000, }, { // 9.2 MHz 0x0000fffd, 0x00000010, 0x001afff0, 0xffaaffbf, 0x006700ed, 0x0043feb6, 0xfe460047, 0x02db0258, 0xfe35fb1b, 0xfddc0473, 0x06c90082, 0xf811f85e, 0x02c90b66, 0x069ff8ad, 0xf250fc8d, 0x0c140dcf, 0xfe93f04d, 0xf4a106d9, 0x110d0000, }, { // 9.3 MHz 0x0000fffd, 0xfffc000c, 0x00200006, 0xffb4ff9c, 0x002f00ef, 0x00a4ff10, 0xfe0dff92, 0x028102f7, 0xff36fb37, 0xfcbf035e, 0x07260202, 0xf8e8f778, 0x01340b0d, 0x07e1f9f4, 0xf223fb51, 0x0b590e42, 0xff64f083, 0xf45206a7, 0x110d0000, }, { // 9.4 MHz 0x0000fffd, 0xfff90005, 0x0022001a, 0xffc9ff86, 0xfff000d7, 0x00f2ff82, 0xfe01fee5, 0x01f60362, 0x0044fb99, 0xfbcc0222, 0x07380370, 0xf9f7f6cc, 0xff990a7e, 0x0902fb50, 0xf21afa1f, 0x0a8d0ea6, 0x0034f0bf, 0xf4050675, 0x110d0000, }, { // 9.5 MHz 0x0000fffe, 0xfff8fffe, 0x001e002b, 0xffe5ff81, 0xffb400a5, 0x01280000, 0xfe24fe50, 0x01460390, 0x014dfc3a, 0xfb1000ce, 0x070104bf, 0xfb37f65f, 0xfe0009bc, 0x0a00fcbb, 0xf235f8f8, 0x09b20efc, 0x0105f101, 0xf3ba0642, 0x110d0000, }, { // 9.6 MHz 0x0001ffff, 0xfff8fff7, 0x00150036, 0x0005ff8c, 0xff810061, 0x013d007e, 0xfe71fddf, 0x007c0380, 0x0241fd13, 0xfa94ff70, 0x068005e2, 0xfc9bf633, 0xfc7308ca, 0x0ad5fe30, 0xf274f7e0, 0x08c90f43, 0x01d4f147, 0xf371060f, 0x110d0000, }, { // 9.7 MHz 0x00010001, 0xfff9fff1, 0x00090038, 0x0025ffa7, 0xff5e0012, 0x013200f0, 0xfee3fd9b, 0xffaa0331, 0x0311fe15, 0xfa60fe18, 0x05bd06d1, 0xfe1bf64a, 0xfafa07ae, 0x0b7effab, 0xf2d5f6d7, 0x07d30f7a, 0x02a3f194, 0xf32905dc, 0x110d0000, }, { // 9.8 MHz 0x00010002, 0xfffcffee, 0xfffb0032, 0x003fffcd, 0xff4effc1, 0x0106014a, 0xff6efd8a, 0xfedd02aa, 0x03b0ff34, 0xfa74fcd7, 0x04bf0781, 0xffaaf6a3, 0xf99e066b, 0x0bf90128, 0xf359f5e1, 0x06d20fa2, 0x0370f1e5, 0xf2e405a8, 0x110d0000, }, { // 9.9 MHz 0x00000003, 0xffffffee, 0xffef0024, 0x0051fffa, 0xff54ff77, 0x00be0184, 0x0006fdad, 0xfe2701f3, 0x0413005e, 0xfad1fbba, 0x039007ee, 0x013bf73d, 0xf868050a, 0x0c4302a1, 0xf3fdf4fe, 0x05c70fba, 0x043bf23c, 0xf2a10575, 0x110d0000, }, { // 10.0 MHz 0x00000003, 0x0003fff1, 0xffe50011, 0x00570027, 0xff70ff3c, 0x00620198, 0x009efe01, 0xfd95011a, 0x04350183, 0xfb71fad0, 0x023c0812, 0x02c3f811, 0xf75e0390, 0x0c5c0411, 0xf4c1f432, 0x04b30fc1, 0x0503f297, 0xf2610541, 0x110d0000, }, { // 10.1 MHz 0x00000003, 0x0006fff7, 0xffdffffc, 0x00510050, 0xff9dff18, 0xfffc0184, 0x0128fe80, 0xfd32002e, 0x04130292, 0xfc4dfa21, 0x00d107ee, 0x0435f91c, 0xf6850205, 0x0c430573, 0xf5a1f37d, 0x03990fba, 0x05c7f2f8, 0xf222050d, 0x110d0000, }, { // 10.2 MHz 0x00000002, 0x0008fffe, 0xffdfffe7, 0x003f006e, 0xffd6ff0f, 0xff96014a, 0x0197ff1f, 0xfd05ff3e, 0x03b0037c, 0xfd59f9b7, 0xff5d0781, 0x0585fa56, 0xf5e4006f, 0x0bf906c4, 0xf69df2e0, 0x02790fa2, 0x0688f35d, 0xf1e604d8, 0x110d0000, }, { // 10.3 MHz 0xffff0001, 0x00090005, 0xffe4ffd6, 0x0025007e, 0x0014ff20, 0xff3c00f0, 0x01e1ffd0, 0xfd12fe5c, 0x03110433, 0xfe88f996, 0xfdf106d1, 0x06aafbb7, 0xf57efed8, 0x0b7e07ff, 0xf7b0f25e, 0x01560f7a, 0x0745f3c7, 0xf1ac04a4, 0x110d0000, }, { // 10.4 MHz 0xffffffff, 0x0008000c, 0xffedffcb, 0x0005007d, 0x0050ff4c, 0xfef6007e, 0x01ff0086, 0xfd58fd97, 0x024104ad, 0xffcaf9c0, 0xfc9905e2, 0x079afd35, 0xf555fd46, 0x0ad50920, 0xf8d9f1f6, 0x00310f43, 0x07fdf435, 0xf174046f, 0x110d0000, }, { // 10.5 MHz 0xfffffffe, 0x00050011, 0xfffaffc8, 0xffe5006b, 0x0082ff8c, 0xfecc0000, 0x01f00130, 0xfdd2fcfc, 0x014d04e3, 0x010efa32, 0xfb6404bf, 0x084efec5, 0xf569fbc2, 0x0a000a23, 0xfa15f1ab, 0xff0b0efc, 0x08b0f4a7, 0xf13f043a, 0x110d0000, }, { // 10.6 MHz 0x0000fffd, 0x00020012, 0x0007ffcd, 0xffc9004c, 0x00a4ffd9, 0xfec3ff82, 0x01b401c1, 0xfe76fc97, 0x004404d2, 0x0245fae8, 0xfa5f0370, 0x08c1005f, 0xf5bcfa52, 0x09020b04, 0xfb60f17b, 0xfde70ea6, 0x095df51e, 0xf10c0405, 0x110d0000, }, { // 10.7 MHz 0x0000fffd, 0xffff0011, 0x0014ffdb, 0xffb40023, 0x00b2002a, 0xfedbff10, 0x0150022d, 0xff38fc6f, 0xff36047b, 0x035efbda, 0xf9940202, 0x08ee01f5, 0xf649f8fe, 0x07e10bc2, 0xfcb6f169, 0xfcc60e42, 0x0a04f599, 0xf0db03d0, 0x110d0000, }, { // 10.8 MHz 0x0000fffd, 0xfffb000d, 0x001dffed, 0xffaafff5, 0x00aa0077, 0xff13feb6, 0x00ce026b, 0x000afc85, 0xfe3503e3, 0x044cfcfb, 0xf90c0082, 0x08d5037f, 0xf710f7cc, 0x069f0c59, 0xfe16f173, 0xfbaa0dcf, 0x0aa5f617, 0xf0ad039b, 0x110d0000, }, { // 10.9 MHz 0x0000fffe, 0xfff90006, 0x00210003, 0xffacffc8, 0x008e00b6, 0xff63fe7c, 0x003a0275, 0x00dafcda, 0xfd510313, 0x0501fe40, 0xf8cbfefd, 0x087604f0, 0xf80af6c2, 0x05430cc8, 0xff7af19a, 0xfa940d4e, 0x0b3ff699, 0xf0810365, 0x110d0000, }, { // 11.0 MHz 0x0001ffff, 0xfff8ffff, 0x00210018, 0xffbaffa3, 0x006000e1, 0xffc4fe68, 0xffa0024b, 0x019afd66, 0xfc990216, 0x0575ff99, 0xf8d4fd81, 0x07d40640, 0xf932f5e6, 0x03d20d0d, 0x00dff1de, 0xf9860cbf, 0x0bd1f71e, 0xf058032f, 0x110d0000, }, { // 11.1 MHz 0x00010000, 0xfff8fff8, 0x001b0029, 0xffd1ff8a, 0x002600f2, 0x002cfe7c, 0xff0f01f0, 0x023bfe20, 0xfc1700fa, 0x05a200f7, 0xf927fc1c, 0x06f40765, 0xfa82f53b, 0x02510d27, 0x0243f23d, 0xf8810c24, 0x0c5cf7a7, 0xf03102fa, 0x110d0000, }, { // 11.2 MHz 0x00010002, 0xfffafff2, 0x00110035, 0xfff0ff81, 0xffe700e7, 0x008ffeb6, 0xfe94016d, 0x02b0fefb, 0xfbd3ffd1, 0x05850249, 0xf9c1fadb, 0x05de0858, 0xfbf2f4c4, 0x00c70d17, 0x03a0f2b8, 0xf7870b7c, 0x0cdff833, 0xf00d02c4, 0x110d0000, }, { // 11.3 MHz 0x00000003, 0xfffdffee, 0x00040038, 0x0010ff88, 0xffac00c2, 0x00e2ff10, 0xfe3900cb, 0x02f1ffe9, 0xfbd3feaa, 0x05210381, 0xfa9cf9c8, 0x04990912, 0xfd7af484, 0xff390cdb, 0x04f4f34d, 0xf69a0ac9, 0x0d5af8c1, 0xefec028e, 0x110d0000, }, { // 11.4 MHz 0x00000003, 0x0000ffee, 0xfff60033, 0x002fff9f, 0xff7b0087, 0x011eff82, 0xfe080018, 0x02f900d8, 0xfc17fd96, 0x04790490, 0xfbadf8ed, 0x032f098e, 0xff10f47d, 0xfdaf0c75, 0x063cf3fc, 0xf5ba0a0b, 0x0dccf952, 0xefcd0258, 0x110d0000, }, { // 11.5 MHz 0x00000003, 0x0004fff1, 0xffea0026, 0x0046ffc3, 0xff5a003c, 0x013b0000, 0xfe04ff63, 0x02c801b8, 0xfc99fca6, 0x0397056a, 0xfcecf853, 0x01ad09c9, 0x00acf4ad, 0xfc2e0be7, 0x0773f4c2, 0xf4e90943, 0x0e35f9e6, 0xefb10221, 0x110d0000, }, { // 11.6 MHz 0x00000002, 0x0007fff6, 0xffe20014, 0x0054ffee, 0xff4effeb, 0x0137007e, 0xfe2efebb, 0x0260027a, 0xfd51fbe6, 0x02870605, 0xfe4af7fe, 0x001d09c1, 0x0243f515, 0xfabd0b32, 0x0897f59e, 0xf4280871, 0x0e95fa7c, 0xef9701eb, 0x110d0000, }, { // 11.7 MHz 0xffff0001, 0x0008fffd, 0xffdeffff, 0x0056001d, 0xff57ff9c, 0x011300f0, 0xfe82fe2e, 0x01ca0310, 0xfe35fb62, 0x0155065a, 0xffbaf7f2, 0xfe8c0977, 0x03cef5b2, 0xf9610a58, 0x09a5f68f, 0xf3790797, 0x0eebfb14, 0xef8001b5, 0x110d0000, }, { // 11.8 MHz 0xffff0000, 0x00080004, 0xffe0ffe9, 0x004c0047, 0xff75ff58, 0x00d1014a, 0xfef9fdc8, 0x0111036f, 0xff36fb21, 0x00120665, 0x012df82e, 0xfd0708ec, 0x0542f682, 0xf81f095c, 0x0a9af792, 0xf2db06b5, 0x0f38fbad, 0xef6c017e, 0x110d0000, }, { // 11.9 MHz 0xffffffff, 0x0007000b, 0xffe7ffd8, 0x00370068, 0xffa4ff28, 0x00790184, 0xff87fd91, 0x00430392, 0x0044fb26, 0xfece0626, 0x0294f8b2, 0xfb990825, 0x0698f77f, 0xf6fe0842, 0x0b73f8a7, 0xf25105cd, 0x0f7bfc48, 0xef5a0148, 0x110d0000, }, { // 12.0 MHz 0x0000fffe, 0x00050010, 0xfff2ffcc, 0x001b007b, 0xffdfff10, 0x00140198, 0x0020fd8e, 0xff710375, 0x014dfb73, 0xfd9a059f, 0x03e0f978, 0xfa4e0726, 0x07c8f8a7, 0xf600070c, 0x0c2ff9c9, 0xf1db04de, 0x0fb4fce5, 0xef4b0111, 0x110d0000, }, { // 12.1 MHz 0x0000fffd, 0x00010012, 0xffffffc8, 0xfffb007e, 0x001dff14, 0xffad0184, 0x00b7fdbe, 0xfea9031b, 0x0241fc01, 0xfc8504d6, 0x0504fa79, 0xf93005f6, 0x08caf9f2, 0xf52b05c0, 0x0ccbfaf9, 0xf17903eb, 0x0fe3fd83, 0xef3f00db, 0x110d0000, }, { // 12.2 MHz 0x0000fffd, 0xfffe0011, 0x000cffcc, 0xffdb0071, 0x0058ff32, 0xff4f014a, 0x013cfe1f, 0xfdfb028a, 0x0311fcc9, 0xfb9d03d6, 0x05f4fbad, 0xf848049d, 0x0999fb5b, 0xf4820461, 0x0d46fc32, 0xf12d02f4, 0x1007fe21, 0xef3600a4, 0x110d0000, }, { // 12.3 MHz 0x0000fffe, 0xfffa000e, 0x0017ffd9, 0xffc10055, 0x0088ff68, 0xff0400f0, 0x01a6fea7, 0xfd7501cc, 0x03b0fdc0, 0xfaef02a8, 0x06a7fd07, 0xf79d0326, 0x0a31fcda, 0xf40702f3, 0x0d9ffd72, 0xf0f601fa, 0x1021fec0, 0xef2f006d, 0x110d0000, }, { // 12.4 MHz 0x0001ffff, 0xfff80007, 0x001fffeb, 0xffaf002d, 0x00a8ffb0, 0xfed3007e, 0x01e9ff4c, 0xfd2000ee, 0x0413fed8, 0xfa82015c, 0x0715fe7d, 0xf7340198, 0x0a8dfe69, 0xf3bd017c, 0x0dd5feb8, 0xf0d500fd, 0x1031ff60, 0xef2b0037, 0x110d0000, }, { // 12.5 MHz 0x00010000, 0xfff70000, 0x00220000, 0xffa90000, 0x00b30000, 0xfec20000, 0x02000000, 0xfd030000, 0x04350000, 0xfa5e0000, 0x073b0000, 0xf7110000, 0x0aac0000, 0xf3a40000, 0x0de70000, 0xf0c90000, 0x10360000, 0xef290000, 0x110d0000, }, { // 12.6 MHz 0x00010001, 0xfff8fff9, 0x001f0015, 0xffafffd3, 0x00a80050, 0xfed3ff82, 0x01e900b4, 0xfd20ff12, 0x04130128, 0xfa82fea4, 0x07150183, 0xf734fe68, 0x0a8d0197, 0xf3bdfe84, 0x0dd50148, 0xf0d5ff03, 0x103100a0, 0xef2bffc9, 0x110d0000, }, { // 12.7 MHz 0x00000002, 0xfffafff2, 0x00170027, 0xffc1ffab, 0x00880098, 0xff04ff10, 0x01a60159, 0xfd75fe34, 0x03b00240, 0xfaeffd58, 0x06a702f9, 0xf79dfcda, 0x0a310326, 0xf407fd0d, 0x0d9f028e, 0xf0f6fe06, 0x10210140, 0xef2fff93, 0x110d0000, }, { // 12.8 MHz 0x00000003, 0xfffeffef, 0x000c0034, 0xffdbff8f, 0x005800ce, 0xff4ffeb6, 0x013c01e1, 0xfdfbfd76, 0x03110337, 0xfb9dfc2a, 0x05f40453, 0xf848fb63, 0x099904a5, 0xf482fb9f, 0x0d4603ce, 0xf12dfd0c, 0x100701df, 0xef36ff5c, 0x110d0000, }, { // 12.9 MHz 0x00000003, 0x0001ffee, 0xffff0038, 0xfffbff82, 0x001d00ec, 0xffadfe7c, 0x00b70242, 0xfea9fce5, 0x024103ff, 0xfc85fb2a, 0x05040587, 0xf930fa0a, 0x08ca060e, 0xf52bfa40, 0x0ccb0507, 0xf179fc15, 0x0fe3027d, 0xef3fff25, 0x110d0000, }, { // 13.0 MHz 0x00000002, 0x0005fff0, 0xfff20034, 0x001bff85, 0xffdf00f0, 0x0014fe68, 0x00200272, 0xff71fc8b, 0x014d048d, 0xfd9afa61, 0x03e00688, 0xfa4ef8da, 0x07c80759, 0xf600f8f4, 0x0c2f0637, 0xf1dbfb22, 0x0fb4031b, 0xef4bfeef, 0x110d0000, }, { // 13.1 MHz 0xffff0001, 0x0007fff5, 0xffe70028, 0x0037ff98, 0xffa400d8, 0x0079fe7c, 0xff87026f, 0x0043fc6e, 0x004404da, 0xfecef9da, 0x0294074e, 0xfb99f7db, 0x06980881, 0xf6fef7be, 0x0b730759, 0xf251fa33, 0x0f7b03b8, 0xef5afeb8, 0x110d0000, }, { // 13.2 MHz 0xffff0000, 0x0008fffc, 0xffe00017, 0x004cffb9, 0xff7500a8, 0x00d1feb6, 0xfef90238, 0x0111fc91, 0xff3604df, 0x0012f99b, 0x012d07d2, 0xfd07f714, 0x0542097e, 0xf81ff6a4, 0x0a9a086e, 0xf2dbf94b, 0x0f380453, 0xef6cfe82, 0x110d0000, }, { // 13.3 MHz 0xffffffff, 0x00080003, 0xffde0001, 0x0056ffe3, 0xff570064, 0x0113ff10, 0xfe8201d2, 0x01cafcf0, 0xfe35049e, 0x0155f9a6, 0xffba080e, 0xfe8cf689, 0x03ce0a4e, 0xf961f5a8, 0x09a50971, 0xf379f869, 0x0eeb04ec, 0xef80fe4b, 0x110d0000, }, { // 13.4 MHz 0x0000fffe, 0x0007000a, 0xffe2ffec, 0x00540012, 0xff4e0015, 0x0137ff82, 0xfe2e0145, 0x0260fd86, 0xfd51041a, 0x0287f9fb, 0xfe4a0802, 0x001df63f, 0x02430aeb, 0xfabdf4ce, 0x08970a62, 0xf428f78f, 0x0e950584, 0xef97fe15, 0x110d0000, }, { // 13.5 MHz 0x0000fffd, 0x0004000f, 0xffeaffda, 0x0046003d, 0xff5affc4, 0x013b0000, 0xfe04009d, 0x02c8fe48, 0xfc99035a, 0x0397fa96, 0xfcec07ad, 0x01adf637, 0x00ac0b53, 0xfc2ef419, 0x07730b3e, 0xf4e9f6bd, 0x0e35061a, 0xefb1fddf, 0x110d0000, }, { // 13.6 MHz 0x0000fffd, 0x00000012, 0xfff6ffcd, 0x002f0061, 0xff7bff79, 0x011e007e, 0xfe08ffe8, 0x02f9ff28, 0xfc17026a, 0x0479fb70, 0xfbad0713, 0x032ff672, 0xff100b83, 0xfdaff38b, 0x063c0c04, 0xf5baf5f5, 0x0dcc06ae, 0xefcdfda8, 0x110d0000, }, { // 13.7 MHz 0x0000fffd, 0xfffd0012, 0x0004ffc8, 0x00100078, 0xffacff3e, 0x00e200f0, 0xfe39ff35, 0x02f10017, 0xfbd30156, 0x0521fc7f, 0xfa9c0638, 0x0499f6ee, 0xfd7a0b7c, 0xff39f325, 0x04f40cb3, 0xf69af537, 0x0d5a073f, 0xefecfd72, 0x110d0000, }, { // 13.8 MHz 0x0001fffe, 0xfffa000e, 0x0011ffcb, 0xfff0007f, 0xffe7ff19, 0x008f014a, 0xfe94fe93, 0x02b00105, 0xfbd3002f, 0x0585fdb7, 0xf9c10525, 0x05def7a8, 0xfbf20b3c, 0x00c7f2e9, 0x03a00d48, 0xf787f484, 0x0cdf07cd, 0xf00dfd3c, 0x110d0000, }, { // 13.9 MHz 0x00010000, 0xfff80008, 0x001bffd7, 0xffd10076, 0x0026ff0e, 0x002c0184, 0xff0ffe10, 0x023b01e0, 0xfc17ff06, 0x05a2ff09, 0xf92703e4, 0x06f4f89b, 0xfa820ac5, 0x0251f2d9, 0x02430dc3, 0xf881f3dc, 0x0c5c0859, 0xf031fd06, 0x110d0000, }, { // 14.0 MHz 0x00010001, 0xfff80001, 0x0021ffe8, 0xffba005d, 0x0060ff1f, 0xffc40198, 0xffa0fdb5, 0x019a029a, 0xfc99fdea, 0x05750067, 0xf8d4027f, 0x07d4f9c0, 0xf9320a1a, 0x03d2f2f3, 0x00df0e22, 0xf986f341, 0x0bd108e2, 0xf058fcd1, 0x110d0000, }, { // 14.1 MHz 0x00000002, 0xfff9fffa, 0x0021fffd, 0xffac0038, 0x008eff4a, 0xff630184, 0x003afd8b, 0x00da0326, 0xfd51fced, 0x050101c0, 0xf8cb0103, 0x0876fb10, 0xf80a093e, 0x0543f338, 0xff7a0e66, 0xfa94f2b2, 0x0b3f0967, 0xf081fc9b, 0x110d0000, }, { // 14.2 MHz 0x00000003, 0xfffbfff3, 0x001d0013, 0xffaa000b, 0x00aaff89, 0xff13014a, 0x00cefd95, 0x000a037b, 0xfe35fc1d, 0x044c0305, 0xf90cff7e, 0x08d5fc81, 0xf7100834, 0x069ff3a7, 0xfe160e8d, 0xfbaaf231, 0x0aa509e9, 0xf0adfc65, 0x110d0000, }, { // 14.3 MHz 0x00000003, 0xffffffef, 0x00140025, 0xffb4ffdd, 0x00b2ffd6, 0xfedb00f0, 0x0150fdd3, 0xff380391, 0xff36fb85, 0x035e0426, 0xf994fdfe, 0x08eefe0b, 0xf6490702, 0x07e1f43e, 0xfcb60e97, 0xfcc6f1be, 0x0a040a67, 0xf0dbfc30, 0x110d0000, }, { // 14.4 MHz 0x00000003, 0x0002ffee, 0x00070033, 0xffc9ffb4, 0x00a40027, 0xfec3007e, 0x01b4fe3f, 0xfe760369, 0x0044fb2e, 0x02450518, 0xfa5ffc90, 0x08c1ffa1, 0xf5bc05ae, 0x0902f4fc, 0xfb600e85, 0xfde7f15a, 0x095d0ae2, 0xf10cfbfb, 0x110d0000, }, { // 14.5 MHz 0xffff0002, 0x0005ffef, 0xfffa0038, 0xffe5ff95, 0x00820074, 0xfecc0000, 0x01f0fed0, 0xfdd20304, 0x014dfb1d, 0x010e05ce, 0xfb64fb41, 0x084e013b, 0xf569043e, 0x0a00f5dd, 0xfa150e55, 0xff0bf104, 0x08b00b59, 0xf13ffbc6, 0x110d0000, }, { // 14.6 MHz 0xffff0001, 0x0008fff4, 0xffed0035, 0x0005ff83, 0x005000b4, 0xfef6ff82, 0x01ffff7a, 0xfd580269, 0x0241fb53, 0xffca0640, 0xfc99fa1e, 0x079a02cb, 0xf55502ba, 0x0ad5f6e0, 0xf8d90e0a, 0x0031f0bd, 0x07fd0bcb, 0xf174fb91, 0x110d0000, }, { // 14.7 MHz 0xffffffff, 0x0009fffb, 0xffe4002a, 0x0025ff82, 0x001400e0, 0xff3cff10, 0x01e10030, 0xfd1201a4, 0x0311fbcd, 0xfe88066a, 0xfdf1f92f, 0x06aa0449, 0xf57e0128, 0x0b7ef801, 0xf7b00da2, 0x0156f086, 0x07450c39, 0xf1acfb5c, 0x110d0000, }, { // 14.8 MHz 0x0000fffe, 0x00080002, 0xffdf0019, 0x003fff92, 0xffd600f1, 0xff96feb6, 0x019700e1, 0xfd0500c2, 0x03b0fc84, 0xfd590649, 0xff5df87f, 0x058505aa, 0xf5e4ff91, 0x0bf9f93c, 0xf69d0d20, 0x0279f05e, 0x06880ca3, 0xf1e6fb28, 0x110d0000, }, { // 14.9 MHz 0x0000fffd, 0x00060009, 0xffdf0004, 0x0051ffb0, 0xff9d00e8, 0xfffcfe7c, 0x01280180, 0xfd32ffd2, 0x0413fd6e, 0xfc4d05df, 0x00d1f812, 0x043506e4, 0xf685fdfb, 0x0c43fa8d, 0xf5a10c83, 0x0399f046, 0x05c70d08, 0xf222faf3, 0x110d0000, }, { // 15.0 MHz 0x0000fffd, 0x0003000f, 0xffe5ffef, 0x0057ffd9, 0xff7000c4, 0x0062fe68, 0x009e01ff, 0xfd95fee6, 0x0435fe7d, 0xfb710530, 0x023cf7ee, 0x02c307ef, 0xf75efc70, 0x0c5cfbef, 0xf4c10bce, 0x04b3f03f, 0x05030d69, 0xf261fabf, 0x110d0000, }, { // 15.1 MHz 0x0000fffd, 0xffff0012, 0xffefffdc, 0x00510006, 0xff540089, 0x00befe7c, 0x00060253, 0xfe27fe0d, 0x0413ffa2, 0xfad10446, 0x0390f812, 0x013b08c3, 0xf868faf6, 0x0c43fd5f, 0xf3fd0b02, 0x05c7f046, 0x043b0dc4, 0xf2a1fa8b, 0x110d0000, }, { // 15.2 MHz 0x0001fffe, 0xfffc0012, 0xfffbffce, 0x003f0033, 0xff4e003f, 0x0106feb6, 0xff6e0276, 0xfeddfd56, 0x03b000cc, 0xfa740329, 0x04bff87f, 0xffaa095d, 0xf99ef995, 0x0bf9fed8, 0xf3590a1f, 0x06d2f05e, 0x03700e1b, 0xf2e4fa58, 0x110d0000, }, { // 15.3 MHz 0x0001ffff, 0xfff9000f, 0x0009ffc8, 0x00250059, 0xff5effee, 0x0132ff10, 0xfee30265, 0xffaafccf, 0x031101eb, 0xfa6001e8, 0x05bdf92f, 0xfe1b09b6, 0xfafaf852, 0x0b7e0055, 0xf2d50929, 0x07d3f086, 0x02a30e6c, 0xf329fa24, 0x110d0000, }, { // 15.4 MHz 0x00010001, 0xfff80009, 0x0015ffca, 0x00050074, 0xff81ff9f, 0x013dff82, 0xfe710221, 0x007cfc80, 0x024102ed, 0xfa940090, 0x0680fa1e, 0xfc9b09cd, 0xfc73f736, 0x0ad501d0, 0xf2740820, 0x08c9f0bd, 0x01d40eb9, 0xf371f9f1, 0x110d0000, }, { // 15.5 MHz 0x00000002, 0xfff80002, 0x001effd5, 0xffe5007f, 0xffb4ff5b, 0x01280000, 0xfe2401b0, 0x0146fc70, 0x014d03c6, 0xfb10ff32, 0x0701fb41, 0xfb3709a1, 0xfe00f644, 0x0a000345, 0xf2350708, 0x09b2f104, 0x01050eff, 0xf3baf9be, 0x110d0000, }, { // 15.6 MHz 0x00000003, 0xfff9fffb, 0x0022ffe6, 0xffc9007a, 0xfff0ff29, 0x00f2007e, 0xfe01011b, 0x01f6fc9e, 0x00440467, 0xfbccfdde, 0x0738fc90, 0xf9f70934, 0xff99f582, 0x090204b0, 0xf21a05e1, 0x0a8df15a, 0x00340f41, 0xf405f98b, 0x110d0000, }, { // 15.7 MHz 0x00000003, 0xfffcfff4, 0x0020fffa, 0xffb40064, 0x002fff11, 0x00a400f0, 0xfe0d006e, 0x0281fd09, 0xff3604c9, 0xfcbffca2, 0x0726fdfe, 0xf8e80888, 0x0134f4f3, 0x07e1060c, 0xf22304af, 0x0b59f1be, 0xff640f7d, 0xf452f959, 0x110d0000, }, { // 15.8 MHz 0x00000003, 0x0000fff0, 0x001a0010, 0xffaa0041, 0x0067ff13, 0x0043014a, 0xfe46ffb9, 0x02dbfda8, 0xfe3504e5, 0xfddcfb8d, 0x06c9ff7e, 0xf81107a2, 0x02c9f49a, 0x069f0753, 0xf2500373, 0x0c14f231, 0xfe930fb3, 0xf4a1f927, 0x110d0000, }, { // 15.9 MHz 0xffff0002, 0x0003ffee, 0x000f0023, 0xffac0016, 0x0093ff31, 0xffdc0184, 0xfea6ff09, 0x02fdfe70, 0xfd5104ba, 0xff15faac, 0x06270103, 0xf7780688, 0x044df479, 0x05430883, 0xf2a00231, 0x0cbef2b2, 0xfdc40fe3, 0xf4f2f8f5, 0x110d0000, }, { // 16.0 MHz 0xffff0001, 0x0006ffef, 0x00020031, 0xffbaffe8, 0x00adff66, 0xff790198, 0xff26fe6e, 0x02e5ff55, 0xfc99044a, 0x005bfa09, 0x0545027f, 0xf7230541, 0x05b8f490, 0x03d20997, 0xf31300eb, 0x0d55f341, 0xfcf6100e, 0xf544f8c3, 0x110d0000, } }; static void cx23885_dif_setup(struct i2c_client *client, u32 ifHz) { u64 pll_freq; u32 pll_freq_word; const u32 *coeffs; v4l_dbg(1, cx25840_debug, client, "%s(%d)\n", __func__, ifHz); /* Assuming TV */ /* Calculate the PLL frequency word based on the adjusted ifHz */ pll_freq = div_u64((u64)ifHz * 268435456, 50000000); pll_freq_word = (u32)pll_freq; cx25840_write4(client, DIF_PLL_FREQ_WORD, pll_freq_word); /* Round down to the nearest 100KHz */ ifHz = (ifHz / 100000) * 100000; if (ifHz < 3000000) ifHz = 3000000; if (ifHz > 16000000) ifHz = 16000000; v4l_dbg(1, cx25840_debug, client, "%s(%d) again\n", __func__, ifHz); coeffs = ifhz_coeffs[(ifHz - 3000000) / 100000]; cx25840_write4(client, DIF_BPF_COEFF01, coeffs[0]); cx25840_write4(client, DIF_BPF_COEFF23, coeffs[1]); cx25840_write4(client, DIF_BPF_COEFF45, coeffs[2]); cx25840_write4(client, DIF_BPF_COEFF67, coeffs[3]); cx25840_write4(client, DIF_BPF_COEFF89, coeffs[4]); cx25840_write4(client, DIF_BPF_COEFF1011, coeffs[5]); cx25840_write4(client, DIF_BPF_COEFF1213, coeffs[6]); cx25840_write4(client, DIF_BPF_COEFF1415, coeffs[7]); cx25840_write4(client, DIF_BPF_COEFF1617, coeffs[8]); cx25840_write4(client, DIF_BPF_COEFF1819, coeffs[9]); cx25840_write4(client, DIF_BPF_COEFF2021, coeffs[10]); cx25840_write4(client, DIF_BPF_COEFF2223, coeffs[11]); cx25840_write4(client, DIF_BPF_COEFF2425, coeffs[12]); cx25840_write4(client, DIF_BPF_COEFF2627, coeffs[13]); cx25840_write4(client, DIF_BPF_COEFF2829, coeffs[14]); cx25840_write4(client, DIF_BPF_COEFF3031, coeffs[15]); cx25840_write4(client, DIF_BPF_COEFF3233, coeffs[16]); cx25840_write4(client, DIF_BPF_COEFF3435, coeffs[17]); cx25840_write4(client, DIF_BPF_COEFF36, coeffs[18]); } static void cx23888_std_setup(struct i2c_client *client) { struct cx25840_state *state = to_state(i2c_get_clientdata(client)); v4l2_std_id std = state->std; u32 ifHz; cx25840_write4(client, 0x478, 0x6628021F); cx25840_write4(client, 0x400, 0x0); cx25840_write4(client, 0x4b4, 0x20524030); cx25840_write4(client, 0x47c, 0x010a8263); if (std & V4L2_STD_525_60) { v4l_dbg(1, cx25840_debug, client, "%s() Selecting NTSC", __func__); /* Horiz / vert timing */ cx25840_write4(client, 0x428, 0x1e1e601a); cx25840_write4(client, 0x424, 0x5b2d007a); /* DIF NTSC */ cx25840_write4(client, 0x304, 0x6503bc0c); cx25840_write4(client, 0x308, 0xbd038c85); cx25840_write4(client, 0x30c, 0x1db4640a); cx25840_write4(client, 0x310, 0x00008800); cx25840_write4(client, 0x314, 0x44400400); cx25840_write4(client, 0x32c, 0x0c800800); cx25840_write4(client, 0x330, 0x27000100); cx25840_write4(client, 0x334, 0x1f296e1f); cx25840_write4(client, 0x338, 0x009f50c1); cx25840_write4(client, 0x340, 0x1befbf06); cx25840_write4(client, 0x344, 0x000035e8); /* DIF I/F */ ifHz = 5400000; } else { v4l_dbg(1, cx25840_debug, client, "%s() Selecting PAL-BG", __func__); /* Horiz / vert timing */ cx25840_write4(client, 0x428, 0x28244024); cx25840_write4(client, 0x424, 0x5d2d0084); /* DIF */ cx25840_write4(client, 0x304, 0x6503bc0c); cx25840_write4(client, 0x308, 0xbd038c85); cx25840_write4(client, 0x30c, 0x1db4640a); cx25840_write4(client, 0x310, 0x00008800); cx25840_write4(client, 0x314, 0x44400600); cx25840_write4(client, 0x32c, 0x0c800800); cx25840_write4(client, 0x330, 0x27000100); cx25840_write4(client, 0x334, 0x213530ec); cx25840_write4(client, 0x338, 0x00a65ba8); cx25840_write4(client, 0x340, 0x1befbf06); cx25840_write4(client, 0x344, 0x000035e8); /* DIF I/F */ ifHz = 6000000; } cx23885_dif_setup(client, ifHz); /* Explicitly ensure the inputs are reconfigured after * a standard change. */ set_input(client, state->vid_input, state->aud_input); } /* ----------------------------------------------------------------------- */ static const struct v4l2_ctrl_ops cx25840_ctrl_ops = { .s_ctrl = cx25840_s_ctrl, }; static const struct v4l2_subdev_core_ops cx25840_core_ops = { .log_status = cx25840_log_status, .reset = cx25840_reset, /* calling the (optional) init op will turn on the generic mode */ .init = cx25840_init, .load_fw = cx25840_load_fw, .s_io_pin_config = common_s_io_pin_config, #ifdef CONFIG_VIDEO_ADV_DEBUG .g_register = cx25840_g_register, .s_register = cx25840_s_register, #endif .interrupt_service_routine = cx25840_irq_handler, }; static const struct v4l2_subdev_tuner_ops cx25840_tuner_ops = { .s_frequency = cx25840_s_frequency, .s_radio = cx25840_s_radio, .g_tuner = cx25840_g_tuner, .s_tuner = cx25840_s_tuner, }; static const struct v4l2_subdev_audio_ops cx25840_audio_ops = { .s_clock_freq = cx25840_s_clock_freq, .s_routing = cx25840_s_audio_routing, .s_stream = cx25840_s_audio_stream, }; static const struct v4l2_subdev_video_ops cx25840_video_ops = { .g_std = cx25840_g_std, .s_std = cx25840_s_std, .querystd = cx25840_querystd, .s_routing = cx25840_s_video_routing, .s_stream = cx25840_s_stream, .g_input_status = cx25840_g_input_status, }; static const struct v4l2_subdev_vbi_ops cx25840_vbi_ops = { .decode_vbi_line = cx25840_decode_vbi_line, .s_raw_fmt = cx25840_s_raw_fmt, .s_sliced_fmt = cx25840_s_sliced_fmt, .g_sliced_fmt = cx25840_g_sliced_fmt, }; static const struct v4l2_subdev_pad_ops cx25840_pad_ops = { .set_fmt = cx25840_set_fmt, }; static const struct v4l2_subdev_ops cx25840_ops = { .core = &cx25840_core_ops, .tuner = &cx25840_tuner_ops, .audio = &cx25840_audio_ops, .video = &cx25840_video_ops, .vbi = &cx25840_vbi_ops, .pad = &cx25840_pad_ops, .ir = &cx25840_ir_ops, }; /* ----------------------------------------------------------------------- */ static u32 get_cx2388x_ident(struct i2c_client *client) { u32 ret; /* Come out of digital power down */ cx25840_write(client, 0x000, 0); /* * Detecting whether the part is cx23885/7/8 is more * difficult than it needs to be. No ID register. Instead we * probe certain registers indicated in the datasheets to look * for specific defaults that differ between the silicon designs. */ /* It's either 885/7 if the IR Tx Clk Divider register exists */ if (cx25840_read4(client, 0x204) & 0xffff) { /* * CX23885 returns bogus repetitive byte values for the DIF, * which doesn't exist for it. (Ex. 8a8a8a8a or 31313131) */ ret = cx25840_read4(client, 0x300); if (((ret & 0xffff0000) >> 16) == (ret & 0xffff)) { /* No DIF */ ret = CX23885_AV; } else { /* * CX23887 has a broken DIF, but the registers * appear valid (but unused), good enough to detect. */ ret = CX23887_AV; } } else if (cx25840_read4(client, 0x300) & 0x0fffffff) { /* DIF PLL Freq Word reg exists; chip must be a CX23888 */ ret = CX23888_AV; } else { v4l_err(client, "Unable to detect h/w, assuming cx23887\n"); ret = CX23887_AV; } /* Back into digital power down */ cx25840_write(client, 0x000, 2); return ret; } static int cx25840_probe(struct i2c_client *client) { struct cx25840_state *state; struct v4l2_subdev *sd; int default_volume; u32 id; u16 device_id; #if defined(CONFIG_MEDIA_CONTROLLER) int ret; #endif /* Check if the adapter supports the needed features */ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE_DATA)) return -EIO; v4l_dbg(1, cx25840_debug, client, "detecting cx25840 client on address 0x%x\n", client->addr << 1); device_id = cx25840_read(client, 0x101) << 8; device_id |= cx25840_read(client, 0x100); v4l_dbg(1, cx25840_debug, client, "device_id = 0x%04x\n", device_id); /* * The high byte of the device ID should be * 0x83 for the cx2583x and 0x84 for the cx2584x */ if ((device_id & 0xff00) == 0x8300) { id = CX25836 + ((device_id >> 4) & 0xf) - 6; } else if ((device_id & 0xff00) == 0x8400) { id = CX25840 + ((device_id >> 4) & 0xf); } else if (device_id == 0x0000) { id = get_cx2388x_ident(client); } else if ((device_id & 0xfff0) == 0x5A30) { /* The CX23100 (0x5A3C = 23100) doesn't have an A/V decoder */ id = CX2310X_AV; } else if ((device_id & 0xff) == (device_id >> 8)) { v4l_err(client, "likely a confused/unresponsive cx2388[578] A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); v4l_err(client, "A method to reset it from the cx25840 driver software is not known at this time\n"); return -ENODEV; } else { v4l_dbg(1, cx25840_debug, client, "cx25840 not found\n"); return -ENODEV; } state = devm_kzalloc(&client->dev, sizeof(*state), GFP_KERNEL); if (!state) return -ENOMEM; sd = &state->sd; v4l2_i2c_subdev_init(sd, client, &cx25840_ops); #if defined(CONFIG_MEDIA_CONTROLLER) /* * TODO: add media controller support for analog video inputs like * composite, svideo, etc. * A real input pad for this analog demod would be like: * ___________ * TUNER --------> | | * | | * SVIDEO .......> | cx25840 | * | | * COMPOSITE1 ...> |_________| * * However, at least for now, there's no much gain on modelling * those extra inputs. So, let's add it only when needed. */ state->pads[CX25840_PAD_INPUT].flags = MEDIA_PAD_FL_SINK; state->pads[CX25840_PAD_INPUT].sig_type = PAD_SIGNAL_ANALOG; state->pads[CX25840_PAD_VID_OUT].flags = MEDIA_PAD_FL_SOURCE; state->pads[CX25840_PAD_VID_OUT].sig_type = PAD_SIGNAL_DV; sd->entity.function = MEDIA_ENT_F_ATV_DECODER; ret = media_entity_pads_init(&sd->entity, ARRAY_SIZE(state->pads), state->pads); if (ret < 0) { v4l_info(client, "failed to initialize media entity!\n"); return ret; } #endif switch (id) { case CX23885_AV: v4l_info(client, "cx23885 A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); break; case CX23887_AV: v4l_info(client, "cx23887 A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); break; case CX23888_AV: v4l_info(client, "cx23888 A/V decoder found @ 0x%x (%s)\n", client->addr << 1, client->adapter->name); break; case CX2310X_AV: v4l_info(client, "cx%d A/V decoder found @ 0x%x (%s)\n", device_id, client->addr << 1, client->adapter->name); break; case CX25840: case CX25841: case CX25842: case CX25843: /* * Note: revision '(device_id & 0x0f) == 2' was never built. * The marking skips from 0x1 == 22 to 0x3 == 23. */ v4l_info(client, "cx25%3x-2%x found @ 0x%x (%s)\n", (device_id & 0xfff0) >> 4, (device_id & 0x0f) < 3 ? (device_id & 0x0f) + 1 : (device_id & 0x0f), client->addr << 1, client->adapter->name); break; case CX25836: case CX25837: default: v4l_info(client, "cx25%3x-%x found @ 0x%x (%s)\n", (device_id & 0xfff0) >> 4, device_id & 0x0f, client->addr << 1, client->adapter->name); break; } state->c = client; state->vid_input = CX25840_COMPOSITE7; state->aud_input = CX25840_AUDIO8; state->audclk_freq = 48000; state->audmode = V4L2_TUNER_MODE_LANG1; state->vbi_line_offset = 8; state->id = id; state->rev = device_id; state->vbi_regs_offset = id == CX23888_AV ? 0x500 - 0x424 : 0; state->std = V4L2_STD_NTSC_M; v4l2_ctrl_handler_init(&state->hdl, 9); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_BRIGHTNESS, 0, 255, 1, 128); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_CONTRAST, 0, 127, 1, 64); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_SATURATION, 0, 127, 1, 64); v4l2_ctrl_new_std(&state->hdl, &cx25840_ctrl_ops, V4L2_CID_HUE, -128, 127, 1, 0); if (!is_cx2583x(state)) { default_volume = cx25840_read(client, 0x8d4); /* * Enforce the legacy PVR-350/MSP3400 to PVR-150/CX25843 volume * scale mapping limits to avoid -ERANGE errors when * initializing the volume control */ if (default_volume > 228) { /* Bottom out at -96 dB, v4l2 vol range 0x2e00-0x2fff */ default_volume = 228; cx25840_write(client, 0x8d4, 228); } else if (default_volume < 20) { /* Top out at + 8 dB, v4l2 vol range 0xfe00-0xffff */ default_volume = 20; cx25840_write(client, 0x8d4, 20); } default_volume = (((228 - default_volume) >> 1) + 23) << 9; state->volume = v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_VOLUME, 0, 65535, 65535 / 100, default_volume); state->mute = v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_MUTE, 0, 1, 1, 0); v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_BALANCE, 0, 65535, 65535 / 100, 32768); v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_BASS, 0, 65535, 65535 / 100, 32768); v4l2_ctrl_new_std(&state->hdl, &cx25840_audio_ctrl_ops, V4L2_CID_AUDIO_TREBLE, 0, 65535, 65535 / 100, 32768); } sd->ctrl_handler = &state->hdl; if (state->hdl.error) { int err = state->hdl.error; v4l2_ctrl_handler_free(&state->hdl); return err; } if (!is_cx2583x(state)) v4l2_ctrl_cluster(2, &state->volume); v4l2_ctrl_handler_setup(&state->hdl); if (client->dev.platform_data) { struct cx25840_platform_data *pdata = client->dev.platform_data; state->pvr150_workaround = pdata->pvr150_workaround; } cx25840_ir_probe(sd); return 0; } static void cx25840_remove(struct i2c_client *client) { struct v4l2_subdev *sd = i2c_get_clientdata(client); struct cx25840_state *state = to_state(sd); cx25840_ir_remove(sd); v4l2_device_unregister_subdev(sd); v4l2_ctrl_handler_free(&state->hdl); } static const struct i2c_device_id cx25840_id[] = { { "cx25840" }, { } }; MODULE_DEVICE_TABLE(i2c, cx25840_id); static struct i2c_driver cx25840_driver = { .driver = { .name = "cx25840", }, .probe = cx25840_probe, .remove = cx25840_remove, .id_table = cx25840_id, }; module_i2c_driver(cx25840_driver);
14 4 11 15 2 11 12 12 11 2 2 2 4 14 6 4 5 5 5 5 5 5 5 5 4 4 4 4 2 10 8 10 10 4 6 6 4 6 870 868 3 8 6 6 6 7 1 1 5 6 8 2 2 4 149 1 4 1 4 138 2 6 5 12 12 2 11 4 9 12 3 3 3 3 3 229 226 7 89 88 8 1 10 2 8 1 2 6 7 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 // SPDX-License-Identifier: GPL-2.0-only /* * Memory merging support. * * This code enables dynamic sharing of identical pages found in different * memory areas, even if they are not shared by fork() * * Copyright (C) 2008-2009 Red Hat, Inc. * Authors: * Izik Eidus * Andrea Arcangeli * Chris Wright * Hugh Dickins */ #include <linux/errno.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/fs.h> #include <linux/mman.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/sched/cputime.h> #include <linux/rwsem.h> #include <linux/pagemap.h> #include <linux/rmap.h> #include <linux/spinlock.h> #include <linux/xxhash.h> #include <linux/delay.h> #include <linux/kthread.h> #include <linux/wait.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/memory.h> #include <linux/mmu_notifier.h> #include <linux/swap.h> #include <linux/ksm.h> #include <linux/hashtable.h> #include <linux/freezer.h> #include <linux/oom.h> #include <linux/numa.h> #include <linux/pagewalk.h> #include <asm/tlbflush.h> #include "internal.h" #include "mm_slot.h" #define CREATE_TRACE_POINTS #include <trace/events/ksm.h> #ifdef CONFIG_NUMA #define NUMA(x) (x) #define DO_NUMA(x) do { (x); } while (0) #else #define NUMA(x) (0) #define DO_NUMA(x) do { } while (0) #endif typedef u8 rmap_age_t; /** * DOC: Overview * * A few notes about the KSM scanning process, * to make it easier to understand the data structures below: * * In order to reduce excessive scanning, KSM sorts the memory pages by their * contents into a data structure that holds pointers to the pages' locations. * * Since the contents of the pages may change at any moment, KSM cannot just * insert the pages into a normal sorted tree and expect it to find anything. * Therefore KSM uses two data structures - the stable and the unstable tree. * * The stable tree holds pointers to all the merged pages (ksm pages), sorted * by their contents. Because each such page is write-protected, searching on * this tree is fully assured to be working (except when pages are unmapped), * and therefore this tree is called the stable tree. * * The stable tree node includes information required for reverse * mapping from a KSM page to virtual addresses that map this page. * * In order to avoid large latencies of the rmap walks on KSM pages, * KSM maintains two types of nodes in the stable tree: * * * the regular nodes that keep the reverse mapping structures in a * linked list * * the "chains" that link nodes ("dups") that represent the same * write protected memory content, but each "dup" corresponds to a * different KSM page copy of that content * * Internally, the regular nodes, "dups" and "chains" are represented * using the same struct ksm_stable_node structure. * * In addition to the stable tree, KSM uses a second data structure called the * unstable tree: this tree holds pointers to pages which have been found to * be "unchanged for a period of time". The unstable tree sorts these pages * by their contents, but since they are not write-protected, KSM cannot rely * upon the unstable tree to work correctly - the unstable tree is liable to * be corrupted as its contents are modified, and so it is called unstable. * * KSM solves this problem by several techniques: * * 1) The unstable tree is flushed every time KSM completes scanning all * memory areas, and then the tree is rebuilt again from the beginning. * 2) KSM will only insert into the unstable tree, pages whose hash value * has not changed since the previous scan of all memory areas. * 3) The unstable tree is a RedBlack Tree - so its balancing is based on the * colors of the nodes and not on their contents, assuring that even when * the tree gets "corrupted" it won't get out of balance, so scanning time * remains the same (also, searching and inserting nodes in an rbtree uses * the same algorithm, so we have no overhead when we flush and rebuild). * 4) KSM never flushes the stable tree, which means that even if it were to * take 10 attempts to find a page in the unstable tree, once it is found, * it is secured in the stable tree. (When we scan a new page, we first * compare it against the stable tree, and then against the unstable tree.) * * If the merge_across_nodes tunable is unset, then KSM maintains multiple * stable trees and multiple unstable trees: one of each for each NUMA node. */ /** * struct ksm_mm_slot - ksm information per mm that is being scanned * @slot: hash lookup from mm to mm_slot * @rmap_list: head for this mm_slot's singly-linked list of rmap_items */ struct ksm_mm_slot { struct mm_slot slot; struct ksm_rmap_item *rmap_list; }; /** * struct ksm_scan - cursor for scanning * @mm_slot: the current mm_slot we are scanning * @address: the next address inside that to be scanned * @rmap_list: link to the next rmap to be scanned in the rmap_list * @seqnr: count of completed full scans (needed when removing unstable node) * * There is only the one ksm_scan instance of this cursor structure. */ struct ksm_scan { struct ksm_mm_slot *mm_slot; unsigned long address; struct ksm_rmap_item **rmap_list; unsigned long seqnr; }; /** * struct ksm_stable_node - node of the stable rbtree * @node: rb node of this ksm page in the stable tree * @head: (overlaying parent) &migrate_nodes indicates temporarily on that list * @hlist_dup: linked into the stable_node->hlist with a stable_node chain * @list: linked into migrate_nodes, pending placement in the proper node tree * @hlist: hlist head of rmap_items using this ksm page * @kpfn: page frame number of this ksm page (perhaps temporarily on wrong nid) * @chain_prune_time: time of the last full garbage collection * @rmap_hlist_len: number of rmap_item entries in hlist or STABLE_NODE_CHAIN * @nid: NUMA node id of stable tree in which linked (may not match kpfn) */ struct ksm_stable_node { union { struct rb_node node; /* when node of stable tree */ struct { /* when listed for migration */ struct list_head *head; struct { struct hlist_node hlist_dup; struct list_head list; }; }; }; struct hlist_head hlist; union { unsigned long kpfn; unsigned long chain_prune_time; }; /* * STABLE_NODE_CHAIN can be any negative number in * rmap_hlist_len negative range, but better not -1 to be able * to reliably detect underflows. */ #define STABLE_NODE_CHAIN -1024 int rmap_hlist_len; #ifdef CONFIG_NUMA int nid; #endif }; /** * struct ksm_rmap_item - reverse mapping item for virtual addresses * @rmap_list: next rmap_item in mm_slot's singly-linked rmap_list * @anon_vma: pointer to anon_vma for this mm,address, when in stable tree * @nid: NUMA node id of unstable tree in which linked (may not match page) * @mm: the memory structure this rmap_item is pointing into * @address: the virtual address this rmap_item tracks (+ flags in low bits) * @oldchecksum: previous checksum of the page at that virtual address * @node: rb node of this rmap_item in the unstable tree * @head: pointer to stable_node heading this list in the stable tree * @hlist: link into hlist of rmap_items hanging off that stable_node * @age: number of scan iterations since creation * @remaining_skips: how many scans to skip */ struct ksm_rmap_item { struct ksm_rmap_item *rmap_list; union { struct anon_vma *anon_vma; /* when stable */ #ifdef CONFIG_NUMA int nid; /* when node of unstable tree */ #endif }; struct mm_struct *mm; unsigned long address; /* + low bits used for flags below */ unsigned int oldchecksum; /* when unstable */ rmap_age_t age; rmap_age_t remaining_skips; union { struct rb_node node; /* when node of unstable tree */ struct { /* when listed from stable tree */ struct ksm_stable_node *head; struct hlist_node hlist; }; }; }; #define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */ #define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */ #define STABLE_FLAG 0x200 /* is listed from the stable tree */ /* The stable and unstable tree heads */ static struct rb_root one_stable_tree[1] = { RB_ROOT }; static struct rb_root one_unstable_tree[1] = { RB_ROOT }; static struct rb_root *root_stable_tree = one_stable_tree; static struct rb_root *root_unstable_tree = one_unstable_tree; /* Recently migrated nodes of stable tree, pending proper placement */ static LIST_HEAD(migrate_nodes); #define STABLE_NODE_DUP_HEAD ((struct list_head *)&migrate_nodes.prev) #define MM_SLOTS_HASH_BITS 10 static DEFINE_HASHTABLE(mm_slots_hash, MM_SLOTS_HASH_BITS); static struct ksm_mm_slot ksm_mm_head = { .slot.mm_node = LIST_HEAD_INIT(ksm_mm_head.slot.mm_node), }; static struct ksm_scan ksm_scan = { .mm_slot = &ksm_mm_head, }; static struct kmem_cache *rmap_item_cache; static struct kmem_cache *stable_node_cache; static struct kmem_cache *mm_slot_cache; /* Default number of pages to scan per batch */ #define DEFAULT_PAGES_TO_SCAN 100 /* The number of pages scanned */ static unsigned long ksm_pages_scanned; /* The number of nodes in the stable tree */ static unsigned long ksm_pages_shared; /* The number of page slots additionally sharing those nodes */ static unsigned long ksm_pages_sharing; /* The number of nodes in the unstable tree */ static unsigned long ksm_pages_unshared; /* The number of rmap_items in use: to calculate pages_volatile */ static unsigned long ksm_rmap_items; /* The number of stable_node chains */ static unsigned long ksm_stable_node_chains; /* The number of stable_node dups linked to the stable_node chains */ static unsigned long ksm_stable_node_dups; /* Delay in pruning stale stable_node_dups in the stable_node_chains */ static unsigned int ksm_stable_node_chains_prune_millisecs = 2000; /* Maximum number of page slots sharing a stable node */ static int ksm_max_page_sharing = 256; /* Number of pages ksmd should scan in one batch */ static unsigned int ksm_thread_pages_to_scan = DEFAULT_PAGES_TO_SCAN; /* Milliseconds ksmd should sleep between batches */ static unsigned int ksm_thread_sleep_millisecs = 20; /* Checksum of an empty (zeroed) page */ static unsigned int zero_checksum __read_mostly; /* Whether to merge empty (zeroed) pages with actual zero pages */ static bool ksm_use_zero_pages __read_mostly; /* Skip pages that couldn't be de-duplicated previously */ /* Default to true at least temporarily, for testing */ static bool ksm_smart_scan = true; /* The number of zero pages which is placed by KSM */ atomic_long_t ksm_zero_pages = ATOMIC_LONG_INIT(0); /* The number of pages that have been skipped due to "smart scanning" */ static unsigned long ksm_pages_skipped; /* Don't scan more than max pages per batch. */ static unsigned long ksm_advisor_max_pages_to_scan = 30000; /* Min CPU for scanning pages per scan */ #define KSM_ADVISOR_MIN_CPU 10 /* Max CPU for scanning pages per scan */ static unsigned int ksm_advisor_max_cpu = 70; /* Target scan time in seconds to analyze all KSM candidate pages. */ static unsigned long ksm_advisor_target_scan_time = 200; /* Exponentially weighted moving average. */ #define EWMA_WEIGHT 30 /** * struct advisor_ctx - metadata for KSM advisor * @start_scan: start time of the current scan * @scan_time: scan time of previous scan * @change: change in percent to pages_to_scan parameter * @cpu_time: cpu time consumed by the ksmd thread in the previous scan */ struct advisor_ctx { ktime_t start_scan; unsigned long scan_time; unsigned long change; unsigned long long cpu_time; }; static struct advisor_ctx advisor_ctx; /* Define different advisor's */ enum ksm_advisor_type { KSM_ADVISOR_NONE, KSM_ADVISOR_SCAN_TIME, }; static enum ksm_advisor_type ksm_advisor; #ifdef CONFIG_SYSFS /* * Only called through the sysfs control interface: */ /* At least scan this many pages per batch. */ static unsigned long ksm_advisor_min_pages_to_scan = 500; static void set_advisor_defaults(void) { if (ksm_advisor == KSM_ADVISOR_NONE) { ksm_thread_pages_to_scan = DEFAULT_PAGES_TO_SCAN; } else if (ksm_advisor == KSM_ADVISOR_SCAN_TIME) { advisor_ctx = (const struct advisor_ctx){ 0 }; ksm_thread_pages_to_scan = ksm_advisor_min_pages_to_scan; } } #endif /* CONFIG_SYSFS */ static inline void advisor_start_scan(void) { if (ksm_advisor == KSM_ADVISOR_SCAN_TIME) advisor_ctx.start_scan = ktime_get(); } /* * Use previous scan time if available, otherwise use current scan time as an * approximation for the previous scan time. */ static inline unsigned long prev_scan_time(struct advisor_ctx *ctx, unsigned long scan_time) { return ctx->scan_time ? ctx->scan_time : scan_time; } /* Calculate exponential weighted moving average */ static unsigned long ewma(unsigned long prev, unsigned long curr) { return ((100 - EWMA_WEIGHT) * prev + EWMA_WEIGHT * curr) / 100; } /* * The scan time advisor is based on the current scan rate and the target * scan rate. * * new_pages_to_scan = pages_to_scan * (scan_time / target_scan_time) * * To avoid perturbations it calculates a change factor of previous changes. * A new change factor is calculated for each iteration and it uses an * exponentially weighted moving average. The new pages_to_scan value is * multiplied with that change factor: * * new_pages_to_scan *= change facor * * The new_pages_to_scan value is limited by the cpu min and max values. It * calculates the cpu percent for the last scan and calculates the new * estimated cpu percent cost for the next scan. That value is capped by the * cpu min and max setting. * * In addition the new pages_to_scan value is capped by the max and min * limits. */ static void scan_time_advisor(void) { unsigned int cpu_percent; unsigned long cpu_time; unsigned long cpu_time_diff; unsigned long cpu_time_diff_ms; unsigned long pages; unsigned long per_page_cost; unsigned long factor; unsigned long change; unsigned long last_scan_time; unsigned long scan_time; /* Convert scan time to seconds */ scan_time = div_s64(ktime_ms_delta(ktime_get(), advisor_ctx.start_scan), MSEC_PER_SEC); scan_time = scan_time ? scan_time : 1; /* Calculate CPU consumption of ksmd background thread */ cpu_time = task_sched_runtime(current); cpu_time_diff = cpu_time - advisor_ctx.cpu_time; cpu_time_diff_ms = cpu_time_diff / 1000 / 1000; cpu_percent = (cpu_time_diff_ms * 100) / (scan_time * 1000); cpu_percent = cpu_percent ? cpu_percent : 1; last_scan_time = prev_scan_time(&advisor_ctx, scan_time); /* Calculate scan time as percentage of target scan time */ factor = ksm_advisor_target_scan_time * 100 / scan_time; factor = factor ? factor : 1; /* * Calculate scan time as percentage of last scan time and use * exponentially weighted average to smooth it */ change = scan_time * 100 / last_scan_time; change = change ? change : 1; change = ewma(advisor_ctx.change, change); /* Calculate new scan rate based on target scan rate. */ pages = ksm_thread_pages_to_scan * 100 / factor; /* Update pages_to_scan by weighted change percentage. */ pages = pages * change / 100; /* Cap new pages_to_scan value */ per_page_cost = ksm_thread_pages_to_scan / cpu_percent; per_page_cost = per_page_cost ? per_page_cost : 1; pages = min(pages, per_page_cost * ksm_advisor_max_cpu); pages = max(pages, per_page_cost * KSM_ADVISOR_MIN_CPU); pages = min(pages, ksm_advisor_max_pages_to_scan); /* Update advisor context */ advisor_ctx.change = change; advisor_ctx.scan_time = scan_time; advisor_ctx.cpu_time = cpu_time; ksm_thread_pages_to_scan = pages; trace_ksm_advisor(scan_time, pages, cpu_percent); } static void advisor_stop_scan(void) { if (ksm_advisor == KSM_ADVISOR_SCAN_TIME) scan_time_advisor(); } #ifdef CONFIG_NUMA /* Zeroed when merging across nodes is not allowed */ static unsigned int ksm_merge_across_nodes = 1; static int ksm_nr_node_ids = 1; #else #define ksm_merge_across_nodes 1U #define ksm_nr_node_ids 1 #endif #define KSM_RUN_STOP 0 #define KSM_RUN_MERGE 1 #define KSM_RUN_UNMERGE 2 #define KSM_RUN_OFFLINE 4 static unsigned long ksm_run = KSM_RUN_STOP; static void wait_while_offlining(void); static DECLARE_WAIT_QUEUE_HEAD(ksm_thread_wait); static DECLARE_WAIT_QUEUE_HEAD(ksm_iter_wait); static DEFINE_MUTEX(ksm_thread_mutex); static DEFINE_SPINLOCK(ksm_mmlist_lock); static int __init ksm_slab_init(void) { rmap_item_cache = KMEM_CACHE(ksm_rmap_item, 0); if (!rmap_item_cache) goto out; stable_node_cache = KMEM_CACHE(ksm_stable_node, 0); if (!stable_node_cache) goto out_free1; mm_slot_cache = KMEM_CACHE(ksm_mm_slot, 0); if (!mm_slot_cache) goto out_free2; return 0; out_free2: kmem_cache_destroy(stable_node_cache); out_free1: kmem_cache_destroy(rmap_item_cache); out: return -ENOMEM; } static void __init ksm_slab_free(void) { kmem_cache_destroy(mm_slot_cache); kmem_cache_destroy(stable_node_cache); kmem_cache_destroy(rmap_item_cache); mm_slot_cache = NULL; } static __always_inline bool is_stable_node_chain(struct ksm_stable_node *chain) { return chain->rmap_hlist_len == STABLE_NODE_CHAIN; } static __always_inline bool is_stable_node_dup(struct ksm_stable_node *dup) { return dup->head == STABLE_NODE_DUP_HEAD; } static inline void stable_node_chain_add_dup(struct ksm_stable_node *dup, struct ksm_stable_node *chain) { VM_BUG_ON(is_stable_node_dup(dup)); dup->head = STABLE_NODE_DUP_HEAD; VM_BUG_ON(!is_stable_node_chain(chain)); hlist_add_head(&dup->hlist_dup, &chain->hlist); ksm_stable_node_dups++; } static inline void __stable_node_dup_del(struct ksm_stable_node *dup) { VM_BUG_ON(!is_stable_node_dup(dup)); hlist_del(&dup->hlist_dup); ksm_stable_node_dups--; } static inline void stable_node_dup_del(struct ksm_stable_node *dup) { VM_BUG_ON(is_stable_node_chain(dup)); if (is_stable_node_dup(dup)) __stable_node_dup_del(dup); else rb_erase(&dup->node, root_stable_tree + NUMA(dup->nid)); #ifdef CONFIG_DEBUG_VM dup->head = NULL; #endif } static inline struct ksm_rmap_item *alloc_rmap_item(void) { struct ksm_rmap_item *rmap_item; rmap_item = kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); if (rmap_item) ksm_rmap_items++; return rmap_item; } static inline void free_rmap_item(struct ksm_rmap_item *rmap_item) { ksm_rmap_items--; rmap_item->mm->ksm_rmap_items--; rmap_item->mm = NULL; /* debug safety */ kmem_cache_free(rmap_item_cache, rmap_item); } static inline struct ksm_stable_node *alloc_stable_node(void) { /* * The allocation can take too long with GFP_KERNEL when memory is under * pressure, which may lead to hung task warnings. Adding __GFP_HIGH * grants access to memory reserves, helping to avoid this problem. */ return kmem_cache_alloc(stable_node_cache, GFP_KERNEL | __GFP_HIGH); } static inline void free_stable_node(struct ksm_stable_node *stable_node) { VM_BUG_ON(stable_node->rmap_hlist_len && !is_stable_node_chain(stable_node)); kmem_cache_free(stable_node_cache, stable_node); } /* * ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's * page tables after it has passed through ksm_exit() - which, if necessary, * takes mmap_lock briefly to serialize against them. ksm_exit() does not set * a special flag: they can just back out as soon as mm_users goes to zero. * ksm_test_exit() is used throughout to make this test for exit: in some * places for correctness, in some places just to avoid unnecessary work. */ static inline bool ksm_test_exit(struct mm_struct *mm) { return atomic_read(&mm->mm_users) == 0; } /* * We use break_ksm to break COW on a ksm page by triggering unsharing, * such that the ksm page will get replaced by an exclusive anonymous page. * * We take great care only to touch a ksm page, in a VM_MERGEABLE vma, * in case the application has unmapped and remapped mm,addr meanwhile. * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP * mmap of /dev/mem, where we would not want to touch it. * * FAULT_FLAG_REMOTE/FOLL_REMOTE are because we do this outside the context * of the process that owns 'vma'. We also do not want to enforce * protection keys here anyway. */ static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma) { vm_fault_t ret = 0; if (lock_vma) vma_start_write(vma); do { bool ksm_page = false; struct folio_walk fw; struct folio *folio; cond_resched(); folio = folio_walk_start(&fw, vma, addr, FW_MIGRATION | FW_ZEROPAGE); if (folio) { /* Small folio implies FW_LEVEL_PTE. */ if (!folio_test_large(folio) && (folio_test_ksm(folio) || is_ksm_zero_pte(fw.pte))) ksm_page = true; folio_walk_end(&fw, vma); } if (!ksm_page) return 0; ret = handle_mm_fault(vma, addr, FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE, NULL); } while (!(ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); /* * We must loop until we no longer find a KSM page because * handle_mm_fault() may back out if there's any difficulty e.g. if * pte accessed bit gets updated concurrently. * * VM_FAULT_SIGBUS could occur if we race with truncation of the * backing file, which also invalidates anonymous pages: that's * okay, that truncation will have unmapped the KSM page for us. * * VM_FAULT_OOM: at the time of writing (late July 2009), setting * aside mem_cgroup limits, VM_FAULT_OOM would only be set if the * current task has TIF_MEMDIE set, and will be OOM killed on return * to user; and ksmd, having no mm, would never be chosen for that. * * But if the mm is in a limited mem_cgroup, then the fault may fail * with VM_FAULT_OOM even if the current task is not TIF_MEMDIE; and * even ksmd can fail in this way - though it's usually breaking ksm * just to undo a merge it made a moment before, so unlikely to oom. * * That's a pity: we might therefore have more kernel pages allocated * than we're counting as nodes in the stable tree; but ksm_do_scan * will retry to break_cow on each pass, so should recover the page * in due course. The important thing is to not let VM_MERGEABLE * be cleared while any such pages might remain in the area. */ return (ret & VM_FAULT_OOM) ? -ENOMEM : 0; } static bool vma_ksm_compatible(struct vm_area_struct *vma) { if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE | VM_PFNMAP | VM_IO | VM_DONTEXPAND | VM_HUGETLB | VM_MIXEDMAP| VM_DROPPABLE)) return false; /* just ignore the advice */ if (vma_is_dax(vma)) return false; #ifdef VM_SAO if (vma->vm_flags & VM_SAO) return false; #endif #ifdef VM_SPARC_ADI if (vma->vm_flags & VM_SPARC_ADI) return false; #endif return true; } static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm, unsigned long addr) { struct vm_area_struct *vma; if (ksm_test_exit(mm)) return NULL; vma = vma_lookup(mm, addr); if (!vma || !(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) return NULL; return vma; } static void break_cow(struct ksm_rmap_item *rmap_item) { struct mm_struct *mm = rmap_item->mm; unsigned long addr = rmap_item->address; struct vm_area_struct *vma; /* * It is not an accident that whenever we want to break COW * to undo, we also need to drop a reference to the anon_vma. */ put_anon_vma(rmap_item->anon_vma); mmap_read_lock(mm); vma = find_mergeable_vma(mm, addr); if (vma) break_ksm(vma, addr, false); mmap_read_unlock(mm); } static struct page *get_mergeable_page(struct ksm_rmap_item *rmap_item) { struct mm_struct *mm = rmap_item->mm; unsigned long addr = rmap_item->address; struct vm_area_struct *vma; struct page *page = NULL; struct folio_walk fw; struct folio *folio; mmap_read_lock(mm); vma = find_mergeable_vma(mm, addr); if (!vma) goto out; folio = folio_walk_start(&fw, vma, addr, 0); if (folio) { if (!folio_is_zone_device(folio) && folio_test_anon(folio)) { folio_get(folio); page = fw.page; } folio_walk_end(&fw, vma); } out: if (page) { flush_anon_page(vma, page, addr); flush_dcache_page(page); } mmap_read_unlock(mm); return page; } /* * This helper is used for getting right index into array of tree roots. * When merge_across_nodes knob is set to 1, there are only two rb-trees for * stable and unstable pages from all nodes with roots in index 0. Otherwise, * every node has its own stable and unstable tree. */ static inline int get_kpfn_nid(unsigned long kpfn) { return ksm_merge_across_nodes ? 0 : NUMA(pfn_to_nid(kpfn)); } static struct ksm_stable_node *alloc_stable_node_chain(struct ksm_stable_node *dup, struct rb_root *root) { struct ksm_stable_node *chain = alloc_stable_node(); VM_BUG_ON(is_stable_node_chain(dup)); if (likely(chain)) { INIT_HLIST_HEAD(&chain->hlist); chain->chain_prune_time = jiffies; chain->rmap_hlist_len = STABLE_NODE_CHAIN; #if defined (CONFIG_DEBUG_VM) && defined(CONFIG_NUMA) chain->nid = NUMA_NO_NODE; /* debug */ #endif ksm_stable_node_chains++; /* * Put the stable node chain in the first dimension of * the stable tree and at the same time remove the old * stable node. */ rb_replace_node(&dup->node, &chain->node, root); /* * Move the old stable node to the second dimension * queued in the hlist_dup. The invariant is that all * dup stable_nodes in the chain->hlist point to pages * that are write protected and have the exact same * content. */ stable_node_chain_add_dup(dup, chain); } return chain; } static inline void free_stable_node_chain(struct ksm_stable_node *chain, struct rb_root *root) { rb_erase(&chain->node, root); free_stable_node(chain); ksm_stable_node_chains--; } static void remove_node_from_stable_tree(struct ksm_stable_node *stable_node) { struct ksm_rmap_item *rmap_item; /* check it's not STABLE_NODE_CHAIN or negative */ BUG_ON(stable_node->rmap_hlist_len < 0); hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { if (rmap_item->hlist.next) { ksm_pages_sharing--; trace_ksm_remove_rmap_item(stable_node->kpfn, rmap_item, rmap_item->mm); } else { ksm_pages_shared--; } rmap_item->mm->ksm_merging_pages--; VM_BUG_ON(stable_node->rmap_hlist_len <= 0); stable_node->rmap_hlist_len--; put_anon_vma(rmap_item->anon_vma); rmap_item->address &= PAGE_MASK; cond_resched(); } /* * We need the second aligned pointer of the migrate_nodes * list_head to stay clear from the rb_parent_color union * (aligned and different than any node) and also different * from &migrate_nodes. This will verify that future list.h changes * don't break STABLE_NODE_DUP_HEAD. Only recent gcc can handle it. */ BUILD_BUG_ON(STABLE_NODE_DUP_HEAD <= &migrate_nodes); BUILD_BUG_ON(STABLE_NODE_DUP_HEAD >= &migrate_nodes + 1); trace_ksm_remove_ksm_page(stable_node->kpfn); if (stable_node->head == &migrate_nodes) list_del(&stable_node->list); else stable_node_dup_del(stable_node); free_stable_node(stable_node); } enum ksm_get_folio_flags { KSM_GET_FOLIO_NOLOCK, KSM_GET_FOLIO_LOCK, KSM_GET_FOLIO_TRYLOCK }; /* * ksm_get_folio: checks if the page indicated by the stable node * is still its ksm page, despite having held no reference to it. * In which case we can trust the content of the page, and it * returns the gotten page; but if the page has now been zapped, * remove the stale node from the stable tree and return NULL. * But beware, the stable node's page might be being migrated. * * You would expect the stable_node to hold a reference to the ksm page. * But if it increments the page's count, swapping out has to wait for * ksmd to come around again before it can free the page, which may take * seconds or even minutes: much too unresponsive. So instead we use a * "keyhole reference": access to the ksm page from the stable node peeps * out through its keyhole to see if that page still holds the right key, * pointing back to this stable node. This relies on freeing a PageAnon * page to reset its page->mapping to NULL, and relies on no other use of * a page to put something that might look like our key in page->mapping. * is on its way to being freed; but it is an anomaly to bear in mind. */ static struct folio *ksm_get_folio(struct ksm_stable_node *stable_node, enum ksm_get_folio_flags flags) { struct folio *folio; void *expected_mapping; unsigned long kpfn; expected_mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); again: kpfn = READ_ONCE(stable_node->kpfn); /* Address dependency. */ folio = pfn_folio(kpfn); if (READ_ONCE(folio->mapping) != expected_mapping) goto stale; /* * We cannot do anything with the page while its refcount is 0. * Usually 0 means free, or tail of a higher-order page: in which * case this node is no longer referenced, and should be freed; * however, it might mean that the page is under page_ref_freeze(). * The __remove_mapping() case is easy, again the node is now stale; * the same is in reuse_ksm_page() case; but if page is swapcache * in folio_migrate_mapping(), it might still be our page, * in which case it's essential to keep the node. */ while (!folio_try_get(folio)) { /* * Another check for folio->mapping != expected_mapping * would work here too. We have chosen to test the * swapcache flag to optimize the common case, when the * folio is or is about to be freed: the swapcache flag * is cleared (under spin_lock_irq) in the ref_freeze * section of __remove_mapping(); but anon folio->mapping * is reset to NULL later, in free_pages_prepare(). */ if (!folio_test_swapcache(folio)) goto stale; cpu_relax(); } if (READ_ONCE(folio->mapping) != expected_mapping) { folio_put(folio); goto stale; } if (flags == KSM_GET_FOLIO_TRYLOCK) { if (!folio_trylock(folio)) { folio_put(folio); return ERR_PTR(-EBUSY); } } else if (flags == KSM_GET_FOLIO_LOCK) folio_lock(folio); if (flags != KSM_GET_FOLIO_NOLOCK) { if (READ_ONCE(folio->mapping) != expected_mapping) { folio_unlock(folio); folio_put(folio); goto stale; } } return folio; stale: /* * We come here from above when folio->mapping or the swapcache flag * suggests that the node is stale; but it might be under migration. * We need smp_rmb(), matching the smp_wmb() in folio_migrate_ksm(), * before checking whether node->kpfn has been changed. */ smp_rmb(); if (READ_ONCE(stable_node->kpfn) != kpfn) goto again; remove_node_from_stable_tree(stable_node); return NULL; } /* * Removing rmap_item from stable or unstable tree. * This function will clean the information from the stable/unstable tree. */ static void remove_rmap_item_from_tree(struct ksm_rmap_item *rmap_item) { if (rmap_item->address & STABLE_FLAG) { struct ksm_stable_node *stable_node; struct folio *folio; stable_node = rmap_item->head; folio = ksm_get_folio(stable_node, KSM_GET_FOLIO_LOCK); if (!folio) goto out; hlist_del(&rmap_item->hlist); folio_unlock(folio); folio_put(folio); if (!hlist_empty(&stable_node->hlist)) ksm_pages_sharing--; else ksm_pages_shared--; rmap_item->mm->ksm_merging_pages--; VM_BUG_ON(stable_node->rmap_hlist_len <= 0); stable_node->rmap_hlist_len--; put_anon_vma(rmap_item->anon_vma); rmap_item->head = NULL; rmap_item->address &= PAGE_MASK; } else if (rmap_item->address & UNSTABLE_FLAG) { unsigned char age; /* * Usually ksmd can and must skip the rb_erase, because * root_unstable_tree was already reset to RB_ROOT. * But be careful when an mm is exiting: do the rb_erase * if this rmap_item was inserted by this scan, rather * than left over from before. */ age = (unsigned char)(ksm_scan.seqnr - rmap_item->address); BUG_ON(age > 1); if (!age) rb_erase(&rmap_item->node, root_unstable_tree + NUMA(rmap_item->nid)); ksm_pages_unshared--; rmap_item->address &= PAGE_MASK; } out: cond_resched(); /* we're called from many long loops */ } static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list) { while (*rmap_list) { struct ksm_rmap_item *rmap_item = *rmap_list; *rmap_list = rmap_item->rmap_list; remove_rmap_item_from_tree(rmap_item); free_rmap_item(rmap_item); } } /* * Though it's very tempting to unmerge rmap_items from stable tree rather * than check every pte of a given vma, the locking doesn't quite work for * that - an rmap_item is assigned to the stable tree after inserting ksm * page and upping mmap_lock. Nor does it fit with the way we skip dup'ing * rmap_items from parent to child at fork time (so as not to waste time * if exit comes before the next scan reaches it). * * Similarly, although we'd like to remove rmap_items (so updating counts * and freeing memory) when unmerging an area, it's easier to leave that * to the next pass of ksmd - consider, for example, how ksmd might be * in cmp_and_merge_page on one of the rmap_items we would be removing. */ static int unmerge_ksm_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end, bool lock_vma) { unsigned long addr; int err = 0; for (addr = start; addr < end && !err; addr += PAGE_SIZE) { if (ksm_test_exit(vma->vm_mm)) break; if (signal_pending(current)) err = -ERESTARTSYS; else err = break_ksm(vma, addr, lock_vma); } return err; } static inline struct ksm_stable_node *folio_stable_node(const struct folio *folio) { return folio_test_ksm(folio) ? folio_raw_mapping(folio) : NULL; } static inline struct ksm_stable_node *page_stable_node(struct page *page) { return folio_stable_node(page_folio(page)); } static inline void folio_set_stable_node(struct folio *folio, struct ksm_stable_node *stable_node) { VM_WARN_ON_FOLIO(folio_test_anon(folio) && PageAnonExclusive(&folio->page), folio); folio->mapping = (void *)((unsigned long)stable_node | PAGE_MAPPING_KSM); } #ifdef CONFIG_SYSFS /* * Only called through the sysfs control interface: */ static int remove_stable_node(struct ksm_stable_node *stable_node) { struct folio *folio; int err; folio = ksm_get_folio(stable_node, KSM_GET_FOLIO_LOCK); if (!folio) { /* * ksm_get_folio did remove_node_from_stable_tree itself. */ return 0; } /* * Page could be still mapped if this races with __mmput() running in * between ksm_exit() and exit_mmap(). Just refuse to let * merge_across_nodes/max_page_sharing be switched. */ err = -EBUSY; if (!folio_mapped(folio)) { /* * The stable node did not yet appear stale to ksm_get_folio(), * since that allows for an unmapped ksm folio to be recognized * right up until it is freed; but the node is safe to remove. * This folio might be in an LRU cache waiting to be freed, * or it might be in the swapcache (perhaps under writeback), * or it might have been removed from swapcache a moment ago. */ folio_set_stable_node(folio, NULL); remove_node_from_stable_tree(stable_node); err = 0; } folio_unlock(folio); folio_put(folio); return err; } static int remove_stable_node_chain(struct ksm_stable_node *stable_node, struct rb_root *root) { struct ksm_stable_node *dup; struct hlist_node *hlist_safe; if (!is_stable_node_chain(stable_node)) { VM_BUG_ON(is_stable_node_dup(stable_node)); if (remove_stable_node(stable_node)) return true; else return false; } hlist_for_each_entry_safe(dup, hlist_safe, &stable_node->hlist, hlist_dup) { VM_BUG_ON(!is_stable_node_dup(dup)); if (remove_stable_node(dup)) return true; } BUG_ON(!hlist_empty(&stable_node->hlist)); free_stable_node_chain(stable_node, root); return false; } static int remove_all_stable_nodes(void) { struct ksm_stable_node *stable_node, *next; int nid; int err = 0; for (nid = 0; nid < ksm_nr_node_ids; nid++) { while (root_stable_tree[nid].rb_node) { stable_node = rb_entry(root_stable_tree[nid].rb_node, struct ksm_stable_node, node); if (remove_stable_node_chain(stable_node, root_stable_tree + nid)) { err = -EBUSY; break; /* proceed to next nid */ } cond_resched(); } } list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) { if (remove_stable_node(stable_node)) err = -EBUSY; cond_resched(); } return err; } static int unmerge_and_remove_all_rmap_items(void) { struct ksm_mm_slot *mm_slot; struct mm_slot *slot; struct mm_struct *mm; struct vm_area_struct *vma; int err = 0; spin_lock(&ksm_mmlist_lock); slot = list_entry(ksm_mm_head.slot.mm_node.next, struct mm_slot, mm_node); ksm_scan.mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot); spin_unlock(&ksm_mmlist_lock); for (mm_slot = ksm_scan.mm_slot; mm_slot != &ksm_mm_head; mm_slot = ksm_scan.mm_slot) { VMA_ITERATOR(vmi, mm_slot->slot.mm, 0); mm = mm_slot->slot.mm; mmap_read_lock(mm); /* * Exit right away if mm is exiting to avoid lockdep issue in * the maple tree */ if (ksm_test_exit(mm)) goto mm_exiting; for_each_vma(vmi, vma) { if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma) continue; err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, false); if (err) goto error; } mm_exiting: remove_trailing_rmap_items(&mm_slot->rmap_list); mmap_read_unlock(mm); spin_lock(&ksm_mmlist_lock); slot = list_entry(mm_slot->slot.mm_node.next, struct mm_slot, mm_node); ksm_scan.mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot); if (ksm_test_exit(mm)) { hash_del(&mm_slot->slot.hash); list_del(&mm_slot->slot.mm_node); spin_unlock(&ksm_mmlist_lock); mm_slot_free(mm_slot_cache, mm_slot); clear_bit(MMF_VM_MERGEABLE, &mm->flags); clear_bit(MMF_VM_MERGE_ANY, &mm->flags); mmdrop(mm); } else spin_unlock(&ksm_mmlist_lock); } /* Clean up stable nodes, but don't worry if some are still busy */ remove_all_stable_nodes(); ksm_scan.seqnr = 0; return 0; error: mmap_read_unlock(mm); spin_lock(&ksm_mmlist_lock); ksm_scan.mm_slot = &ksm_mm_head; spin_unlock(&ksm_mmlist_lock); return err; } #endif /* CONFIG_SYSFS */ static u32 calc_checksum(struct page *page) { u32 checksum; void *addr = kmap_local_page(page); checksum = xxhash(addr, PAGE_SIZE, 0); kunmap_local(addr); return checksum; } static int write_protect_page(struct vm_area_struct *vma, struct folio *folio, pte_t *orig_pte) { struct mm_struct *mm = vma->vm_mm; DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, 0, 0); int swapped; int err = -EFAULT; struct mmu_notifier_range range; bool anon_exclusive; pte_t entry; if (WARN_ON_ONCE(folio_test_large(folio))) return err; pvmw.address = page_address_in_vma(folio, folio_page(folio, 0), vma); if (pvmw.address == -EFAULT) goto out; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, pvmw.address, pvmw.address + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); if (!page_vma_mapped_walk(&pvmw)) goto out_mn; if (WARN_ONCE(!pvmw.pte, "Unexpected PMD mapping?")) goto out_unlock; anon_exclusive = PageAnonExclusive(&folio->page); entry = ptep_get(pvmw.pte); if (pte_write(entry) || pte_dirty(entry) || anon_exclusive || mm_tlb_flush_pending(mm)) { swapped = folio_test_swapcache(folio); flush_cache_page(vma, pvmw.address, folio_pfn(folio)); /* * Ok this is tricky, when get_user_pages_fast() run it doesn't * take any lock, therefore the check that we are going to make * with the pagecount against the mapcount is racy and * O_DIRECT can happen right after the check. * So we clear the pte and flush the tlb before the check * this assure us that no O_DIRECT can happen after the check * or in the middle of the check. * * No need to notify as we are downgrading page table to read * only not changing it to point to a new page. * * See Documentation/mm/mmu_notifier.rst */ entry = ptep_clear_flush(vma, pvmw.address, pvmw.pte); /* * Check that no O_DIRECT or similar I/O is in progress on the * page */ if (folio_mapcount(folio) + 1 + swapped != folio_ref_count(folio)) { set_pte_at(mm, pvmw.address, pvmw.pte, entry); goto out_unlock; } /* See folio_try_share_anon_rmap_pte(): clear PTE first. */ if (anon_exclusive && folio_try_share_anon_rmap_pte(folio, &folio->page)) { set_pte_at(mm, pvmw.address, pvmw.pte, entry); goto out_unlock; } if (pte_dirty(entry)) folio_mark_dirty(folio); entry = pte_mkclean(entry); if (pte_write(entry)) entry = pte_wrprotect(entry); set_pte_at(mm, pvmw.address, pvmw.pte, entry); } *orig_pte = entry; err = 0; out_unlock: page_vma_mapped_walk_done(&pvmw); out_mn: mmu_notifier_invalidate_range_end(&range); out: return err; } /** * replace_page - replace page in vma by new ksm page * @vma: vma that holds the pte pointing to page * @page: the page we are replacing by kpage * @kpage: the ksm page we replace page by * @orig_pte: the original value of the pte * * Returns 0 on success, -EFAULT on failure. */ static int replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage, pte_t orig_pte) { struct folio *kfolio = page_folio(kpage); struct mm_struct *mm = vma->vm_mm; struct folio *folio = page_folio(page); pmd_t *pmd; pmd_t pmde; pte_t *ptep; pte_t newpte; spinlock_t *ptl; unsigned long addr; int err = -EFAULT; struct mmu_notifier_range range; addr = page_address_in_vma(folio, page, vma); if (addr == -EFAULT) goto out; pmd = mm_find_pmd(mm, addr); if (!pmd) goto out; /* * Some THP functions use the sequence pmdp_huge_clear_flush(), set_pmd_at() * without holding anon_vma lock for write. So when looking for a * genuine pmde (in which to find pte), test present and !THP together. */ pmde = pmdp_get_lockless(pmd); if (!pmd_present(pmde) || pmd_trans_huge(pmde)) goto out; mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, mm, addr, addr + PAGE_SIZE); mmu_notifier_invalidate_range_start(&range); ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); if (!ptep) goto out_mn; if (!pte_same(ptep_get(ptep), orig_pte)) { pte_unmap_unlock(ptep, ptl); goto out_mn; } VM_BUG_ON_PAGE(PageAnonExclusive(page), page); VM_BUG_ON_FOLIO(folio_test_anon(kfolio) && PageAnonExclusive(kpage), kfolio); /* * No need to check ksm_use_zero_pages here: we can only have a * zero_page here if ksm_use_zero_pages was enabled already. */ if (!is_zero_pfn(page_to_pfn(kpage))) { folio_get(kfolio); folio_add_anon_rmap_pte(kfolio, kpage, vma, addr, RMAP_NONE); newpte = mk_pte(kpage, vma->vm_page_prot); } else { /* * Use pte_mkdirty to mark the zero page mapped by KSM, and then * we can easily track all KSM-placed zero pages by checking if * the dirty bit in zero page's PTE is set. */ newpte = pte_mkdirty(pte_mkspecial(pfn_pte(page_to_pfn(kpage), vma->vm_page_prot))); ksm_map_zero_page(mm); /* * We're replacing an anonymous page with a zero page, which is * not anonymous. We need to do proper accounting otherwise we * will get wrong values in /proc, and a BUG message in dmesg * when tearing down the mm. */ dec_mm_counter(mm, MM_ANONPAGES); } flush_cache_page(vma, addr, pte_pfn(ptep_get(ptep))); /* * No need to notify as we are replacing a read only page with another * read only page with the same content. * * See Documentation/mm/mmu_notifier.rst */ ptep_clear_flush(vma, addr, ptep); set_pte_at(mm, addr, ptep, newpte); folio_remove_rmap_pte(folio, page, vma); if (!folio_mapped(folio)) folio_free_swap(folio); folio_put(folio); pte_unmap_unlock(ptep, ptl); err = 0; out_mn: mmu_notifier_invalidate_range_end(&range); out: return err; } /* * try_to_merge_one_page - take two pages and merge them into one * @vma: the vma that holds the pte pointing to page * @page: the PageAnon page that we want to replace with kpage * @kpage: the KSM page that we want to map instead of page, * or NULL the first time when we want to use page as kpage. * * This function returns 0 if the pages were merged, -EFAULT otherwise. */ static int try_to_merge_one_page(struct vm_area_struct *vma, struct page *page, struct page *kpage) { struct folio *folio = page_folio(page); pte_t orig_pte = __pte(0); int err = -EFAULT; if (page == kpage) /* ksm page forked */ return 0; if (!folio_test_anon(folio)) goto out; /* * We need the folio lock to read a stable swapcache flag in * write_protect_page(). We trylock because we don't want to wait * here - we prefer to continue scanning and merging different * pages, then come back to this page when it is unlocked. */ if (!folio_trylock(folio)) goto out; if (folio_test_large(folio)) { if (split_huge_page(page)) goto out_unlock; folio = page_folio(page); } /* * If this anonymous page is mapped only here, its pte may need * to be write-protected. If it's mapped elsewhere, all of its * ptes are necessarily already write-protected. But in either * case, we need to lock and check page_count is not raised. */ if (write_protect_page(vma, folio, &orig_pte) == 0) { if (!kpage) { /* * While we hold folio lock, upgrade folio from * anon to a NULL stable_node with the KSM flag set: * stable_tree_insert() will update stable_node. */ folio_set_stable_node(folio, NULL); folio_mark_accessed(folio); /* * Page reclaim just frees a clean folio with no dirty * ptes: make sure that the ksm page would be swapped. */ if (!folio_test_dirty(folio)) folio_mark_dirty(folio); err = 0; } else if (pages_identical(page, kpage)) err = replace_page(vma, page, kpage, orig_pte); } out_unlock: folio_unlock(folio); out: return err; } /* * This function returns 0 if the pages were merged or if they are * no longer merging candidates (e.g., VMA stale), -EFAULT otherwise. */ static int try_to_merge_with_zero_page(struct ksm_rmap_item *rmap_item, struct page *page) { struct mm_struct *mm = rmap_item->mm; int err = -EFAULT; /* * Same checksum as an empty page. We attempt to merge it with the * appropriate zero page if the user enabled this via sysfs. */ if (ksm_use_zero_pages && (rmap_item->oldchecksum == zero_checksum)) { struct vm_area_struct *vma; mmap_read_lock(mm); vma = find_mergeable_vma(mm, rmap_item->address); if (vma) { err = try_to_merge_one_page(vma, page, ZERO_PAGE(rmap_item->address)); trace_ksm_merge_one_page( page_to_pfn(ZERO_PAGE(rmap_item->address)), rmap_item, mm, err); } else { /* * If the vma is out of date, we do not need to * continue. */ err = 0; } mmap_read_unlock(mm); } return err; } /* * try_to_merge_with_ksm_page - like try_to_merge_two_pages, * but no new kernel page is allocated: kpage must already be a ksm page. * * This function returns 0 if the pages were merged, -EFAULT otherwise. */ static int try_to_merge_with_ksm_page(struct ksm_rmap_item *rmap_item, struct page *page, struct page *kpage) { struct mm_struct *mm = rmap_item->mm; struct vm_area_struct *vma; int err = -EFAULT; mmap_read_lock(mm); vma = find_mergeable_vma(mm, rmap_item->address); if (!vma) goto out; err = try_to_merge_one_page(vma, page, kpage); if (err) goto out; /* Unstable nid is in union with stable anon_vma: remove first */ remove_rmap_item_from_tree(rmap_item); /* Must get reference to anon_vma while still holding mmap_lock */ rmap_item->anon_vma = vma->anon_vma; get_anon_vma(vma->anon_vma); out: mmap_read_unlock(mm); trace_ksm_merge_with_ksm_page(kpage, page_to_pfn(kpage ? kpage : page), rmap_item, mm, err); return err; } /* * try_to_merge_two_pages - take two identical pages and prepare them * to be merged into one page. * * This function returns the kpage if we successfully merged two identical * pages into one ksm page, NULL otherwise. * * Note that this function upgrades page to ksm page: if one of the pages * is already a ksm page, try_to_merge_with_ksm_page should be used. */ static struct folio *try_to_merge_two_pages(struct ksm_rmap_item *rmap_item, struct page *page, struct ksm_rmap_item *tree_rmap_item, struct page *tree_page) { int err; err = try_to_merge_with_ksm_page(rmap_item, page, NULL); if (!err) { err = try_to_merge_with_ksm_page(tree_rmap_item, tree_page, page); /* * If that fails, we have a ksm page with only one pte * pointing to it: so break it. */ if (err) break_cow(rmap_item); } return err ? NULL : page_folio(page); } static __always_inline bool __is_page_sharing_candidate(struct ksm_stable_node *stable_node, int offset) { VM_BUG_ON(stable_node->rmap_hlist_len < 0); /* * Check that at least one mapping still exists, otherwise * there's no much point to merge and share with this * stable_node, as the underlying tree_page of the other * sharer is going to be freed soon. */ return stable_node->rmap_hlist_len && stable_node->rmap_hlist_len + offset < ksm_max_page_sharing; } static __always_inline bool is_page_sharing_candidate(struct ksm_stable_node *stable_node) { return __is_page_sharing_candidate(stable_node, 0); } static struct folio *stable_node_dup(struct ksm_stable_node **_stable_node_dup, struct ksm_stable_node **_stable_node, struct rb_root *root, bool prune_stale_stable_nodes) { struct ksm_stable_node *dup, *found = NULL, *stable_node = *_stable_node; struct hlist_node *hlist_safe; struct folio *folio, *tree_folio = NULL; int found_rmap_hlist_len; if (!prune_stale_stable_nodes || time_before(jiffies, stable_node->chain_prune_time + msecs_to_jiffies( ksm_stable_node_chains_prune_millisecs))) prune_stale_stable_nodes = false; else stable_node->chain_prune_time = jiffies; hlist_for_each_entry_safe(dup, hlist_safe, &stable_node->hlist, hlist_dup) { cond_resched(); /* * We must walk all stable_node_dup to prune the stale * stable nodes during lookup. * * ksm_get_folio can drop the nodes from the * stable_node->hlist if they point to freed pages * (that's why we do a _safe walk). The "dup" * stable_node parameter itself will be freed from * under us if it returns NULL. */ folio = ksm_get_folio(dup, KSM_GET_FOLIO_NOLOCK); if (!folio) continue; /* Pick the best candidate if possible. */ if (!found || (is_page_sharing_candidate(dup) && (!is_page_sharing_candidate(found) || dup->rmap_hlist_len > found_rmap_hlist_len))) { if (found) folio_put(tree_folio); found = dup; found_rmap_hlist_len = found->rmap_hlist_len; tree_folio = folio; /* skip put_page for found candidate */ if (!prune_stale_stable_nodes && is_page_sharing_candidate(found)) break; continue; } folio_put(folio); } if (found) { if (hlist_is_singular_node(&found->hlist_dup, &stable_node->hlist)) { /* * If there's not just one entry it would * corrupt memory, better BUG_ON. In KSM * context with no lock held it's not even * fatal. */ BUG_ON(stable_node->hlist.first->next); /* * There's just one entry and it is below the * deduplication limit so drop the chain. */ rb_replace_node(&stable_node->node, &found->node, root); free_stable_node(stable_node); ksm_stable_node_chains--; ksm_stable_node_dups--; /* * NOTE: the caller depends on the stable_node * to be equal to stable_node_dup if the chain * was collapsed. */ *_stable_node = found; /* * Just for robustness, as stable_node is * otherwise left as a stable pointer, the * compiler shall optimize it away at build * time. */ stable_node = NULL; } else if (stable_node->hlist.first != &found->hlist_dup && __is_page_sharing_candidate(found, 1)) { /* * If the found stable_node dup can accept one * more future merge (in addition to the one * that is underway) and is not at the head of * the chain, put it there so next search will * be quicker in the !prune_stale_stable_nodes * case. * * NOTE: it would be inaccurate to use nr > 1 * instead of checking the hlist.first pointer * directly, because in the * prune_stale_stable_nodes case "nr" isn't * the position of the found dup in the chain, * but the total number of dups in the chain. */ hlist_del(&found->hlist_dup); hlist_add_head(&found->hlist_dup, &stable_node->hlist); } } else { /* Its hlist must be empty if no one found. */ free_stable_node_chain(stable_node, root); } *_stable_node_dup = found; return tree_folio; } /* * Like for ksm_get_folio, this function can free the *_stable_node and * *_stable_node_dup if the returned tree_page is NULL. * * It can also free and overwrite *_stable_node with the found * stable_node_dup if the chain is collapsed (in which case * *_stable_node will be equal to *_stable_node_dup like if the chain * never existed). It's up to the caller to verify tree_page is not * NULL before dereferencing *_stable_node or *_stable_node_dup. * * *_stable_node_dup is really a second output parameter of this * function and will be overwritten in all cases, the caller doesn't * need to initialize it. */ static struct folio *__stable_node_chain(struct ksm_stable_node **_stable_node_dup, struct ksm_stable_node **_stable_node, struct rb_root *root, bool prune_stale_stable_nodes) { struct ksm_stable_node *stable_node = *_stable_node; if (!is_stable_node_chain(stable_node)) { *_stable_node_dup = stable_node; return ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK); } return stable_node_dup(_stable_node_dup, _stable_node, root, prune_stale_stable_nodes); } static __always_inline struct folio *chain_prune(struct ksm_stable_node **s_n_d, struct ksm_stable_node **s_n, struct rb_root *root) { return __stable_node_chain(s_n_d, s_n, root, true); } static __always_inline struct folio *chain(struct ksm_stable_node **s_n_d, struct ksm_stable_node **s_n, struct rb_root *root) { return __stable_node_chain(s_n_d, s_n, root, false); } /* * stable_tree_search - search for page inside the stable tree * * This function checks if there is a page inside the stable tree * with identical content to the page that we are scanning right now. * * This function returns the stable tree node of identical content if found, * -EBUSY if the stable node's page is being migrated, NULL otherwise. */ static struct folio *stable_tree_search(struct page *page) { int nid; struct rb_root *root; struct rb_node **new; struct rb_node *parent; struct ksm_stable_node *stable_node, *stable_node_dup; struct ksm_stable_node *page_node; struct folio *folio; folio = page_folio(page); page_node = folio_stable_node(folio); if (page_node && page_node->head != &migrate_nodes) { /* ksm page forked */ folio_get(folio); return folio; } nid = get_kpfn_nid(folio_pfn(folio)); root = root_stable_tree + nid; again: new = &root->rb_node; parent = NULL; while (*new) { struct folio *tree_folio; int ret; cond_resched(); stable_node = rb_entry(*new, struct ksm_stable_node, node); tree_folio = chain_prune(&stable_node_dup, &stable_node, root); if (!tree_folio) { /* * If we walked over a stale stable_node, * ksm_get_folio() will call rb_erase() and it * may rebalance the tree from under us. So * restart the search from scratch. Returning * NULL would be safe too, but we'd generate * false negative insertions just because some * stable_node was stale. */ goto again; } ret = memcmp_pages(page, &tree_folio->page); folio_put(tree_folio); parent = *new; if (ret < 0) new = &parent->rb_left; else if (ret > 0) new = &parent->rb_right; else { if (page_node) { VM_BUG_ON(page_node->head != &migrate_nodes); /* * If the mapcount of our migrated KSM folio is * at most 1, we can merge it with another * KSM folio where we know that we have space * for one more mapping without exceeding the * ksm_max_page_sharing limit: see * chain_prune(). This way, we can avoid adding * this stable node to the chain. */ if (folio_mapcount(folio) > 1) goto chain_append; } if (!is_page_sharing_candidate(stable_node_dup)) { /* * If the stable_node is a chain and * we got a payload match in memcmp * but we cannot merge the scanned * page in any of the existing * stable_node dups because they're * all full, we need to wait the * scanned page to find itself a match * in the unstable tree to create a * brand new KSM page to add later to * the dups of this stable_node. */ return NULL; } /* * Lock and unlock the stable_node's page (which * might already have been migrated) so that page * migration is sure to notice its raised count. * It would be more elegant to return stable_node * than kpage, but that involves more changes. */ tree_folio = ksm_get_folio(stable_node_dup, KSM_GET_FOLIO_TRYLOCK); if (PTR_ERR(tree_folio) == -EBUSY) return ERR_PTR(-EBUSY); if (unlikely(!tree_folio)) /* * The tree may have been rebalanced, * so re-evaluate parent and new. */ goto again; folio_unlock(tree_folio); if (get_kpfn_nid(stable_node_dup->kpfn) != NUMA(stable_node_dup->nid)) { folio_put(tree_folio); goto replace; } return tree_folio; } } if (!page_node) return NULL; list_del(&page_node->list); DO_NUMA(page_node->nid = nid); rb_link_node(&page_node->node, parent, new); rb_insert_color(&page_node->node, root); out: if (is_page_sharing_candidate(page_node)) { folio_get(folio); return folio; } else return NULL; replace: /* * If stable_node was a chain and chain_prune collapsed it, * stable_node has been updated to be the new regular * stable_node. A collapse of the chain is indistinguishable * from the case there was no chain in the stable * rbtree. Otherwise stable_node is the chain and * stable_node_dup is the dup to replace. */ if (stable_node_dup == stable_node) { VM_BUG_ON(is_stable_node_chain(stable_node_dup)); VM_BUG_ON(is_stable_node_dup(stable_node_dup)); /* there is no chain */ if (page_node) { VM_BUG_ON(page_node->head != &migrate_nodes); list_del(&page_node->list); DO_NUMA(page_node->nid = nid); rb_replace_node(&stable_node_dup->node, &page_node->node, root); if (is_page_sharing_candidate(page_node)) folio_get(folio); else folio = NULL; } else { rb_erase(&stable_node_dup->node, root); folio = NULL; } } else { VM_BUG_ON(!is_stable_node_chain(stable_node)); __stable_node_dup_del(stable_node_dup); if (page_node) { VM_BUG_ON(page_node->head != &migrate_nodes); list_del(&page_node->list); DO_NUMA(page_node->nid = nid); stable_node_chain_add_dup(page_node, stable_node); if (is_page_sharing_candidate(page_node)) folio_get(folio); else folio = NULL; } else { folio = NULL; } } stable_node_dup->head = &migrate_nodes; list_add(&stable_node_dup->list, stable_node_dup->head); return folio; chain_append: /* * If stable_node was a chain and chain_prune collapsed it, * stable_node has been updated to be the new regular * stable_node. A collapse of the chain is indistinguishable * from the case there was no chain in the stable * rbtree. Otherwise stable_node is the chain and * stable_node_dup is the dup to replace. */ if (stable_node_dup == stable_node) { VM_BUG_ON(is_stable_node_dup(stable_node_dup)); /* chain is missing so create it */ stable_node = alloc_stable_node_chain(stable_node_dup, root); if (!stable_node) return NULL; } /* * Add this stable_node dup that was * migrated to the stable_node chain * of the current nid for this page * content. */ VM_BUG_ON(!is_stable_node_dup(stable_node_dup)); VM_BUG_ON(page_node->head != &migrate_nodes); list_del(&page_node->list); DO_NUMA(page_node->nid = nid); stable_node_chain_add_dup(page_node, stable_node); goto out; } /* * stable_tree_insert - insert stable tree node pointing to new ksm page * into the stable tree. * * This function returns the stable tree node just allocated on success, * NULL otherwise. */ static struct ksm_stable_node *stable_tree_insert(struct folio *kfolio) { int nid; unsigned long kpfn; struct rb_root *root; struct rb_node **new; struct rb_node *parent; struct ksm_stable_node *stable_node, *stable_node_dup; bool need_chain = false; kpfn = folio_pfn(kfolio); nid = get_kpfn_nid(kpfn); root = root_stable_tree + nid; again: parent = NULL; new = &root->rb_node; while (*new) { struct folio *tree_folio; int ret; cond_resched(); stable_node = rb_entry(*new, struct ksm_stable_node, node); tree_folio = chain(&stable_node_dup, &stable_node, root); if (!tree_folio) { /* * If we walked over a stale stable_node, * ksm_get_folio() will call rb_erase() and it * may rebalance the tree from under us. So * restart the search from scratch. Returning * NULL would be safe too, but we'd generate * false negative insertions just because some * stable_node was stale. */ goto again; } ret = memcmp_pages(&kfolio->page, &tree_folio->page); folio_put(tree_folio); parent = *new; if (ret < 0) new = &parent->rb_left; else if (ret > 0) new = &parent->rb_right; else { need_chain = true; break; } } stable_node_dup = alloc_stable_node(); if (!stable_node_dup) return NULL; INIT_HLIST_HEAD(&stable_node_dup->hlist); stable_node_dup->kpfn = kpfn; stable_node_dup->rmap_hlist_len = 0; DO_NUMA(stable_node_dup->nid = nid); if (!need_chain) { rb_link_node(&stable_node_dup->node, parent, new); rb_insert_color(&stable_node_dup->node, root); } else { if (!is_stable_node_chain(stable_node)) { struct ksm_stable_node *orig = stable_node; /* chain is missing so create it */ stable_node = alloc_stable_node_chain(orig, root); if (!stable_node) { free_stable_node(stable_node_dup); return NULL; } } stable_node_chain_add_dup(stable_node_dup, stable_node); } folio_set_stable_node(kfolio, stable_node_dup); return stable_node_dup; } /* * unstable_tree_search_insert - search for identical page, * else insert rmap_item into the unstable tree. * * This function searches for a page in the unstable tree identical to the * page currently being scanned; and if no identical page is found in the * tree, we insert rmap_item as a new object into the unstable tree. * * This function returns pointer to rmap_item found to be identical * to the currently scanned page, NULL otherwise. * * This function does both searching and inserting, because they share * the same walking algorithm in an rbtree. */ static struct ksm_rmap_item *unstable_tree_search_insert(struct ksm_rmap_item *rmap_item, struct page *page, struct page **tree_pagep) { struct rb_node **new; struct rb_root *root; struct rb_node *parent = NULL; int nid; nid = get_kpfn_nid(page_to_pfn(page)); root = root_unstable_tree + nid; new = &root->rb_node; while (*new) { struct ksm_rmap_item *tree_rmap_item; struct page *tree_page; int ret; cond_resched(); tree_rmap_item = rb_entry(*new, struct ksm_rmap_item, node); tree_page = get_mergeable_page(tree_rmap_item); if (!tree_page) return NULL; /* * Don't substitute a ksm page for a forked page. */ if (page == tree_page) { put_page(tree_page); return NULL; } ret = memcmp_pages(page, tree_page); parent = *new; if (ret < 0) { put_page(tree_page); new = &parent->rb_left; } else if (ret > 0) { put_page(tree_page); new = &parent->rb_right; } else if (!ksm_merge_across_nodes && page_to_nid(tree_page) != nid) { /* * If tree_page has been migrated to another NUMA node, * it will be flushed out and put in the right unstable * tree next time: only merge with it when across_nodes. */ put_page(tree_page); return NULL; } else { *tree_pagep = tree_page; return tree_rmap_item; } } rmap_item->address |= UNSTABLE_FLAG; rmap_item->address |= (ksm_scan.seqnr & SEQNR_MASK); DO_NUMA(rmap_item->nid = nid); rb_link_node(&rmap_item->node, parent, new); rb_insert_color(&rmap_item->node, root); ksm_pages_unshared++; return NULL; } /* * stable_tree_append - add another rmap_item to the linked list of * rmap_items hanging off a given node of the stable tree, all sharing * the same ksm page. */ static void stable_tree_append(struct ksm_rmap_item *rmap_item, struct ksm_stable_node *stable_node, bool max_page_sharing_bypass) { /* * rmap won't find this mapping if we don't insert the * rmap_item in the right stable_node * duplicate. page_migration could break later if rmap breaks, * so we can as well crash here. We really need to check for * rmap_hlist_len == STABLE_NODE_CHAIN, but we can as well check * for other negative values as an underflow if detected here * for the first time (and not when decreasing rmap_hlist_len) * would be sign of memory corruption in the stable_node. */ BUG_ON(stable_node->rmap_hlist_len < 0); stable_node->rmap_hlist_len++; if (!max_page_sharing_bypass) /* possibly non fatal but unexpected overflow, only warn */ WARN_ON_ONCE(stable_node->rmap_hlist_len > ksm_max_page_sharing); rmap_item->head = stable_node; rmap_item->address |= STABLE_FLAG; hlist_add_head(&rmap_item->hlist, &stable_node->hlist); if (rmap_item->hlist.next) ksm_pages_sharing++; else ksm_pages_shared++; rmap_item->mm->ksm_merging_pages++; } /* * cmp_and_merge_page - first see if page can be merged into the stable tree; * if not, compare checksum to previous and if it's the same, see if page can * be inserted into the unstable tree, or merged with a page already there and * both transferred to the stable tree. * * @page: the page that we are searching identical page to. * @rmap_item: the reverse mapping into the virtual address of this page */ static void cmp_and_merge_page(struct page *page, struct ksm_rmap_item *rmap_item) { struct ksm_rmap_item *tree_rmap_item; struct page *tree_page = NULL; struct ksm_stable_node *stable_node; struct folio *kfolio; unsigned int checksum; int err; bool max_page_sharing_bypass = false; stable_node = page_stable_node(page); if (stable_node) { if (stable_node->head != &migrate_nodes && get_kpfn_nid(READ_ONCE(stable_node->kpfn)) != NUMA(stable_node->nid)) { stable_node_dup_del(stable_node); stable_node->head = &migrate_nodes; list_add(&stable_node->list, stable_node->head); } if (stable_node->head != &migrate_nodes && rmap_item->head == stable_node) return; /* * If it's a KSM fork, allow it to go over the sharing limit * without warnings. */ if (!is_page_sharing_candidate(stable_node)) max_page_sharing_bypass = true; } else { remove_rmap_item_from_tree(rmap_item); /* * If the hash value of the page has changed from the last time * we calculated it, this page is changing frequently: therefore we * don't want to insert it in the unstable tree, and we don't want * to waste our time searching for something identical to it there. */ checksum = calc_checksum(page); if (rmap_item->oldchecksum != checksum) { rmap_item->oldchecksum = checksum; return; } if (!try_to_merge_with_zero_page(rmap_item, page)) return; } /* Start by searching for the folio in the stable tree */ kfolio = stable_tree_search(page); if (&kfolio->page == page && rmap_item->head == stable_node) { folio_put(kfolio); return; } remove_rmap_item_from_tree(rmap_item); if (kfolio) { if (kfolio == ERR_PTR(-EBUSY)) return; err = try_to_merge_with_ksm_page(rmap_item, page, &kfolio->page); if (!err) { /* * The page was successfully merged: * add its rmap_item to the stable tree. */ folio_lock(kfolio); stable_tree_append(rmap_item, folio_stable_node(kfolio), max_page_sharing_bypass); folio_unlock(kfolio); } folio_put(kfolio); return; } tree_rmap_item = unstable_tree_search_insert(rmap_item, page, &tree_page); if (tree_rmap_item) { bool split; kfolio = try_to_merge_two_pages(rmap_item, page, tree_rmap_item, tree_page); /* * If both pages we tried to merge belong to the same compound * page, then we actually ended up increasing the reference * count of the same compound page twice, and split_huge_page * failed. * Here we set a flag if that happened, and we use it later to * try split_huge_page again. Since we call put_page right * afterwards, the reference count will be correct and * split_huge_page should succeed. */ split = PageTransCompound(page) && compound_head(page) == compound_head(tree_page); put_page(tree_page); if (kfolio) { /* * The pages were successfully merged: insert new * node in the stable tree and add both rmap_items. */ folio_lock(kfolio); stable_node = stable_tree_insert(kfolio); if (stable_node) { stable_tree_append(tree_rmap_item, stable_node, false); stable_tree_append(rmap_item, stable_node, false); } folio_unlock(kfolio); /* * If we fail to insert the page into the stable tree, * we will have 2 virtual addresses that are pointing * to a ksm page left outside the stable tree, * in which case we need to break_cow on both. */ if (!stable_node) { break_cow(tree_rmap_item); break_cow(rmap_item); } } else if (split) { /* * We are here if we tried to merge two pages and * failed because they both belonged to the same * compound page. We will split the page now, but no * merging will take place. * We do not want to add the cost of a full lock; if * the page is locked, it is better to skip it and * perhaps try again later. */ if (!trylock_page(page)) return; split_huge_page(page); unlock_page(page); } } } static struct ksm_rmap_item *get_next_rmap_item(struct ksm_mm_slot *mm_slot, struct ksm_rmap_item **rmap_list, unsigned long addr) { struct ksm_rmap_item *rmap_item; while (*rmap_list) { rmap_item = *rmap_list; if ((rmap_item->address & PAGE_MASK) == addr) return rmap_item; if (rmap_item->address > addr) break; *rmap_list = rmap_item->rmap_list; remove_rmap_item_from_tree(rmap_item); free_rmap_item(rmap_item); } rmap_item = alloc_rmap_item(); if (rmap_item) { /* It has already been zeroed */ rmap_item->mm = mm_slot->slot.mm; rmap_item->mm->ksm_rmap_items++; rmap_item->address = addr; rmap_item->rmap_list = *rmap_list; *rmap_list = rmap_item; } return rmap_item; } /* * Calculate skip age for the ksm page age. The age determines how often * de-duplicating has already been tried unsuccessfully. If the age is * smaller, the scanning of this page is skipped for less scans. * * @age: rmap_item age of page */ static unsigned int skip_age(rmap_age_t age) { if (age <= 3) return 1; if (age <= 5) return 2; if (age <= 8) return 4; return 8; } /* * Determines if a page should be skipped for the current scan. * * @folio: folio containing the page to check * @rmap_item: associated rmap_item of page */ static bool should_skip_rmap_item(struct folio *folio, struct ksm_rmap_item *rmap_item) { rmap_age_t age; if (!ksm_smart_scan) return false; /* * Never skip pages that are already KSM; pages cmp_and_merge_page() * will essentially ignore them, but we still have to process them * properly. */ if (folio_test_ksm(folio)) return false; age = rmap_item->age; if (age != U8_MAX) rmap_item->age++; /* * Smaller ages are not skipped, they need to get a chance to go * through the different phases of the KSM merging. */ if (age < 3) return false; /* * Are we still allowed to skip? If not, then don't skip it * and determine how much more often we are allowed to skip next. */ if (!rmap_item->remaining_skips) { rmap_item->remaining_skips = skip_age(age); return false; } /* Skip this page */ ksm_pages_skipped++; rmap_item->remaining_skips--; remove_rmap_item_from_tree(rmap_item); return true; } static struct ksm_rmap_item *scan_get_next_rmap_item(struct page **page) { struct mm_struct *mm; struct ksm_mm_slot *mm_slot; struct mm_slot *slot; struct vm_area_struct *vma; struct ksm_rmap_item *rmap_item; struct vma_iterator vmi; int nid; if (list_empty(&ksm_mm_head.slot.mm_node)) return NULL; mm_slot = ksm_scan.mm_slot; if (mm_slot == &ksm_mm_head) { advisor_start_scan(); trace_ksm_start_scan(ksm_scan.seqnr, ksm_rmap_items); /* * A number of pages can hang around indefinitely in per-cpu * LRU cache, raised page count preventing write_protect_page * from merging them. Though it doesn't really matter much, * it is puzzling to see some stuck in pages_volatile until * other activity jostles them out, and they also prevented * LTP's KSM test from succeeding deterministically; so drain * them here (here rather than on entry to ksm_do_scan(), * so we don't IPI too often when pages_to_scan is set low). */ lru_add_drain_all(); /* * Whereas stale stable_nodes on the stable_tree itself * get pruned in the regular course of stable_tree_search(), * those moved out to the migrate_nodes list can accumulate: * so prune them once before each full scan. */ if (!ksm_merge_across_nodes) { struct ksm_stable_node *stable_node, *next; struct folio *folio; list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) { folio = ksm_get_folio(stable_node, KSM_GET_FOLIO_NOLOCK); if (folio) folio_put(folio); cond_resched(); } } for (nid = 0; nid < ksm_nr_node_ids; nid++) root_unstable_tree[nid] = RB_ROOT; spin_lock(&ksm_mmlist_lock); slot = list_entry(mm_slot->slot.mm_node.next, struct mm_slot, mm_node); mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot); ksm_scan.mm_slot = mm_slot; spin_unlock(&ksm_mmlist_lock); /* * Although we tested list_empty() above, a racing __ksm_exit * of the last mm on the list may have removed it since then. */ if (mm_slot == &ksm_mm_head) return NULL; next_mm: ksm_scan.address = 0; ksm_scan.rmap_list = &mm_slot->rmap_list; } slot = &mm_slot->slot; mm = slot->mm; vma_iter_init(&vmi, mm, ksm_scan.address); mmap_read_lock(mm); if (ksm_test_exit(mm)) goto no_vmas; for_each_vma(vmi, vma) { if (!(vma->vm_flags & VM_MERGEABLE)) continue; if (ksm_scan.address < vma->vm_start) ksm_scan.address = vma->vm_start; if (!vma->anon_vma) ksm_scan.address = vma->vm_end; while (ksm_scan.address < vma->vm_end) { struct page *tmp_page = NULL; struct folio_walk fw; struct folio *folio; if (ksm_test_exit(mm)) break; folio = folio_walk_start(&fw, vma, ksm_scan.address, 0); if (folio) { if (!folio_is_zone_device(folio) && folio_test_anon(folio)) { folio_get(folio); tmp_page = fw.page; } folio_walk_end(&fw, vma); } if (tmp_page) { flush_anon_page(vma, tmp_page, ksm_scan.address); flush_dcache_page(tmp_page); rmap_item = get_next_rmap_item(mm_slot, ksm_scan.rmap_list, ksm_scan.address); if (rmap_item) { ksm_scan.rmap_list = &rmap_item->rmap_list; if (should_skip_rmap_item(folio, rmap_item)) { folio_put(folio); goto next_page; } ksm_scan.address += PAGE_SIZE; *page = tmp_page; } else { folio_put(folio); } mmap_read_unlock(mm); return rmap_item; } next_page: ksm_scan.address += PAGE_SIZE; cond_resched(); } } if (ksm_test_exit(mm)) { no_vmas: ksm_scan.address = 0; ksm_scan.rmap_list = &mm_slot->rmap_list; } /* * Nuke all the rmap_items that are above this current rmap: * because there were no VM_MERGEABLE vmas with such addresses. */ remove_trailing_rmap_items(ksm_scan.rmap_list); spin_lock(&ksm_mmlist_lock); slot = list_entry(mm_slot->slot.mm_node.next, struct mm_slot, mm_node); ksm_scan.mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot); if (ksm_scan.address == 0) { /* * We've completed a full scan of all vmas, holding mmap_lock * throughout, and found no VM_MERGEABLE: so do the same as * __ksm_exit does to remove this mm from all our lists now. * This applies either when cleaning up after __ksm_exit * (but beware: we can reach here even before __ksm_exit), * or when all VM_MERGEABLE areas have been unmapped (and * mmap_lock then protects against race with MADV_MERGEABLE). */ hash_del(&mm_slot->slot.hash); list_del(&mm_slot->slot.mm_node); spin_unlock(&ksm_mmlist_lock); mm_slot_free(mm_slot_cache, mm_slot); clear_bit(MMF_VM_MERGEABLE, &mm->flags); clear_bit(MMF_VM_MERGE_ANY, &mm->flags); mmap_read_unlock(mm); mmdrop(mm); } else { mmap_read_unlock(mm); /* * mmap_read_unlock(mm) first because after * spin_unlock(&ksm_mmlist_lock) run, the "mm" may * already have been freed under us by __ksm_exit() * because the "mm_slot" is still hashed and * ksm_scan.mm_slot doesn't point to it anymore. */ spin_unlock(&ksm_mmlist_lock); } /* Repeat until we've completed scanning the whole list */ mm_slot = ksm_scan.mm_slot; if (mm_slot != &ksm_mm_head) goto next_mm; advisor_stop_scan(); trace_ksm_stop_scan(ksm_scan.seqnr, ksm_rmap_items); ksm_scan.seqnr++; return NULL; } /** * ksm_do_scan - the ksm scanner main worker function. * @scan_npages: number of pages we want to scan before we return. */ static void ksm_do_scan(unsigned int scan_npages) { struct ksm_rmap_item *rmap_item; struct page *page; while (scan_npages-- && likely(!freezing(current))) { cond_resched(); rmap_item = scan_get_next_rmap_item(&page); if (!rmap_item) return; cmp_and_merge_page(page, rmap_item); put_page(page); ksm_pages_scanned++; } } static int ksmd_should_run(void) { return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.slot.mm_node); } static int ksm_scan_thread(void *nothing) { unsigned int sleep_ms; set_freezable(); set_user_nice(current, 5); while (!kthread_should_stop()) { mutex_lock(&ksm_thread_mutex); wait_while_offlining(); if (ksmd_should_run()) ksm_do_scan(ksm_thread_pages_to_scan); mutex_unlock(&ksm_thread_mutex); if (ksmd_should_run()) { sleep_ms = READ_ONCE(ksm_thread_sleep_millisecs); wait_event_freezable_timeout(ksm_iter_wait, sleep_ms != READ_ONCE(ksm_thread_sleep_millisecs), msecs_to_jiffies(sleep_ms)); } else { wait_event_freezable(ksm_thread_wait, ksmd_should_run() || kthread_should_stop()); } } return 0; } static void __ksm_add_vma(struct vm_area_struct *vma) { unsigned long vm_flags = vma->vm_flags; if (vm_flags & VM_MERGEABLE) return; if (vma_ksm_compatible(vma)) vm_flags_set(vma, VM_MERGEABLE); } static int __ksm_del_vma(struct vm_area_struct *vma) { int err; if (!(vma->vm_flags & VM_MERGEABLE)) return 0; if (vma->anon_vma) { err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true); if (err) return err; } vm_flags_clear(vma, VM_MERGEABLE); return 0; } /** * ksm_add_vma - Mark vma as mergeable if compatible * * @vma: Pointer to vma */ void ksm_add_vma(struct vm_area_struct *vma) { struct mm_struct *mm = vma->vm_mm; if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) __ksm_add_vma(vma); } static void ksm_add_vmas(struct mm_struct *mm) { struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); for_each_vma(vmi, vma) __ksm_add_vma(vma); } static int ksm_del_vmas(struct mm_struct *mm) { struct vm_area_struct *vma; int err; VMA_ITERATOR(vmi, mm, 0); for_each_vma(vmi, vma) { err = __ksm_del_vma(vma); if (err) return err; } return 0; } /** * ksm_enable_merge_any - Add mm to mm ksm list and enable merging on all * compatible VMA's * * @mm: Pointer to mm * * Returns 0 on success, otherwise error code */ int ksm_enable_merge_any(struct mm_struct *mm) { int err; if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) return 0; if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { err = __ksm_enter(mm); if (err) return err; } set_bit(MMF_VM_MERGE_ANY, &mm->flags); ksm_add_vmas(mm); return 0; } /** * ksm_disable_merge_any - Disable merging on all compatible VMA's of the mm, * previously enabled via ksm_enable_merge_any(). * * Disabling merging implies unmerging any merged pages, like setting * MADV_UNMERGEABLE would. If unmerging fails, the whole operation fails and * merging on all compatible VMA's remains enabled. * * @mm: Pointer to mm * * Returns 0 on success, otherwise error code */ int ksm_disable_merge_any(struct mm_struct *mm) { int err; if (!test_bit(MMF_VM_MERGE_ANY, &mm->flags)) return 0; err = ksm_del_vmas(mm); if (err) { ksm_add_vmas(mm); return err; } clear_bit(MMF_VM_MERGE_ANY, &mm->flags); return 0; } int ksm_disable(struct mm_struct *mm) { mmap_assert_write_locked(mm); if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) return 0; if (test_bit(MMF_VM_MERGE_ANY, &mm->flags)) return ksm_disable_merge_any(mm); return ksm_del_vmas(mm); } int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags) { struct mm_struct *mm = vma->vm_mm; int err; switch (advice) { case MADV_MERGEABLE: if (vma->vm_flags & VM_MERGEABLE) return 0; if (!vma_ksm_compatible(vma)) return 0; if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { err = __ksm_enter(mm); if (err) return err; } *vm_flags |= VM_MERGEABLE; break; case MADV_UNMERGEABLE: if (!(*vm_flags & VM_MERGEABLE)) return 0; /* just ignore the advice */ if (vma->anon_vma) { err = unmerge_ksm_pages(vma, start, end, true); if (err) return err; } *vm_flags &= ~VM_MERGEABLE; break; } return 0; } EXPORT_SYMBOL_GPL(ksm_madvise); int __ksm_enter(struct mm_struct *mm) { struct ksm_mm_slot *mm_slot; struct mm_slot *slot; int needs_wakeup; mm_slot = mm_slot_alloc(mm_slot_cache); if (!mm_slot) return -ENOMEM; slot = &mm_slot->slot; /* Check ksm_run too? Would need tighter locking */ needs_wakeup = list_empty(&ksm_mm_head.slot.mm_node); spin_lock(&ksm_mmlist_lock); mm_slot_insert(mm_slots_hash, mm, slot); /* * When KSM_RUN_MERGE (or KSM_RUN_STOP), * insert just behind the scanning cursor, to let the area settle * down a little; when fork is followed by immediate exec, we don't * want ksmd to waste time setting up and tearing down an rmap_list. * * But when KSM_RUN_UNMERGE, it's important to insert ahead of its * scanning cursor, otherwise KSM pages in newly forked mms will be * missed: then we might as well insert at the end of the list. */ if (ksm_run & KSM_RUN_UNMERGE) list_add_tail(&slot->mm_node, &ksm_mm_head.slot.mm_node); else list_add_tail(&slot->mm_node, &ksm_scan.mm_slot->slot.mm_node); spin_unlock(&ksm_mmlist_lock); set_bit(MMF_VM_MERGEABLE, &mm->flags); mmgrab(mm); if (needs_wakeup) wake_up_interruptible(&ksm_thread_wait); trace_ksm_enter(mm); return 0; } void __ksm_exit(struct mm_struct *mm) { struct ksm_mm_slot *mm_slot; struct mm_slot *slot; int easy_to_free = 0; /* * This process is exiting: if it's straightforward (as is the * case when ksmd was never running), free mm_slot immediately. * But if it's at the cursor or has rmap_items linked to it, use * mmap_lock to synchronize with any break_cows before pagetables * are freed, and leave the mm_slot on the list for ksmd to free. * Beware: ksm may already have noticed it exiting and freed the slot. */ spin_lock(&ksm_mmlist_lock); slot = mm_slot_lookup(mm_slots_hash, mm); mm_slot = mm_slot_entry(slot, struct ksm_mm_slot, slot); if (mm_slot && ksm_scan.mm_slot != mm_slot) { if (!mm_slot->rmap_list) { hash_del(&slot->hash); list_del(&slot->mm_node); easy_to_free = 1; } else { list_move(&slot->mm_node, &ksm_scan.mm_slot->slot.mm_node); } } spin_unlock(&ksm_mmlist_lock); if (easy_to_free) { mm_slot_free(mm_slot_cache, mm_slot); clear_bit(MMF_VM_MERGE_ANY, &mm->flags); clear_bit(MMF_VM_MERGEABLE, &mm->flags); mmdrop(mm); } else if (mm_slot) { mmap_write_lock(mm); mmap_write_unlock(mm); } trace_ksm_exit(mm); } struct folio *ksm_might_need_to_copy(struct folio *folio, struct vm_area_struct *vma, unsigned long addr) { struct page *page = folio_page(folio, 0); struct anon_vma *anon_vma = folio_anon_vma(folio); struct folio *new_folio; if (folio_test_large(folio)) return folio; if (folio_test_ksm(folio)) { if (folio_stable_node(folio) && !(ksm_run & KSM_RUN_UNMERGE)) return folio; /* no need to copy it */ } else if (!anon_vma) { return folio; /* no need to copy it */ } else if (folio->index == linear_page_index(vma, addr) && anon_vma->root == vma->anon_vma->root) { return folio; /* still no need to copy it */ } if (PageHWPoison(page)) return ERR_PTR(-EHWPOISON); if (!folio_test_uptodate(folio)) return folio; /* let do_swap_page report the error */ new_folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, addr); if (new_folio && mem_cgroup_charge(new_folio, vma->vm_mm, GFP_KERNEL)) { folio_put(new_folio); new_folio = NULL; } if (new_folio) { if (copy_mc_user_highpage(folio_page(new_folio, 0), page, addr, vma)) { folio_put(new_folio); return ERR_PTR(-EHWPOISON); } folio_set_dirty(new_folio); __folio_mark_uptodate(new_folio); __folio_set_locked(new_folio); #ifdef CONFIG_SWAP count_vm_event(KSM_SWPIN_COPY); #endif } return new_folio; } void rmap_walk_ksm(struct folio *folio, struct rmap_walk_control *rwc) { struct ksm_stable_node *stable_node; struct ksm_rmap_item *rmap_item; int search_new_forks = 0; VM_BUG_ON_FOLIO(!folio_test_ksm(folio), folio); /* * Rely on the page lock to protect against concurrent modifications * to that page's node of the stable tree. */ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); stable_node = folio_stable_node(folio); if (!stable_node) return; again: hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *anon_vma = rmap_item->anon_vma; struct anon_vma_chain *vmac; struct vm_area_struct *vma; cond_resched(); if (!anon_vma_trylock_read(anon_vma)) { if (rwc->try_lock) { rwc->contended = true; return; } anon_vma_lock_read(anon_vma); } anon_vma_interval_tree_foreach(vmac, &anon_vma->rb_root, 0, ULONG_MAX) { unsigned long addr; cond_resched(); vma = vmac->vma; /* Ignore the stable/unstable/sqnr flags */ addr = rmap_item->address & PAGE_MASK; if (addr < vma->vm_start || addr >= vma->vm_end) continue; /* * Initially we examine only the vma which covers this * rmap_item; but later, if there is still work to do, * we examine covering vmas in other mms: in case they * were forked from the original since ksmd passed. */ if ((rmap_item->mm == vma->vm_mm) == search_new_forks) continue; if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg)) continue; if (!rwc->rmap_one(folio, vma, addr, rwc->arg)) { anon_vma_unlock_read(anon_vma); return; } if (rwc->done && rwc->done(folio)) { anon_vma_unlock_read(anon_vma); return; } } anon_vma_unlock_read(anon_vma); } if (!search_new_forks++) goto again; } #ifdef CONFIG_MEMORY_FAILURE /* * Collect processes when the error hit an ksm page. */ void collect_procs_ksm(const struct folio *folio, const struct page *page, struct list_head *to_kill, int force_early) { struct ksm_stable_node *stable_node; struct ksm_rmap_item *rmap_item; struct vm_area_struct *vma; struct task_struct *tsk; stable_node = folio_stable_node(folio); if (!stable_node) return; hlist_for_each_entry(rmap_item, &stable_node->hlist, hlist) { struct anon_vma *av = rmap_item->anon_vma; anon_vma_lock_read(av); rcu_read_lock(); for_each_process(tsk) { struct anon_vma_chain *vmac; unsigned long addr; struct task_struct *t = task_early_kill(tsk, force_early); if (!t) continue; anon_vma_interval_tree_foreach(vmac, &av->rb_root, 0, ULONG_MAX) { vma = vmac->vma; if (vma->vm_mm == t->mm) { addr = rmap_item->address & PAGE_MASK; add_to_kill_ksm(t, page, vma, to_kill, addr); } } } rcu_read_unlock(); anon_vma_unlock_read(av); } } #endif #ifdef CONFIG_MIGRATION void folio_migrate_ksm(struct folio *newfolio, struct folio *folio) { struct ksm_stable_node *stable_node; VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); VM_BUG_ON_FOLIO(!folio_test_locked(newfolio), newfolio); VM_BUG_ON_FOLIO(newfolio->mapping != folio->mapping, newfolio); stable_node = folio_stable_node(folio); if (stable_node) { VM_BUG_ON_FOLIO(stable_node->kpfn != folio_pfn(folio), folio); stable_node->kpfn = folio_pfn(newfolio); /* * newfolio->mapping was set in advance; now we need smp_wmb() * to make sure that the new stable_node->kpfn is visible * to ksm_get_folio() before it can see that folio->mapping * has gone stale (or that the swapcache flag has been cleared). */ smp_wmb(); folio_set_stable_node(folio, NULL); } } #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_HOTREMOVE static void wait_while_offlining(void) { while (ksm_run & KSM_RUN_OFFLINE) { mutex_unlock(&ksm_thread_mutex); wait_on_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE), TASK_UNINTERRUPTIBLE); mutex_lock(&ksm_thread_mutex); } } static bool stable_node_dup_remove_range(struct ksm_stable_node *stable_node, unsigned long start_pfn, unsigned long end_pfn) { if (stable_node->kpfn >= start_pfn && stable_node->kpfn < end_pfn) { /* * Don't ksm_get_folio, page has already gone: * which is why we keep kpfn instead of page* */ remove_node_from_stable_tree(stable_node); return true; } return false; } static bool stable_node_chain_remove_range(struct ksm_stable_node *stable_node, unsigned long start_pfn, unsigned long end_pfn, struct rb_root *root) { struct ksm_stable_node *dup; struct hlist_node *hlist_safe; if (!is_stable_node_chain(stable_node)) { VM_BUG_ON(is_stable_node_dup(stable_node)); return stable_node_dup_remove_range(stable_node, start_pfn, end_pfn); } hlist_for_each_entry_safe(dup, hlist_safe, &stable_node->hlist, hlist_dup) { VM_BUG_ON(!is_stable_node_dup(dup)); stable_node_dup_remove_range(dup, start_pfn, end_pfn); } if (hlist_empty(&stable_node->hlist)) { free_stable_node_chain(stable_node, root); return true; /* notify caller that tree was rebalanced */ } else return false; } static void ksm_check_stable_tree(unsigned long start_pfn, unsigned long end_pfn) { struct ksm_stable_node *stable_node, *next; struct rb_node *node; int nid; for (nid = 0; nid < ksm_nr_node_ids; nid++) { node = rb_first(root_stable_tree + nid); while (node) { stable_node = rb_entry(node, struct ksm_stable_node, node); if (stable_node_chain_remove_range(stable_node, start_pfn, end_pfn, root_stable_tree + nid)) node = rb_first(root_stable_tree + nid); else node = rb_next(node); cond_resched(); } } list_for_each_entry_safe(stable_node, next, &migrate_nodes, list) { if (stable_node->kpfn >= start_pfn && stable_node->kpfn < end_pfn) remove_node_from_stable_tree(stable_node); cond_resched(); } } static int ksm_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { struct memory_notify *mn = arg; switch (action) { case MEM_GOING_OFFLINE: /* * Prevent ksm_do_scan(), unmerge_and_remove_all_rmap_items() * and remove_all_stable_nodes() while memory is going offline: * it is unsafe for them to touch the stable tree at this time. * But unmerge_ksm_pages(), rmap lookups and other entry points * which do not need the ksm_thread_mutex are all safe. */ mutex_lock(&ksm_thread_mutex); ksm_run |= KSM_RUN_OFFLINE; mutex_unlock(&ksm_thread_mutex); break; case MEM_OFFLINE: /* * Most of the work is done by page migration; but there might * be a few stable_nodes left over, still pointing to struct * pages which have been offlined: prune those from the tree, * otherwise ksm_get_folio() might later try to access a * non-existent struct page. */ ksm_check_stable_tree(mn->start_pfn, mn->start_pfn + mn->nr_pages); fallthrough; case MEM_CANCEL_OFFLINE: mutex_lock(&ksm_thread_mutex); ksm_run &= ~KSM_RUN_OFFLINE; mutex_unlock(&ksm_thread_mutex); smp_mb(); /* wake_up_bit advises this */ wake_up_bit(&ksm_run, ilog2(KSM_RUN_OFFLINE)); break; } return NOTIFY_OK; } #else static void wait_while_offlining(void) { } #endif /* CONFIG_MEMORY_HOTREMOVE */ #ifdef CONFIG_PROC_FS long ksm_process_profit(struct mm_struct *mm) { return (long)(mm->ksm_merging_pages + mm_ksm_zero_pages(mm)) * PAGE_SIZE - mm->ksm_rmap_items * sizeof(struct ksm_rmap_item); } #endif /* CONFIG_PROC_FS */ #ifdef CONFIG_SYSFS /* * This all compiles without CONFIG_SYSFS, but is a waste of space. */ #define KSM_ATTR_RO(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RO(_name) #define KSM_ATTR(_name) \ static struct kobj_attribute _name##_attr = __ATTR_RW(_name) static ssize_t sleep_millisecs_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_thread_sleep_millisecs); } static ssize_t sleep_millisecs_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned int msecs; int err; err = kstrtouint(buf, 10, &msecs); if (err) return -EINVAL; ksm_thread_sleep_millisecs = msecs; wake_up_interruptible(&ksm_iter_wait); return count; } KSM_ATTR(sleep_millisecs); static ssize_t pages_to_scan_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_thread_pages_to_scan); } static ssize_t pages_to_scan_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned int nr_pages; int err; if (ksm_advisor != KSM_ADVISOR_NONE) return -EINVAL; err = kstrtouint(buf, 10, &nr_pages); if (err) return -EINVAL; ksm_thread_pages_to_scan = nr_pages; return count; } KSM_ATTR(pages_to_scan); static ssize_t run_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_run); } static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned int flags; int err; err = kstrtouint(buf, 10, &flags); if (err) return -EINVAL; if (flags > KSM_RUN_UNMERGE) return -EINVAL; /* * KSM_RUN_MERGE sets ksmd running, and 0 stops it running. * KSM_RUN_UNMERGE stops it running and unmerges all rmap_items, * breaking COW to free the pages_shared (but leaves mm_slots * on the list for when ksmd may be set running again). */ mutex_lock(&ksm_thread_mutex); wait_while_offlining(); if (ksm_run != flags) { ksm_run = flags; if (flags & KSM_RUN_UNMERGE) { set_current_oom_origin(); err = unmerge_and_remove_all_rmap_items(); clear_current_oom_origin(); if (err) { ksm_run = KSM_RUN_STOP; count = err; } } } mutex_unlock(&ksm_thread_mutex); if (flags & KSM_RUN_MERGE) wake_up_interruptible(&ksm_thread_wait); return count; } KSM_ATTR(run); #ifdef CONFIG_NUMA static ssize_t merge_across_nodes_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_merge_across_nodes); } static ssize_t merge_across_nodes_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; unsigned long knob; err = kstrtoul(buf, 10, &knob); if (err) return err; if (knob > 1) return -EINVAL; mutex_lock(&ksm_thread_mutex); wait_while_offlining(); if (ksm_merge_across_nodes != knob) { if (ksm_pages_shared || remove_all_stable_nodes()) err = -EBUSY; else if (root_stable_tree == one_stable_tree) { struct rb_root *buf; /* * This is the first time that we switch away from the * default of merging across nodes: must now allocate * a buffer to hold as many roots as may be needed. * Allocate stable and unstable together: * MAXSMP NODES_SHIFT 10 will use 16kB. */ buf = kcalloc(nr_node_ids + nr_node_ids, sizeof(*buf), GFP_KERNEL); /* Let us assume that RB_ROOT is NULL is zero */ if (!buf) err = -ENOMEM; else { root_stable_tree = buf; root_unstable_tree = buf + nr_node_ids; /* Stable tree is empty but not the unstable */ root_unstable_tree[0] = one_unstable_tree[0]; } } if (!err) { ksm_merge_across_nodes = knob; ksm_nr_node_ids = knob ? 1 : nr_node_ids; } } mutex_unlock(&ksm_thread_mutex); return err ? err : count; } KSM_ATTR(merge_across_nodes); #endif static ssize_t use_zero_pages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_use_zero_pages); } static ssize_t use_zero_pages_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; bool value; err = kstrtobool(buf, &value); if (err) return -EINVAL; ksm_use_zero_pages = value; return count; } KSM_ATTR(use_zero_pages); static ssize_t max_page_sharing_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_max_page_sharing); } static ssize_t max_page_sharing_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; int knob; err = kstrtoint(buf, 10, &knob); if (err) return err; /* * When a KSM page is created it is shared by 2 mappings. This * being a signed comparison, it implicitly verifies it's not * negative. */ if (knob < 2) return -EINVAL; if (READ_ONCE(ksm_max_page_sharing) == knob) return count; mutex_lock(&ksm_thread_mutex); wait_while_offlining(); if (ksm_max_page_sharing != knob) { if (ksm_pages_shared || remove_all_stable_nodes()) err = -EBUSY; else ksm_max_page_sharing = knob; } mutex_unlock(&ksm_thread_mutex); return err ? err : count; } KSM_ATTR(max_page_sharing); static ssize_t pages_scanned_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_pages_scanned); } KSM_ATTR_RO(pages_scanned); static ssize_t pages_shared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_pages_shared); } KSM_ATTR_RO(pages_shared); static ssize_t pages_sharing_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_pages_sharing); } KSM_ATTR_RO(pages_sharing); static ssize_t pages_unshared_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_pages_unshared); } KSM_ATTR_RO(pages_unshared); static ssize_t pages_volatile_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { long ksm_pages_volatile; ksm_pages_volatile = ksm_rmap_items - ksm_pages_shared - ksm_pages_sharing - ksm_pages_unshared; /* * It was not worth any locking to calculate that statistic, * but it might therefore sometimes be negative: conceal that. */ if (ksm_pages_volatile < 0) ksm_pages_volatile = 0; return sysfs_emit(buf, "%ld\n", ksm_pages_volatile); } KSM_ATTR_RO(pages_volatile); static ssize_t pages_skipped_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_pages_skipped); } KSM_ATTR_RO(pages_skipped); static ssize_t ksm_zero_pages_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%ld\n", atomic_long_read(&ksm_zero_pages)); } KSM_ATTR_RO(ksm_zero_pages); static ssize_t general_profit_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { long general_profit; general_profit = (ksm_pages_sharing + atomic_long_read(&ksm_zero_pages)) * PAGE_SIZE - ksm_rmap_items * sizeof(struct ksm_rmap_item); return sysfs_emit(buf, "%ld\n", general_profit); } KSM_ATTR_RO(general_profit); static ssize_t stable_node_dups_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_stable_node_dups); } KSM_ATTR_RO(stable_node_dups); static ssize_t stable_node_chains_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_stable_node_chains); } KSM_ATTR_RO(stable_node_chains); static ssize_t stable_node_chains_prune_millisecs_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_stable_node_chains_prune_millisecs); } static ssize_t stable_node_chains_prune_millisecs_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { unsigned int msecs; int err; err = kstrtouint(buf, 10, &msecs); if (err) return -EINVAL; ksm_stable_node_chains_prune_millisecs = msecs; return count; } KSM_ATTR(stable_node_chains_prune_millisecs); static ssize_t full_scans_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_scan.seqnr); } KSM_ATTR_RO(full_scans); static ssize_t smart_scan_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_smart_scan); } static ssize_t smart_scan_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; bool value; err = kstrtobool(buf, &value); if (err) return -EINVAL; ksm_smart_scan = value; return count; } KSM_ATTR(smart_scan); static ssize_t advisor_mode_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { const char *output; if (ksm_advisor == KSM_ADVISOR_NONE) output = "[none] scan-time"; else if (ksm_advisor == KSM_ADVISOR_SCAN_TIME) output = "none [scan-time]"; return sysfs_emit(buf, "%s\n", output); } static ssize_t advisor_mode_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { enum ksm_advisor_type curr_advisor = ksm_advisor; if (sysfs_streq("scan-time", buf)) ksm_advisor = KSM_ADVISOR_SCAN_TIME; else if (sysfs_streq("none", buf)) ksm_advisor = KSM_ADVISOR_NONE; else return -EINVAL; /* Set advisor default values */ if (curr_advisor != ksm_advisor) set_advisor_defaults(); return count; } KSM_ATTR(advisor_mode); static ssize_t advisor_max_cpu_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%u\n", ksm_advisor_max_cpu); } static ssize_t advisor_max_cpu_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; unsigned long value; err = kstrtoul(buf, 10, &value); if (err) return -EINVAL; ksm_advisor_max_cpu = value; return count; } KSM_ATTR(advisor_max_cpu); static ssize_t advisor_min_pages_to_scan_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_advisor_min_pages_to_scan); } static ssize_t advisor_min_pages_to_scan_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; unsigned long value; err = kstrtoul(buf, 10, &value); if (err) return -EINVAL; ksm_advisor_min_pages_to_scan = value; return count; } KSM_ATTR(advisor_min_pages_to_scan); static ssize_t advisor_max_pages_to_scan_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_advisor_max_pages_to_scan); } static ssize_t advisor_max_pages_to_scan_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; unsigned long value; err = kstrtoul(buf, 10, &value); if (err) return -EINVAL; ksm_advisor_max_pages_to_scan = value; return count; } KSM_ATTR(advisor_max_pages_to_scan); static ssize_t advisor_target_scan_time_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", ksm_advisor_target_scan_time); } static ssize_t advisor_target_scan_time_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { int err; unsigned long value; err = kstrtoul(buf, 10, &value); if (err) return -EINVAL; if (value < 1) return -EINVAL; ksm_advisor_target_scan_time = value; return count; } KSM_ATTR(advisor_target_scan_time); static struct attribute *ksm_attrs[] = { &sleep_millisecs_attr.attr, &pages_to_scan_attr.attr, &run_attr.attr, &pages_scanned_attr.attr, &pages_shared_attr.attr, &pages_sharing_attr.attr, &pages_unshared_attr.attr, &pages_volatile_attr.attr, &pages_skipped_attr.attr, &ksm_zero_pages_attr.attr, &full_scans_attr.attr, #ifdef CONFIG_NUMA &merge_across_nodes_attr.attr, #endif &max_page_sharing_attr.attr, &stable_node_chains_attr.attr, &stable_node_dups_attr.attr, &stable_node_chains_prune_millisecs_attr.attr, &use_zero_pages_attr.attr, &general_profit_attr.attr, &smart_scan_attr.attr, &advisor_mode_attr.attr, &advisor_max_cpu_attr.attr, &advisor_min_pages_to_scan_attr.attr, &advisor_max_pages_to_scan_attr.attr, &advisor_target_scan_time_attr.attr, NULL, }; static const struct attribute_group ksm_attr_group = { .attrs = ksm_attrs, .name = "ksm", }; #endif /* CONFIG_SYSFS */ static int __init ksm_init(void) { struct task_struct *ksm_thread; int err; /* The correct value depends on page size and endianness */ zero_checksum = calc_checksum(ZERO_PAGE(0)); /* Default to false for backwards compatibility */ ksm_use_zero_pages = false; err = ksm_slab_init(); if (err) goto out; ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd"); if (IS_ERR(ksm_thread)) { pr_err("ksm: creating kthread failed\n"); err = PTR_ERR(ksm_thread); goto out_free; } #ifdef CONFIG_SYSFS err = sysfs_create_group(mm_kobj, &ksm_attr_group); if (err) { pr_err("ksm: register sysfs failed\n"); kthread_stop(ksm_thread); goto out_free; } #else ksm_run = KSM_RUN_MERGE; /* no way for user to start it */ #endif /* CONFIG_SYSFS */ #ifdef CONFIG_MEMORY_HOTREMOVE /* There is no significance to this priority 100 */ hotplug_memory_notifier(ksm_memory_callback, KSM_CALLBACK_PRI); #endif return 0; out_free: ksm_slab_free(); out: return err; } subsys_initcall(ksm_init);
7 4 5 5 4 5 12 1 1 1 4 6 6 7 7 7 1 6 1 6 7 6 10 9 10 3 8 3 10 3 9 3 10 5 10 3 3 3 4 4 3 9 13 13 4 4 4 3 1 4 4 4 8 4 4 3 3 3 3 3 3 39 2 1 5 3 3 12 14 10 2 8 2 8 5 4 1 4 4 1 1 2 1 1 4 12 12 2 2 4 4 3 4 4 3 4 3 4 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 // SPDX-License-Identifier: MIT /* * vgaarb.c: Implements VGA arbitration. For details refer to * Documentation/gpu/vgaarbiter.rst * * (C) Copyright 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> * (C) Copyright 2007 Paulo R. Zanoni <przanoni@gmail.com> * (C) Copyright 2007, 2009 Tiago Vignatti <vignatti@freedesktop.org> */ #define pr_fmt(fmt) "vgaarb: " fmt #define vgaarb_dbg(dev, fmt, arg...) dev_dbg(dev, "vgaarb: " fmt, ##arg) #define vgaarb_info(dev, fmt, arg...) dev_info(dev, "vgaarb: " fmt, ##arg) #define vgaarb_err(dev, fmt, arg...) dev_err(dev, "vgaarb: " fmt, ##arg) #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/list.h> #include <linux/sched/signal.h> #include <linux/wait.h> #include <linux/spinlock.h> #include <linux/poll.h> #include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/screen_info.h> #include <linux/vt.h> #include <linux/console.h> #include <linux/acpi.h> #include <linux/uaccess.h> #include <linux/vgaarb.h> static void vga_arbiter_notify_clients(void); /* * We keep a list of all VGA devices in the system to speed * up the various operations of the arbiter */ struct vga_device { struct list_head list; struct pci_dev *pdev; unsigned int decodes; /* what it decodes */ unsigned int owns; /* what it owns */ unsigned int locks; /* what it locks */ unsigned int io_lock_cnt; /* legacy IO lock count */ unsigned int mem_lock_cnt; /* legacy MEM lock count */ unsigned int io_norm_cnt; /* normal IO count */ unsigned int mem_norm_cnt; /* normal MEM count */ bool bridge_has_one_vga; bool is_firmware_default; /* device selected by firmware */ unsigned int (*set_decode)(struct pci_dev *pdev, bool decode); }; static LIST_HEAD(vga_list); static int vga_count, vga_decode_count; static bool vga_arbiter_used; static DEFINE_SPINLOCK(vga_lock); static DECLARE_WAIT_QUEUE_HEAD(vga_wait_queue); static const char *vga_iostate_to_str(unsigned int iostate) { /* Ignore VGA_RSRC_IO and VGA_RSRC_MEM */ iostate &= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; switch (iostate) { case VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM: return "io+mem"; case VGA_RSRC_LEGACY_IO: return "io"; case VGA_RSRC_LEGACY_MEM: return "mem"; } return "none"; } static int vga_str_to_iostate(char *buf, int str_size, unsigned int *io_state) { /* * In theory, we could hand out locks on IO and MEM separately to * userspace, but this can cause deadlocks. */ if (strncmp(buf, "none", 4) == 0) { *io_state = VGA_RSRC_NONE; return 1; } /* XXX We're not checking the str_size! */ if (strncmp(buf, "io+mem", 6) == 0) goto both; else if (strncmp(buf, "io", 2) == 0) goto both; else if (strncmp(buf, "mem", 3) == 0) goto both; return 0; both: *io_state = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; return 1; } /* This is only used as a cookie, it should not be dereferenced */ static struct pci_dev *vga_default; /* Find somebody in our list */ static struct vga_device *vgadev_find(struct pci_dev *pdev) { struct vga_device *vgadev; list_for_each_entry(vgadev, &vga_list, list) if (pdev == vgadev->pdev) return vgadev; return NULL; } /** * vga_default_device - return the default VGA device, for vgacon * * This can be defined by the platform. The default implementation is * rather dumb and will probably only work properly on single VGA card * setups and/or x86 platforms. * * If your VGA default device is not PCI, you'll have to return NULL here. * In this case, I assume it will not conflict with any PCI card. If this * is not true, I'll have to define two arch hooks for enabling/disabling * the VGA default device if that is possible. This may be a problem with * real _ISA_ VGA cards, in addition to a PCI one. I don't know at this * point how to deal with that card. Can their IOs be disabled at all? If * not, then I suppose it's a matter of having the proper arch hook telling * us about it, so we basically never allow anybody to succeed a vga_get(). */ struct pci_dev *vga_default_device(void) { return vga_default; } EXPORT_SYMBOL_GPL(vga_default_device); void vga_set_default_device(struct pci_dev *pdev) { if (vga_default == pdev) return; pci_dev_put(vga_default); vga_default = pci_dev_get(pdev); } /** * vga_remove_vgacon - deactivate VGA console * * Unbind and unregister vgacon in case pdev is the default VGA device. * Can be called by GPU drivers on initialization to make sure VGA register * access done by vgacon will not disturb the device. * * @pdev: PCI device. */ #if !defined(CONFIG_VGA_CONSOLE) int vga_remove_vgacon(struct pci_dev *pdev) { return 0; } #elif !defined(CONFIG_DUMMY_CONSOLE) int vga_remove_vgacon(struct pci_dev *pdev) { return -ENODEV; } #else int vga_remove_vgacon(struct pci_dev *pdev) { int ret = 0; if (pdev != vga_default) return 0; vgaarb_info(&pdev->dev, "deactivate vga console\n"); console_lock(); if (con_is_bound(&vga_con)) ret = do_take_over_console(&dummy_con, 0, MAX_NR_CONSOLES - 1, 1); if (ret == 0) { ret = do_unregister_con_driver(&vga_con); /* Ignore "already unregistered". */ if (ret == -ENODEV) ret = 0; } console_unlock(); return ret; } #endif EXPORT_SYMBOL(vga_remove_vgacon); /* * If we don't ever use VGA arbitration, we should avoid turning off * anything anywhere due to old X servers getting confused about the boot * device not being VGA. */ static void vga_check_first_use(void) { /* * Inform all GPUs in the system that VGA arbitration has occurred * so they can disable resources if possible. */ if (!vga_arbiter_used) { vga_arbiter_used = true; vga_arbiter_notify_clients(); } } static struct vga_device *__vga_tryget(struct vga_device *vgadev, unsigned int rsrc) { struct device *dev = &vgadev->pdev->dev; unsigned int wants, legacy_wants, match; struct vga_device *conflict; unsigned int pci_bits; u32 flags = 0; /* * Account for "normal" resources to lock. If we decode the legacy, * counterpart, we need to request it as well */ if ((rsrc & VGA_RSRC_NORMAL_IO) && (vgadev->decodes & VGA_RSRC_LEGACY_IO)) rsrc |= VGA_RSRC_LEGACY_IO; if ((rsrc & VGA_RSRC_NORMAL_MEM) && (vgadev->decodes & VGA_RSRC_LEGACY_MEM)) rsrc |= VGA_RSRC_LEGACY_MEM; vgaarb_dbg(dev, "%s: %d\n", __func__, rsrc); vgaarb_dbg(dev, "%s: owns: %d\n", __func__, vgadev->owns); /* Check what resources we need to acquire */ wants = rsrc & ~vgadev->owns; /* We already own everything, just mark locked & bye bye */ if (wants == 0) goto lock_them; /* * We don't need to request a legacy resource, we just enable * appropriate decoding and go. */ legacy_wants = wants & VGA_RSRC_LEGACY_MASK; if (legacy_wants == 0) goto enable_them; /* Ok, we don't, let's find out who we need to kick off */ list_for_each_entry(conflict, &vga_list, list) { unsigned int lwants = legacy_wants; unsigned int change_bridge = 0; /* Don't conflict with myself */ if (vgadev == conflict) continue; /* * We have a possible conflict. Before we go further, we must * check if we sit on the same bus as the conflicting device. * If we don't, then we must tie both IO and MEM resources * together since there is only a single bit controlling * VGA forwarding on P2P bridges. */ if (vgadev->pdev->bus != conflict->pdev->bus) { change_bridge = 1; lwants = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; } /* * Check if the guy has a lock on the resource. If he does, * return the conflicting entry. */ if (conflict->locks & lwants) return conflict; /* * Ok, now check if it owns the resource we want. We can * lock resources that are not decoded; therefore a device * can own resources it doesn't decode. */ match = lwants & conflict->owns; if (!match) continue; /* * Looks like he doesn't have a lock, we can steal them * from him. */ flags = 0; pci_bits = 0; /* * If we can't control legacy resources via the bridge, we * also need to disable normal decoding. */ if (!conflict->bridge_has_one_vga) { if ((match & conflict->decodes) & VGA_RSRC_LEGACY_MEM) pci_bits |= PCI_COMMAND_MEMORY; if ((match & conflict->decodes) & VGA_RSRC_LEGACY_IO) pci_bits |= PCI_COMMAND_IO; if (pci_bits) flags |= PCI_VGA_STATE_CHANGE_DECODES; } if (change_bridge) flags |= PCI_VGA_STATE_CHANGE_BRIDGE; pci_set_vga_state(conflict->pdev, false, pci_bits, flags); conflict->owns &= ~match; /* If we disabled normal decoding, reflect it in owns */ if (pci_bits & PCI_COMMAND_MEMORY) conflict->owns &= ~VGA_RSRC_NORMAL_MEM; if (pci_bits & PCI_COMMAND_IO) conflict->owns &= ~VGA_RSRC_NORMAL_IO; } enable_them: /* * Ok, we got it, everybody conflicting has been disabled, let's * enable us. Mark any bits in "owns" regardless of whether we * decoded them. We can lock resources we don't decode, therefore * we must track them via "owns". */ flags = 0; pci_bits = 0; if (!vgadev->bridge_has_one_vga) { flags |= PCI_VGA_STATE_CHANGE_DECODES; if (wants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM)) pci_bits |= PCI_COMMAND_MEMORY; if (wants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO)) pci_bits |= PCI_COMMAND_IO; } if (wants & VGA_RSRC_LEGACY_MASK) flags |= PCI_VGA_STATE_CHANGE_BRIDGE; pci_set_vga_state(vgadev->pdev, true, pci_bits, flags); vgadev->owns |= wants; lock_them: vgadev->locks |= (rsrc & VGA_RSRC_LEGACY_MASK); if (rsrc & VGA_RSRC_LEGACY_IO) vgadev->io_lock_cnt++; if (rsrc & VGA_RSRC_LEGACY_MEM) vgadev->mem_lock_cnt++; if (rsrc & VGA_RSRC_NORMAL_IO) vgadev->io_norm_cnt++; if (rsrc & VGA_RSRC_NORMAL_MEM) vgadev->mem_norm_cnt++; return NULL; } static void __vga_put(struct vga_device *vgadev, unsigned int rsrc) { struct device *dev = &vgadev->pdev->dev; unsigned int old_locks = vgadev->locks; vgaarb_dbg(dev, "%s\n", __func__); /* * Update our counters and account for equivalent legacy resources * if we decode them. */ if ((rsrc & VGA_RSRC_NORMAL_IO) && vgadev->io_norm_cnt > 0) { vgadev->io_norm_cnt--; if (vgadev->decodes & VGA_RSRC_LEGACY_IO) rsrc |= VGA_RSRC_LEGACY_IO; } if ((rsrc & VGA_RSRC_NORMAL_MEM) && vgadev->mem_norm_cnt > 0) { vgadev->mem_norm_cnt--; if (vgadev->decodes & VGA_RSRC_LEGACY_MEM) rsrc |= VGA_RSRC_LEGACY_MEM; } if ((rsrc & VGA_RSRC_LEGACY_IO) && vgadev->io_lock_cnt > 0) vgadev->io_lock_cnt--; if ((rsrc & VGA_RSRC_LEGACY_MEM) && vgadev->mem_lock_cnt > 0) vgadev->mem_lock_cnt--; /* * Just clear lock bits, we do lazy operations so we don't really * have to bother about anything else at this point. */ if (vgadev->io_lock_cnt == 0) vgadev->locks &= ~VGA_RSRC_LEGACY_IO; if (vgadev->mem_lock_cnt == 0) vgadev->locks &= ~VGA_RSRC_LEGACY_MEM; /* * Kick the wait queue in case somebody was waiting if we actually * released something. */ if (old_locks != vgadev->locks) wake_up_all(&vga_wait_queue); } /** * vga_get - acquire & lock VGA resources * @pdev: PCI device of the VGA card or NULL for the system default * @rsrc: bit mask of resources to acquire and lock * @interruptible: blocking should be interruptible by signals ? * * Acquire VGA resources for the given card and mark those resources * locked. If the resources requested are "normal" (and not legacy) * resources, the arbiter will first check whether the card is doing legacy * decoding for that type of resource. If yes, the lock is "converted" into * a legacy resource lock. * * The arbiter will first look for all VGA cards that might conflict and disable * their IOs and/or Memory access, including VGA forwarding on P2P bridges if * necessary, so that the requested resources can be used. Then, the card is * marked as locking these resources and the IO and/or Memory accesses are * enabled on the card (including VGA forwarding on parent P2P bridges if any). * * This function will block if some conflicting card is already locking one of * the required resources (or any resource on a different bus segment, since P2P * bridges don't differentiate VGA memory and IO afaik). You can indicate * whether this blocking should be interruptible by a signal (for userland * interface) or not. * * Must not be called at interrupt time or in atomic context. If the card * already owns the resources, the function succeeds. Nested calls are * supported (a per-resource counter is maintained) * * On success, release the VGA resource again with vga_put(). * * Returns: * * 0 on success, negative error code on failure. */ int vga_get(struct pci_dev *pdev, unsigned int rsrc, int interruptible) { struct vga_device *vgadev, *conflict; unsigned long flags; wait_queue_entry_t wait; int rc = 0; vga_check_first_use(); /* The caller should check for this, but let's be sure */ if (pdev == NULL) pdev = vga_default_device(); if (pdev == NULL) return 0; for (;;) { spin_lock_irqsave(&vga_lock, flags); vgadev = vgadev_find(pdev); if (vgadev == NULL) { spin_unlock_irqrestore(&vga_lock, flags); rc = -ENODEV; break; } conflict = __vga_tryget(vgadev, rsrc); spin_unlock_irqrestore(&vga_lock, flags); if (conflict == NULL) break; /* * We have a conflict; we wait until somebody kicks the * work queue. Currently we have one work queue that we * kick each time some resources are released, but it would * be fairly easy to have a per-device one so that we only * need to attach to the conflicting device. */ init_waitqueue_entry(&wait, current); add_wait_queue(&vga_wait_queue, &wait); set_current_state(interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE); if (interruptible && signal_pending(current)) { __set_current_state(TASK_RUNNING); remove_wait_queue(&vga_wait_queue, &wait); rc = -ERESTARTSYS; break; } schedule(); remove_wait_queue(&vga_wait_queue, &wait); } return rc; } EXPORT_SYMBOL(vga_get); /** * vga_tryget - try to acquire & lock legacy VGA resources * @pdev: PCI device of VGA card or NULL for system default * @rsrc: bit mask of resources to acquire and lock * * Perform the same operation as vga_get(), but return an error (-EBUSY) * instead of blocking if the resources are already locked by another card. * Can be called in any context. * * On success, release the VGA resource again with vga_put(). * * Returns: * * 0 on success, negative error code on failure. */ static int vga_tryget(struct pci_dev *pdev, unsigned int rsrc) { struct vga_device *vgadev; unsigned long flags; int rc = 0; vga_check_first_use(); /* The caller should check for this, but let's be sure */ if (pdev == NULL) pdev = vga_default_device(); if (pdev == NULL) return 0; spin_lock_irqsave(&vga_lock, flags); vgadev = vgadev_find(pdev); if (vgadev == NULL) { rc = -ENODEV; goto bail; } if (__vga_tryget(vgadev, rsrc)) rc = -EBUSY; bail: spin_unlock_irqrestore(&vga_lock, flags); return rc; } /** * vga_put - release lock on legacy VGA resources * @pdev: PCI device of VGA card or NULL for system default * @rsrc: bit mask of resource to release * * Release resources previously locked by vga_get() or vga_tryget(). The * resources aren't disabled right away, so that a subsequent vga_get() on * the same card will succeed immediately. Resources have a counter, so * locks are only released if the counter reaches 0. */ void vga_put(struct pci_dev *pdev, unsigned int rsrc) { struct vga_device *vgadev; unsigned long flags; /* The caller should check for this, but let's be sure */ if (pdev == NULL) pdev = vga_default_device(); if (pdev == NULL) return; spin_lock_irqsave(&vga_lock, flags); vgadev = vgadev_find(pdev); if (vgadev == NULL) goto bail; __vga_put(vgadev, rsrc); bail: spin_unlock_irqrestore(&vga_lock, flags); } EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { #if defined(CONFIG_X86) u64 base = screen_info.lfb_base; u64 size = screen_info.lfb_size; struct resource *r; u64 limit; /* Select the device owning the boot framebuffer if there is one */ if (screen_info.capabilities & VIDEO_CAPABILITY_64BIT_BASE) base |= (u64)screen_info.ext_lfb_base << 32; limit = base + size; /* Does firmware framebuffer belong to us? */ pci_dev_for_each_resource(pdev, r) { if (resource_type(r) != IORESOURCE_MEM) continue; if (!r->start || !r->end) continue; if (base < r->start || limit >= r->end) continue; return true; } #endif return false; } static bool vga_arb_integrated_gpu(struct device *dev) { #if defined(CONFIG_ACPI) struct acpi_device *adev = ACPI_COMPANION(dev); return adev && !strcmp(acpi_device_hid(adev), ACPI_VIDEO_HID); #else return false; #endif } /* * Return true if vgadev is a better default VGA device than the best one * we've seen so far. */ static bool vga_is_boot_device(struct vga_device *vgadev) { struct vga_device *boot_vga = vgadev_find(vga_default_device()); struct pci_dev *pdev = vgadev->pdev; u16 cmd, boot_cmd; /* * We select the default VGA device in this order: * Firmware framebuffer (see vga_arb_select_default_device()) * Legacy VGA device (owns VGA_RSRC_LEGACY_MASK) * Non-legacy integrated device (see vga_arb_select_default_device()) * Non-legacy discrete device (see vga_arb_select_default_device()) * Other device (see vga_arb_select_default_device()) */ /* * We always prefer a firmware default device, so if we've already * found one, there's no need to consider vgadev. */ if (boot_vga && boot_vga->is_firmware_default) return false; if (vga_is_firmware_default(pdev)) { vgadev->is_firmware_default = true; return true; } /* * A legacy VGA device has MEM and IO enabled and any bridges * leading to it have PCI_BRIDGE_CTL_VGA enabled so the legacy * resources ([mem 0xa0000-0xbffff], [io 0x3b0-0x3bb], etc) are * routed to it. * * We use the first one we find, so if we've already found one, * vgadev is no better. */ if (boot_vga && (boot_vga->owns & VGA_RSRC_LEGACY_MASK) == VGA_RSRC_LEGACY_MASK) return false; if ((vgadev->owns & VGA_RSRC_LEGACY_MASK) == VGA_RSRC_LEGACY_MASK) return true; /* * If we haven't found a legacy VGA device, accept a non-legacy * device. It may have either IO or MEM enabled, and bridges may * not have PCI_BRIDGE_CTL_VGA enabled, so it may not be able to * use legacy VGA resources. Prefer an integrated GPU over others. */ pci_read_config_word(pdev, PCI_COMMAND, &cmd); if (cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { /* * An integrated GPU overrides a previous non-legacy * device. We expect only a single integrated GPU, but if * there are more, we use the *last* because that was the * previous behavior. */ if (vga_arb_integrated_gpu(&pdev->dev)) return true; /* * We prefer the first non-legacy discrete device we find. * If we already found one, vgadev is no better. */ if (boot_vga) { pci_read_config_word(boot_vga->pdev, PCI_COMMAND, &boot_cmd); if (boot_cmd & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) return false; } return true; } /* * Vgadev has neither IO nor MEM enabled. If we haven't found any * other VGA devices, it is the best candidate so far. */ if (!boot_vga) return true; return false; } /* * Rules for using a bridge to control a VGA descendant decoding: if a bridge * has only one VGA descendant then it can be used to control the VGA routing * for that device. It should always use the bridge closest to the device to * control it. If a bridge has a direct VGA descendant, but also have a sub- * bridge VGA descendant then we cannot use that bridge to control the direct * VGA descendant. So for every device we register, we need to iterate all * its parent bridges so we can invalidate any devices using them properly. */ static void vga_arbiter_check_bridge_sharing(struct vga_device *vgadev) { struct vga_device *same_bridge_vgadev; struct pci_bus *new_bus, *bus; struct pci_dev *new_bridge, *bridge; vgadev->bridge_has_one_vga = true; if (list_empty(&vga_list)) { vgaarb_info(&vgadev->pdev->dev, "bridge control possible\n"); return; } /* Iterate the new device's bridge hierarchy */ new_bus = vgadev->pdev->bus; while (new_bus) { new_bridge = new_bus->self; /* Go through list of devices already registered */ list_for_each_entry(same_bridge_vgadev, &vga_list, list) { bus = same_bridge_vgadev->pdev->bus; bridge = bus->self; /* See if it shares a bridge with this device */ if (new_bridge == bridge) { /* * If its direct parent bridge is the same * as any bridge of this device then it can't * be used for that device. */ same_bridge_vgadev->bridge_has_one_vga = false; } /* * Now iterate the previous device's bridge hierarchy. * If the new device's parent bridge is in the other * device's hierarchy, we can't use it to control this * device. */ while (bus) { bridge = bus->self; if (bridge && bridge == vgadev->pdev->bus->self) vgadev->bridge_has_one_vga = false; bus = bus->parent; } } new_bus = new_bus->parent; } if (vgadev->bridge_has_one_vga) vgaarb_info(&vgadev->pdev->dev, "bridge control possible\n"); else vgaarb_info(&vgadev->pdev->dev, "no bridge control possible\n"); } /* * Currently, we assume that the "initial" setup of the system is not sane, * that is, we come up with conflicting devices and let the arbiter's * client decide if devices decodes legacy things or not. */ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev) { struct vga_device *vgadev; unsigned long flags; struct pci_bus *bus; struct pci_dev *bridge; u16 cmd; /* Allocate structure */ vgadev = kzalloc(sizeof(struct vga_device), GFP_KERNEL); if (vgadev == NULL) { vgaarb_err(&pdev->dev, "failed to allocate VGA arbiter data\n"); /* * What to do on allocation failure? For now, let's just do * nothing, I'm not sure there is anything saner to be done. */ return false; } /* Take lock & check for duplicates */ spin_lock_irqsave(&vga_lock, flags); if (vgadev_find(pdev) != NULL) { BUG_ON(1); goto fail; } vgadev->pdev = pdev; /* By default, assume we decode everything */ vgadev->decodes = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; /* By default, mark it as decoding */ vga_decode_count++; /* * Mark that we "own" resources based on our enables, we will * clear that below if the bridge isn't forwarding. */ pci_read_config_word(pdev, PCI_COMMAND, &cmd); if (cmd & PCI_COMMAND_IO) vgadev->owns |= VGA_RSRC_LEGACY_IO; if (cmd & PCI_COMMAND_MEMORY) vgadev->owns |= VGA_RSRC_LEGACY_MEM; /* Check if VGA cycles can get down to us */ bus = pdev->bus; while (bus) { bridge = bus->self; if (bridge) { u16 l; pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, &l); if (!(l & PCI_BRIDGE_CTL_VGA)) { vgadev->owns = 0; break; } } bus = bus->parent; } if (vga_is_boot_device(vgadev)) { vgaarb_info(&pdev->dev, "setting as boot VGA device%s\n", vga_default_device() ? " (overriding previous)" : ""); vga_set_default_device(pdev); } vga_arbiter_check_bridge_sharing(vgadev); /* Add to the list */ list_add_tail(&vgadev->list, &vga_list); vga_count++; vgaarb_info(&pdev->dev, "VGA device added: decodes=%s,owns=%s,locks=%s\n", vga_iostate_to_str(vgadev->decodes), vga_iostate_to_str(vgadev->owns), vga_iostate_to_str(vgadev->locks)); spin_unlock_irqrestore(&vga_lock, flags); return true; fail: spin_unlock_irqrestore(&vga_lock, flags); kfree(vgadev); return false; } static bool vga_arbiter_del_pci_device(struct pci_dev *pdev) { struct vga_device *vgadev; unsigned long flags; bool ret = true; spin_lock_irqsave(&vga_lock, flags); vgadev = vgadev_find(pdev); if (vgadev == NULL) { ret = false; goto bail; } if (vga_default == pdev) vga_set_default_device(NULL); if (vgadev->decodes & (VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM)) vga_decode_count--; /* Remove entry from list */ list_del(&vgadev->list); vga_count--; /* Wake up all possible waiters */ wake_up_all(&vga_wait_queue); bail: spin_unlock_irqrestore(&vga_lock, flags); kfree(vgadev); return ret; } /* Called with the lock */ static void vga_update_device_decodes(struct vga_device *vgadev, unsigned int new_decodes) { struct device *dev = &vgadev->pdev->dev; unsigned int old_decodes = vgadev->decodes; unsigned int decodes_removed = ~new_decodes & old_decodes; unsigned int decodes_unlocked = vgadev->locks & decodes_removed; vgadev->decodes = new_decodes; vgaarb_info(dev, "VGA decodes changed: olddecodes=%s,decodes=%s:owns=%s\n", vga_iostate_to_str(old_decodes), vga_iostate_to_str(vgadev->decodes), vga_iostate_to_str(vgadev->owns)); /* If we removed locked decodes, lock count goes to zero, and release */ if (decodes_unlocked) { if (decodes_unlocked & VGA_RSRC_LEGACY_IO) vgadev->io_lock_cnt = 0; if (decodes_unlocked & VGA_RSRC_LEGACY_MEM) vgadev->mem_lock_cnt = 0; __vga_put(vgadev, decodes_unlocked); } /* Change decodes counter */ if (old_decodes & VGA_RSRC_LEGACY_MASK && !(new_decodes & VGA_RSRC_LEGACY_MASK)) vga_decode_count--; if (!(old_decodes & VGA_RSRC_LEGACY_MASK) && new_decodes & VGA_RSRC_LEGACY_MASK) vga_decode_count++; vgaarb_dbg(dev, "decoding count now is: %d\n", vga_decode_count); } static void __vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes, bool userspace) { struct vga_device *vgadev; unsigned long flags; decodes &= VGA_RSRC_LEGACY_MASK; spin_lock_irqsave(&vga_lock, flags); vgadev = vgadev_find(pdev); if (vgadev == NULL) goto bail; /* Don't let userspace futz with kernel driver decodes */ if (userspace && vgadev->set_decode) goto bail; /* Update the device decodes + counter */ vga_update_device_decodes(vgadev, decodes); /* * XXX If somebody is going from "doesn't decode" to "decodes" * state here, additional care must be taken as we may have pending * ownership of non-legacy region. */ bail: spin_unlock_irqrestore(&vga_lock, flags); } /** * vga_set_legacy_decoding * @pdev: PCI device of the VGA card * @decodes: bit mask of what legacy regions the card decodes * * Indicate to the arbiter if the card decodes legacy VGA IOs, legacy VGA * Memory, both, or none. All cards default to both, the card driver (fbdev for * example) should tell the arbiter if it has disabled legacy decoding, so the * card can be left out of the arbitration process (and can be safe to take * interrupts at any time. */ void vga_set_legacy_decoding(struct pci_dev *pdev, unsigned int decodes) { __vga_set_legacy_decoding(pdev, decodes, false); } EXPORT_SYMBOL(vga_set_legacy_decoding); /** * vga_client_register - register or unregister a VGA arbitration client * @pdev: PCI device of the VGA client * @set_decode: VGA decode change callback * * Clients have two callback mechanisms they can use. * * @set_decode callback: If a client can disable its GPU VGA resource, it * will get a callback from this to set the encode/decode state. * * Rationale: we cannot disable VGA decode resources unconditionally * because some single GPU laptops seem to require ACPI or BIOS access to * the VGA registers to control things like backlights etc. Hopefully newer * multi-GPU laptops do something saner, and desktops won't have any * special ACPI for this. The driver will get a callback when VGA * arbitration is first used by userspace since some older X servers have * issues. * * Does not check whether a client for @pdev has been registered already. * * To unregister, call vga_client_unregister(). * * Returns: 0 on success, -ENODEV on failure */ int vga_client_register(struct pci_dev *pdev, unsigned int (*set_decode)(struct pci_dev *pdev, bool decode)) { unsigned long flags; struct vga_device *vgadev; spin_lock_irqsave(&vga_lock, flags); vgadev = vgadev_find(pdev); if (vgadev) vgadev->set_decode = set_decode; spin_unlock_irqrestore(&vga_lock, flags); if (!vgadev) return -ENODEV; return 0; } EXPORT_SYMBOL(vga_client_register); /* * Char driver implementation * * Semantics is: * * open : Open user instance of the arbiter. By default, it's * attached to the default VGA device of the system. * * close : Close user instance, release locks * * read : Return a string indicating the status of the target. * An IO state string is of the form {io,mem,io+mem,none}, * mc and ic are respectively mem and io lock counts (for * debugging/diagnostic only). "decodes" indicate what the * card currently decodes, "owns" indicates what is currently * enabled on it, and "locks" indicates what is locked by this * card. If the card is unplugged, we get "invalid" then for * card_ID and an -ENODEV error is returned for any command * until a new card is targeted * * "<card_ID>,decodes=<io_state>,owns=<io_state>,locks=<io_state> (ic,mc)" * * write : write a command to the arbiter. List of commands is: * * target <card_ID> : switch target to card <card_ID> (see below) * lock <io_state> : acquire locks on target ("none" is invalid io_state) * trylock <io_state> : non-blocking acquire locks on target * unlock <io_state> : release locks on target * unlock all : release all locks on target held by this user * decodes <io_state> : set the legacy decoding attributes for the card * * poll : event if something change on any card (not just the target) * * card_ID is of the form "PCI:domain:bus:dev.fn". It can be set to "default" * to go back to the system default card (TODO: not implemented yet). * Currently, only PCI is supported as a prefix, but the userland API may * support other bus types in the future, even if the current kernel * implementation doesn't. * * Note about locks: * * The driver keeps track of which user has what locks on which card. It * supports stacking, like the kernel one. This complicates the implementation * a bit, but makes the arbiter more tolerant to userspace problems and able * to properly cleanup in all cases when a process dies. * Currently, a max of 16 cards simultaneously can have locks issued from * userspace for a given user (file descriptor instance) of the arbiter. * * If the device is hot-unplugged, there is a hook inside the module to notify * it being added/removed in the system and automatically added/removed in * the arbiter. */ #define MAX_USER_CARDS CONFIG_VGA_ARB_MAX_GPUS #define PCI_INVALID_CARD ((struct pci_dev *)-1UL) /* Each user has an array of these, tracking which cards have locks */ struct vga_arb_user_card { struct pci_dev *pdev; unsigned int mem_cnt; unsigned int io_cnt; }; struct vga_arb_private { struct list_head list; struct pci_dev *target; struct vga_arb_user_card cards[MAX_USER_CARDS]; spinlock_t lock; }; static LIST_HEAD(vga_user_list); static DEFINE_SPINLOCK(vga_user_lock); /* * Take a string in the format: "PCI:domain:bus:dev.fn" and return the * respective values. If the string is not in this format, return 0. */ static int vga_pci_str_to_vars(char *buf, int count, unsigned int *domain, unsigned int *bus, unsigned int *devfn) { int n; unsigned int slot, func; n = sscanf(buf, "PCI:%x:%x:%x.%x", domain, bus, &slot, &func); if (n != 4) return 0; *devfn = PCI_DEVFN(slot, func); return 1; } static ssize_t vga_arb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct vga_arb_private *priv = file->private_data; struct vga_device *vgadev; struct pci_dev *pdev; unsigned long flags; size_t len; int rc; char *lbuf; lbuf = kmalloc(1024, GFP_KERNEL); if (lbuf == NULL) return -ENOMEM; /* Protect vga_list */ spin_lock_irqsave(&vga_lock, flags); /* If we are targeting the default, use it */ pdev = priv->target; if (pdev == NULL || pdev == PCI_INVALID_CARD) { spin_unlock_irqrestore(&vga_lock, flags); len = sprintf(lbuf, "invalid"); goto done; } /* Find card vgadev structure */ vgadev = vgadev_find(pdev); if (vgadev == NULL) { /* * Wow, it's not in the list, that shouldn't happen, let's * fix us up and return invalid card. */ spin_unlock_irqrestore(&vga_lock, flags); len = sprintf(lbuf, "invalid"); goto done; } /* Fill the buffer with info */ len = snprintf(lbuf, 1024, "count:%d,PCI:%s,decodes=%s,owns=%s,locks=%s(%u:%u)\n", vga_decode_count, pci_name(pdev), vga_iostate_to_str(vgadev->decodes), vga_iostate_to_str(vgadev->owns), vga_iostate_to_str(vgadev->locks), vgadev->io_lock_cnt, vgadev->mem_lock_cnt); spin_unlock_irqrestore(&vga_lock, flags); done: /* Copy that to user */ if (len > count) len = count; rc = copy_to_user(buf, lbuf, len); kfree(lbuf); if (rc) return -EFAULT; return len; } /* * TODO: To avoid parsing inside kernel and to improve the speed we may * consider use ioctl here */ static ssize_t vga_arb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct vga_arb_private *priv = file->private_data; struct vga_arb_user_card *uc = NULL; struct pci_dev *pdev; unsigned int io_state; char kbuf[64], *curr_pos; size_t remaining = count; int ret_val; int i; if (count >= sizeof(kbuf)) return -EINVAL; if (copy_from_user(kbuf, buf, count)) return -EFAULT; curr_pos = kbuf; kbuf[count] = '\0'; if (strncmp(curr_pos, "lock ", 5) == 0) { curr_pos += 5; remaining -= 5; pr_debug("client 0x%p called 'lock'\n", priv); if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) { ret_val = -EPROTO; goto done; } if (io_state == VGA_RSRC_NONE) { ret_val = -EPROTO; goto done; } pdev = priv->target; if (priv->target == NULL) { ret_val = -ENODEV; goto done; } vga_get_uninterruptible(pdev, io_state); /* Update the client's locks lists */ for (i = 0; i < MAX_USER_CARDS; i++) { if (priv->cards[i].pdev == pdev) { if (io_state & VGA_RSRC_LEGACY_IO) priv->cards[i].io_cnt++; if (io_state & VGA_RSRC_LEGACY_MEM) priv->cards[i].mem_cnt++; break; } } ret_val = count; goto done; } else if (strncmp(curr_pos, "unlock ", 7) == 0) { curr_pos += 7; remaining -= 7; pr_debug("client 0x%p called 'unlock'\n", priv); if (strncmp(curr_pos, "all", 3) == 0) io_state = VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; else { if (!vga_str_to_iostate (curr_pos, remaining, &io_state)) { ret_val = -EPROTO; goto done; } /* TODO: Add this? if (io_state == VGA_RSRC_NONE) { ret_val = -EPROTO; goto done; } */ } pdev = priv->target; if (priv->target == NULL) { ret_val = -ENODEV; goto done; } for (i = 0; i < MAX_USER_CARDS; i++) { if (priv->cards[i].pdev == pdev) uc = &priv->cards[i]; } if (!uc) { ret_val = -EINVAL; goto done; } if (io_state & VGA_RSRC_LEGACY_IO && uc->io_cnt == 0) { ret_val = -EINVAL; goto done; } if (io_state & VGA_RSRC_LEGACY_MEM && uc->mem_cnt == 0) { ret_val = -EINVAL; goto done; } vga_put(pdev, io_state); if (io_state & VGA_RSRC_LEGACY_IO) uc->io_cnt--; if (io_state & VGA_RSRC_LEGACY_MEM) uc->mem_cnt--; ret_val = count; goto done; } else if (strncmp(curr_pos, "trylock ", 8) == 0) { curr_pos += 8; remaining -= 8; pr_debug("client 0x%p called 'trylock'\n", priv); if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) { ret_val = -EPROTO; goto done; } /* TODO: Add this? if (io_state == VGA_RSRC_NONE) { ret_val = -EPROTO; goto done; } */ pdev = priv->target; if (priv->target == NULL) { ret_val = -ENODEV; goto done; } if (vga_tryget(pdev, io_state)) { /* Update the client's locks lists... */ for (i = 0; i < MAX_USER_CARDS; i++) { if (priv->cards[i].pdev == pdev) { if (io_state & VGA_RSRC_LEGACY_IO) priv->cards[i].io_cnt++; if (io_state & VGA_RSRC_LEGACY_MEM) priv->cards[i].mem_cnt++; break; } } ret_val = count; goto done; } else { ret_val = -EBUSY; goto done; } } else if (strncmp(curr_pos, "target ", 7) == 0) { unsigned int domain, bus, devfn; struct vga_device *vgadev; curr_pos += 7; remaining -= 7; pr_debug("client 0x%p called 'target'\n", priv); /* If target is default */ if (!strncmp(curr_pos, "default", 7)) pdev = pci_dev_get(vga_default_device()); else { if (!vga_pci_str_to_vars(curr_pos, remaining, &domain, &bus, &devfn)) { ret_val = -EPROTO; goto done; } pdev = pci_get_domain_bus_and_slot(domain, bus, devfn); if (!pdev) { pr_debug("invalid PCI address %04x:%02x:%02x.%x\n", domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); ret_val = -ENODEV; goto done; } pr_debug("%s ==> %04x:%02x:%02x.%x pdev %p\n", curr_pos, domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn), pdev); } vgadev = vgadev_find(pdev); pr_debug("vgadev %p\n", vgadev); if (vgadev == NULL) { if (pdev) { vgaarb_dbg(&pdev->dev, "not a VGA device\n"); pci_dev_put(pdev); } ret_val = -ENODEV; goto done; } priv->target = pdev; for (i = 0; i < MAX_USER_CARDS; i++) { if (priv->cards[i].pdev == pdev) break; if (priv->cards[i].pdev == NULL) { priv->cards[i].pdev = pdev; priv->cards[i].io_cnt = 0; priv->cards[i].mem_cnt = 0; break; } } if (i == MAX_USER_CARDS) { vgaarb_dbg(&pdev->dev, "maximum user cards (%d) number reached, ignoring this one!\n", MAX_USER_CARDS); pci_dev_put(pdev); /* XXX: Which value to return? */ ret_val = -ENOMEM; goto done; } ret_val = count; pci_dev_put(pdev); goto done; } else if (strncmp(curr_pos, "decodes ", 8) == 0) { curr_pos += 8; remaining -= 8; pr_debug("client 0x%p called 'decodes'\n", priv); if (!vga_str_to_iostate(curr_pos, remaining, &io_state)) { ret_val = -EPROTO; goto done; } pdev = priv->target; if (priv->target == NULL) { ret_val = -ENODEV; goto done; } __vga_set_legacy_decoding(pdev, io_state, true); ret_val = count; goto done; } /* If we got here, the message written is not part of the protocol! */ return -EPROTO; done: return ret_val; } static __poll_t vga_arb_fpoll(struct file *file, poll_table *wait) { pr_debug("%s\n", __func__); poll_wait(file, &vga_wait_queue, wait); return EPOLLIN; } static int vga_arb_open(struct inode *inode, struct file *file) { struct vga_arb_private *priv; unsigned long flags; pr_debug("%s\n", __func__); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (priv == NULL) return -ENOMEM; spin_lock_init(&priv->lock); file->private_data = priv; spin_lock_irqsave(&vga_user_lock, flags); list_add(&priv->list, &vga_user_list); spin_unlock_irqrestore(&vga_user_lock, flags); /* Set the client's lists of locks */ priv->target = vga_default_device(); /* Maybe this is still null! */ priv->cards[0].pdev = priv->target; priv->cards[0].io_cnt = 0; priv->cards[0].mem_cnt = 0; return 0; } static int vga_arb_release(struct inode *inode, struct file *file) { struct vga_arb_private *priv = file->private_data; struct vga_arb_user_card *uc; unsigned long flags; int i; pr_debug("%s\n", __func__); spin_lock_irqsave(&vga_user_lock, flags); list_del(&priv->list); for (i = 0; i < MAX_USER_CARDS; i++) { uc = &priv->cards[i]; if (uc->pdev == NULL) continue; vgaarb_dbg(&uc->pdev->dev, "uc->io_cnt == %d, uc->mem_cnt == %d\n", uc->io_cnt, uc->mem_cnt); while (uc->io_cnt--) vga_put(uc->pdev, VGA_RSRC_LEGACY_IO); while (uc->mem_cnt--) vga_put(uc->pdev, VGA_RSRC_LEGACY_MEM); } spin_unlock_irqrestore(&vga_user_lock, flags); kfree(priv); return 0; } /* * Callback any registered clients to let them know we have a change in VGA * cards. */ static void vga_arbiter_notify_clients(void) { struct vga_device *vgadev; unsigned long flags; unsigned int new_decodes; bool new_state; if (!vga_arbiter_used) return; new_state = (vga_count > 1) ? false : true; spin_lock_irqsave(&vga_lock, flags); list_for_each_entry(vgadev, &vga_list, list) { if (vgadev->set_decode) { new_decodes = vgadev->set_decode(vgadev->pdev, new_state); vga_update_device_decodes(vgadev, new_decodes); } } spin_unlock_irqrestore(&vga_lock, flags); } static int pci_notify(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; struct pci_dev *pdev = to_pci_dev(dev); bool notify = false; vgaarb_dbg(dev, "%s\n", __func__); /* Only deal with VGA class devices */ if (!pci_is_vga(pdev)) return 0; /* * For now, we're only interested in devices added and removed. * I didn't test this thing here, so someone needs to double check * for the cases of hot-pluggable VGA cards. */ if (action == BUS_NOTIFY_ADD_DEVICE) notify = vga_arbiter_add_pci_device(pdev); else if (action == BUS_NOTIFY_DEL_DEVICE) notify = vga_arbiter_del_pci_device(pdev); if (notify) vga_arbiter_notify_clients(); return 0; } static struct notifier_block pci_notifier = { .notifier_call = pci_notify, }; static const struct file_operations vga_arb_device_fops = { .read = vga_arb_read, .write = vga_arb_write, .poll = vga_arb_fpoll, .open = vga_arb_open, .release = vga_arb_release, .llseek = noop_llseek, }; static struct miscdevice vga_arb_device = { MISC_DYNAMIC_MINOR, "vga_arbiter", &vga_arb_device_fops }; static int __init vga_arb_device_init(void) { int rc; struct pci_dev *pdev; rc = misc_register(&vga_arb_device); if (rc < 0) pr_err("error %d registering device\n", rc); bus_register_notifier(&pci_bus_type, &pci_notifier); /* Add all VGA class PCI devices by default */ pdev = NULL; while ((pdev = pci_get_subsys(PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, pdev)) != NULL) { if (pci_is_vga(pdev)) vga_arbiter_add_pci_device(pdev); } pr_info("loaded\n"); return rc; } subsys_initcall_sync(vga_arb_device_init);
292 2 208 30 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Authors: Lotsa people, from code originally in tcp */ #ifndef _INET6_HASHTABLES_H #define _INET6_HASHTABLES_H #if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/types.h> #include <linux/jhash.h> #include <net/inet_sock.h> #include <net/ipv6.h> #include <net/netns/hash.h> struct inet_hashinfo; static inline unsigned int __inet6_ehashfn(const u32 lhash, const u16 lport, const u32 fhash, const __be16 fport, const u32 initval) { const u32 ports = (((u32)lport) << 16) | (__force u32)fport; return jhash_3words(lhash, fhash, ports, initval); } /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif); typedef u32 (inet6_ehashfn_t)(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport); inet6_ehashfn_t inet6_ehashfn; INDIRECT_CALLABLE_DECLARE(inet6_ehashfn_t udp6_ehashfn); struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, inet6_ehashfn_t *ehashfn); struct sock *inet6_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif); struct sock *inet6_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn); static inline struct sock *__inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif, bool *refcounted) { struct sock *sk = __inet6_lookup_established(net, hashinfo, saddr, sport, daddr, hnum, dif, sdif); *refcounted = true; if (sk) return sk; *refcounted = false; return inet6_lookup_listener(net, hashinfo, skb, doff, saddr, sport, daddr, hnum, dif, sdif); } static inline struct sock *inet6_steal_sock(struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, bool *refcounted, inet6_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool prefetched; sk = skb_steal_sock(skb, refcounted, &prefetched); if (!sk) return NULL; if (!prefetched || !sk_fullsock(sk)) return sk; if (sk->sk_protocol == IPPROTO_TCP) { if (sk->sk_state != TCP_LISTEN) return sk; } else if (sk->sk_protocol == IPPROTO_UDP) { if (sk->sk_state != TCP_CLOSE) return sk; } else { return sk; } reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, ntohs(dport), ehashfn); if (!reuse_sk) return sk; /* We've chosen a new reuseport sock which is never refcounted. This * implies that sk also isn't refcounted. */ WARN_ON_ONCE(*refcounted); return reuse_sk; } static inline struct sock *__inet6_lookup_skb(struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const __be16 sport, const __be16 dport, int iif, int sdif, bool *refcounted) { struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct sock *sk; sk = inet6_steal_sock(net, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, dport, refcounted, inet6_ehashfn); if (IS_ERR(sk)) return NULL; if (sk) return sk; return __inet6_lookup(net, hashinfo, skb, doff, &ip6h->saddr, sport, &ip6h->daddr, ntohs(dport), iif, sdif, refcounted); } struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif); int inet6_hash(struct sock *sk); static inline bool inet6_match(const struct net *net, const struct sock *sk, const struct in6_addr *saddr, const struct in6_addr *daddr, const __portpair ports, const int dif, const int sdif) { if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || sk->sk_portpair != ports || !ipv6_addr_equal(&sk->sk_v6_daddr, saddr) || !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, sdif); } #endif /* IS_ENABLED(CONFIG_IPV6) */ #endif /* _INET6_HASHTABLES_H */
4 9 12 5 13 12 8 17 11 16 13 12 2 6 5 1 22 579 579 8 6 6 6 578 1697 1702 579 8 5 3 11 11 11 8 2 3 6 6 6 5 5 1 4 1 2 2 2 2 13 1 7 9 12 12 23 1 1 22 21 14 11 3 8 9 2 3 2 11 2 5 4 17 1 1 1 2 1 1 1 1 5 2 1 1 33 5 1 32 32 2 31 1 1 1 1 1 4 1 1 1 52 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 // SPDX-License-Identifier: GPL-2.0-or-later /** -*- linux-c -*- *********************************************************** * Linux PPP over Ethernet (PPPoX/PPPoE) Sockets * * PPPoX --- Generic PPP encapsulation socket family * PPPoE --- PPP over Ethernet (RFC 2516) * * Version: 0.7.0 * * 070228 : Fix to allow multiple sessions with same remote MAC and same * session id by including the local device ifindex in the * tuple identifying a session. This also ensures packets can't * be injected into a session from interfaces other than the one * specified by userspace. Florian Zumbiehl <florz@florz.de> * (Oh, BTW, this one is YYMMDD, in case you were wondering ...) * 220102 : Fix module use count on failure in pppoe_create, pppox_sk -acme * 030700 : Fixed connect logic to allow for disconnect. * 270700 : Fixed potential SMP problems; we must protect against * simultaneous invocation of ppp_input * and ppp_unregister_channel. * 040800 : Respect reference count mechanisms on net-devices. * 200800 : fix kfree(skb) in pppoe_rcv (acme) * Module reference count is decremented in the right spot now, * guards against sock_put not actually freeing the sk * in pppoe_release. * 051000 : Initialization cleanup. * 111100 : Fix recvmsg. * 050101 : Fix PADT processing. * 140501 : Use pppoe_rcv_core to handle all backlog. (Alexey) * 170701 : Do not lock_sock with rwlock held. (DaveM) * Ignore discovery frames if user has socket * locked. (DaveM) * Ignore return value of dev_queue_xmit in __pppoe_xmit * or else we may kfree an SKB twice. (DaveM) * 190701 : When doing copies of skb's in __pppoe_xmit, always delete * the original skb that was passed in on success, never on * failure. Delete the copy of the skb on failure to avoid * a memory leak. * 081001 : Misc. cleanup (licence string, non-blocking, prevent * reference of device on close). * 121301 : New ppp channels interface; cannot unregister a channel * from interrupts. Thus, we mark the socket as a ZOMBIE * and do the unregistration later. * 081002 : seq_file support for proc stuff -acme * 111602 : Merge all 2.4 fixes into 2.5/2.6 tree. Label 2.5/2.6 * as version 0.7. Spacing cleanup. * Author: Michal Ostrowski <mostrows@speakeasy.net> * Contributors: * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * David S. Miller (davem@redhat.com) * * License: */ #include <linux/string.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/net.h> #include <linux/inetdevice.h> #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/if_ether.h> #include <linux/if_pppox.h> #include <linux/ppp_channel.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/notifier.h> #include <linux/file.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/nsproxy.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> #include <linux/uaccess.h> #define PPPOE_HASH_BITS CONFIG_PPPOE_HASH_BITS #define PPPOE_HASH_SIZE (1 << PPPOE_HASH_BITS) #define PPPOE_HASH_MASK (PPPOE_HASH_SIZE - 1) static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb); static const struct proto_ops pppoe_ops; static const struct ppp_channel_ops pppoe_chan_ops; /* per-net private data for this module */ static unsigned int pppoe_net_id __read_mostly; struct pppoe_net { /* * we could use _single_ hash table for all * nets by injecting net id into the hash but * it would increase hash chains and add * a few additional math comparisons messy * as well, moreover in case of SMP less locking * controversy here */ struct pppox_sock *hash_table[PPPOE_HASH_SIZE]; rwlock_t hash_lock; }; /* * PPPoE could be in the following stages: * 1) Discovery stage (to obtain remote MAC and Session ID) * 2) Session stage (MAC and SID are known) * * Ethernet frames have a special tag for this but * we use simpler approach based on session id */ static inline bool stage_session(__be16 sid) { return sid != 0; } static inline struct pppoe_net *pppoe_pernet(struct net *net) { return net_generic(net, pppoe_net_id); } static inline int cmp_2_addr(struct pppoe_addr *a, struct pppoe_addr *b) { return a->sid == b->sid && ether_addr_equal(a->remote, b->remote); } static inline int cmp_addr(struct pppoe_addr *a, __be16 sid, char *addr) { return a->sid == sid && ether_addr_equal(a->remote, addr); } #if 8 % PPPOE_HASH_BITS #error 8 must be a multiple of PPPOE_HASH_BITS #endif static int hash_item(__be16 sid, unsigned char *addr) { unsigned char hash = 0; unsigned int i; for (i = 0; i < ETH_ALEN; i++) hash ^= addr[i]; for (i = 0; i < sizeof(sid_t) * 8; i += 8) hash ^= (__force __u32)sid >> i; for (i = 8; (i >>= 1) >= PPPOE_HASH_BITS;) hash ^= hash >> i; return hash & PPPOE_HASH_MASK; } /********************************************************************** * * Set/get/delete/rehash items (internal versions) * **********************************************************************/ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret; ret = pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) return ret; ret = ret->next; } return NULL; } static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); struct pppox_sock *ret; ret = pn->hash_table[hash]; while (ret) { if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && ret->pppoe_ifindex == po->pppoe_ifindex) return -EALREADY; ret = ret->next; } po->next = pn->hash_table[hash]; pn->hash_table[hash] = po; return 0; } static void __delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { int hash = hash_item(sid, addr); struct pppox_sock *ret, **src; ret = pn->hash_table[hash]; src = &pn->hash_table[hash]; while (ret) { if (cmp_addr(&ret->pppoe_pa, sid, addr) && ret->pppoe_ifindex == ifindex) { *src = ret->next; break; } src = &ret->next; ret = ret->next; } } /********************************************************************** * * Set/get/delete/rehash items * **********************************************************************/ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid, unsigned char *addr, int ifindex) { struct pppox_sock *po; read_lock_bh(&pn->hash_lock); po = __get_item(pn, sid, addr, ifindex); if (po) sock_hold(sk_pppox(po)); read_unlock_bh(&pn->hash_lock); return po; } static inline struct pppox_sock *get_item_by_addr(struct net *net, struct sockaddr_pppox *sp) { struct net_device *dev; struct pppoe_net *pn; struct pppox_sock *pppox_sock = NULL; int ifindex; rcu_read_lock(); dev = dev_get_by_name_rcu(net, sp->sa_addr.pppoe.dev); if (dev) { ifindex = dev->ifindex; pn = pppoe_pernet(net); pppox_sock = get_item(pn, sp->sa_addr.pppoe.sid, sp->sa_addr.pppoe.remote, ifindex); } rcu_read_unlock(); return pppox_sock; } static inline void delete_item(struct pppoe_net *pn, __be16 sid, char *addr, int ifindex) { write_lock_bh(&pn->hash_lock); __delete_item(pn, sid, addr, ifindex); write_unlock_bh(&pn->hash_lock); } /*************************************************************************** * * Handler for device events. * Certain device events require that sockets be unconnected. * **************************************************************************/ static void pppoe_flush_dev(struct net_device *dev) { struct pppoe_net *pn; int i; pn = pppoe_pernet(dev_net(dev)); write_lock_bh(&pn->hash_lock); for (i = 0; i < PPPOE_HASH_SIZE; i++) { struct pppox_sock *po = pn->hash_table[i]; struct sock *sk; while (po) { while (po && po->pppoe_dev != dev) { po = po->next; } if (!po) break; sk = sk_pppox(po); /* We always grab the socket lock, followed by the * hash_lock, in that order. Since we should hold the * sock lock while doing any unbinding, we need to * release the lock we're holding. Hold a reference to * the sock so it doesn't disappear as we're jumping * between locks. */ sock_hold(sk); write_unlock_bh(&pn->hash_lock); lock_sock(sk); if (po->pppoe_dev == dev && sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) { pppox_unbind_sock(sk); sk->sk_state_change(sk); po->pppoe_dev = NULL; dev_put(dev); } release_sock(sk); sock_put(sk); /* Restart the process from the start of the current * hash chain. We dropped locks so the world may have * change from underneath us. */ BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); write_lock_bh(&pn->hash_lock); po = pn->hash_table[i]; } } write_unlock_bh(&pn->hash_lock); } static int pppoe_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* Only look at sockets that are using this specific device. */ switch (event) { case NETDEV_CHANGEADDR: case NETDEV_CHANGEMTU: /* A change in mtu or address is a bad thing, requiring * LCP re-negotiation. */ case NETDEV_GOING_DOWN: case NETDEV_DOWN: /* Find every socket on this device and kill it. */ pppoe_flush_dev(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block pppoe_notifier = { .notifier_call = pppoe_device_event, }; /************************************************************************ * * Do the real work of receiving a PPPoE Session frame. * ***********************************************************************/ static int pppoe_rcv_core(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct pppox_sock *relay_po; /* Backlog receive. Semantics of backlog rcv preclude any code from * executing in lock_sock()/release_sock() bounds; meaning sk->sk_state * can't change. */ if (skb->pkt_type == PACKET_OTHERHOST) goto abort_kfree; if (sk->sk_state & PPPOX_BOUND) { ppp_input(&po->chan, skb); } else if (sk->sk_state & PPPOX_RELAY) { relay_po = get_item_by_addr(sock_net(sk), &po->pppoe_relay); if (relay_po == NULL) goto abort_kfree; if ((sk_pppox(relay_po)->sk_state & PPPOX_CONNECTED) == 0) goto abort_put; if (!__pppoe_xmit(sk_pppox(relay_po), skb)) goto abort_put; sock_put(sk_pppox(relay_po)); } else { if (sock_queue_rcv_skb(sk, skb)) goto abort_kfree; } return NET_RX_SUCCESS; abort_put: sock_put(sk_pppox(relay_po)); abort_kfree: kfree_skb(skb); return NET_RX_DROP; } /************************************************************************ * * Receive wrapper called in BH context. * ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; int len; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb_mac_header_len(skb) < ETH_HLEN) goto drop; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto drop; ph = pppoe_hdr(skb); len = ntohs(ph->length); skb_pull_rcsum(skb, sizeof(*ph)); if (skb->len < len) goto drop; if (pskb_trim_rcsum(skb, len)) goto drop; ph = pppoe_hdr(skb); pn = pppoe_pernet(dev_net(dev)); /* Note that get_item does a sock_hold(), so sk_pppox(po) * is known to be safe. */ po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (!po) goto drop; return sk_receive_skb(sk_pppox(po), skb, 0); drop: kfree_skb(skb); out: return NET_RX_DROP; } static void pppoe_unbind_sock_work(struct work_struct *work) { struct pppox_sock *po = container_of(work, struct pppox_sock, proto.pppoe.padt_work); struct sock *sk = sk_pppox(po); lock_sock(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); release_sock(sk); sock_put(sk); } /************************************************************************ * * Receive a PPPoE Discovery frame. * This is solely for detection of PADT frames * ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct pppoe_hdr *ph; struct pppox_sock *po; struct pppoe_net *pn; skb = skb_share_check(skb, GFP_ATOMIC); if (!skb) goto out; if (skb->pkt_type != PACKET_HOST) goto abort; if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) goto abort; ph = pppoe_hdr(skb); if (ph->code != PADT_CODE) goto abort; pn = pppoe_pernet(dev_net(dev)); po = get_item(pn, ph->sid, eth_hdr(skb)->h_source, dev->ifindex); if (po) if (!schedule_work(&po->proto.pppoe.padt_work)) sock_put(sk_pppox(po)); abort: kfree_skb(skb); out: return NET_RX_SUCCESS; /* Lies... :-) */ } static struct packet_type pppoes_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_SES), .func = pppoe_rcv, }; static struct packet_type pppoed_ptype __read_mostly = { .type = cpu_to_be16(ETH_P_PPP_DISC), .func = pppoe_disc_rcv, }; static struct proto pppoe_sk_proto __read_mostly = { .name = "PPPOE", .owner = THIS_MODULE, .obj_size = sizeof(struct pppox_sock), }; /*********************************************************************** * * Initialize a new struct sock. * **********************************************************************/ static int pppoe_create(struct net *net, struct socket *sock, int kern) { struct sock *sk; sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppoe_sk_proto, kern); if (!sk) return -ENOMEM; sock_init_data(sock, sk); sock->state = SS_UNCONNECTED; sock->ops = &pppoe_ops; sk->sk_backlog_rcv = pppoe_rcv_core; sk->sk_state = PPPOX_NONE; sk->sk_type = SOCK_STREAM; sk->sk_family = PF_PPPOX; sk->sk_protocol = PX_PROTO_OE; INIT_WORK(&pppox_sk(sk)->proto.pppoe.padt_work, pppoe_unbind_sock_work); return 0; } static int pppoe_release(struct socket *sock) { struct sock *sk = sock->sk; struct pppox_sock *po; struct pppoe_net *pn; struct net *net = NULL; if (!sk) return 0; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD)) { release_sock(sk); return -EBADF; } po = pppox_sk(sk); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } pppox_unbind_sock(sk); /* Signal the death of the socket. */ sk->sk_state = PPPOX_DEAD; net = sock_net(sk); pn = pppoe_pernet(net); /* * protect "po" from concurrent updates * on pppoe_flush_dev */ delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_receive_queue); release_sock(sk); sock_put(sk); return 0; } static int pppoe_connect(struct socket *sock, struct sockaddr *uservaddr, int sockaddr_len, int flags) { struct sock *sk = sock->sk; struct sockaddr_pppox *sp = (struct sockaddr_pppox *)uservaddr; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = NULL; struct pppoe_net *pn; struct net *net = NULL; int error; lock_sock(sk); error = -EINVAL; if (sockaddr_len != sizeof(struct sockaddr_pppox)) goto end; if (sp->sa_protocol != PX_PROTO_OE) goto end; /* Check for already bound sockets */ error = -EBUSY; if ((sk->sk_state & PPPOX_CONNECTED) && stage_session(sp->sa_addr.pppoe.sid)) goto end; /* Check for already disconnected sockets, on attempts to disconnect */ error = -EALREADY; if ((sk->sk_state & PPPOX_DEAD) && !stage_session(sp->sa_addr.pppoe.sid)) goto end; error = 0; /* Delete the old binding */ if (stage_session(po->pppoe_pa.sid)) { pppox_unbind_sock(sk); pn = pppoe_pernet(sock_net(sk)); delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } po->pppoe_ifindex = 0; memset(&po->pppoe_pa, 0, sizeof(po->pppoe_pa)); memset(&po->pppoe_relay, 0, sizeof(po->pppoe_relay)); memset(&po->chan, 0, sizeof(po->chan)); po->next = NULL; po->num = 0; sk->sk_state = PPPOX_NONE; } /* Re-bind in session stage only */ if (stage_session(sp->sa_addr.pppoe.sid)) { error = -ENODEV; net = sock_net(sk); dev = dev_get_by_name(net, sp->sa_addr.pppoe.dev); if (!dev) goto err_put; po->pppoe_dev = dev; po->pppoe_ifindex = dev->ifindex; pn = pppoe_pernet(net); if (!(dev->flags & IFF_UP)) { goto err_put; } memcpy(&po->pppoe_pa, &sp->sa_addr.pppoe, sizeof(struct pppoe_addr)); write_lock_bh(&pn->hash_lock); error = __set_item(pn, po); write_unlock_bh(&pn->hash_lock); if (error < 0) goto err_put; po->chan.hdrlen = (sizeof(struct pppoe_hdr) + dev->hard_header_len); po->chan.mtu = dev->mtu - sizeof(struct pppoe_hdr) - 2; po->chan.private = sk; po->chan.ops = &pppoe_chan_ops; error = ppp_register_net_channel(dev_net(dev), &po->chan); if (error) { delete_item(pn, po->pppoe_pa.sid, po->pppoe_pa.remote, po->pppoe_ifindex); goto err_put; } sk->sk_state = PPPOX_CONNECTED; } po->num = sp->sa_addr.pppoe.sid; end: release_sock(sk); return error; err_put: if (po->pppoe_dev) { dev_put(po->pppoe_dev); po->pppoe_dev = NULL; } goto end; } static int pppoe_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { int len = sizeof(struct sockaddr_pppox); struct sockaddr_pppox sp; sp.sa_family = AF_PPPOX; sp.sa_protocol = PX_PROTO_OE; memcpy(&sp.sa_addr.pppoe, &pppox_sk(sock->sk)->pppoe_pa, sizeof(struct pppoe_addr)); memcpy(uaddr, &sp, len); return len; } static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int val; int err; switch (cmd) { case PPPIOCGMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (put_user(po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN, (int __user *)arg)) break; err = 0; break; case PPPIOCSMRU: err = -ENXIO; if (!(sk->sk_state & PPPOX_CONNECTED)) break; err = -EFAULT; if (get_user(val, (int __user *)arg)) break; if (val < (po->pppoe_dev->mtu - sizeof(struct pppoe_hdr) - PPP_HDRLEN)) err = 0; else err = -EINVAL; break; case PPPIOCSFLAGS: err = -EFAULT; if (get_user(val, (int __user *)arg)) break; err = 0; break; case PPPOEIOCSFWD: { struct pppox_sock *relay_po; err = -EBUSY; if (sk->sk_state & (PPPOX_BOUND | PPPOX_DEAD)) break; err = -ENOTCONN; if (!(sk->sk_state & PPPOX_CONNECTED)) break; /* PPPoE address from the user specifies an outbound PPPoE address which frames are forwarded to */ err = -EFAULT; if (copy_from_user(&po->pppoe_relay, (void __user *)arg, sizeof(struct sockaddr_pppox))) break; err = -EINVAL; if (po->pppoe_relay.sa_family != AF_PPPOX || po->pppoe_relay.sa_protocol != PX_PROTO_OE) break; /* Check that the socket referenced by the address actually exists. */ relay_po = get_item_by_addr(sock_net(sk), &po->pppoe_relay); if (!relay_po) break; sock_put(sk_pppox(relay_po)); sk->sk_state |= PPPOX_RELAY; err = 0; break; } case PPPOEIOCDFWD: err = -EALREADY; if (!(sk->sk_state & PPPOX_RELAY)) break; sk->sk_state &= ~PPPOX_RELAY; err = 0; break; default: err = -ENOTTY; } return err; } static int pppoe_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct sk_buff *skb; struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int error; struct pppoe_hdr hdr; struct pppoe_hdr *ph; struct net_device *dev; char *start; int hlen; lock_sock(sk); if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { error = -ENOTCONN; goto end; } hdr.ver = 1; hdr.type = 1; hdr.code = 0; hdr.sid = po->num; dev = po->pppoe_dev; error = -EMSGSIZE; if (total_len > (dev->mtu + dev->hard_header_len)) goto end; hlen = LL_RESERVED_SPACE(dev); skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + dev->needed_tailroom, 0, GFP_KERNEL); if (!skb) { error = -ENOMEM; goto end; } /* Reserve space for headers. */ skb_reserve(skb, hlen); skb_reset_network_header(skb); skb->dev = dev; skb->priority = READ_ONCE(sk->sk_priority); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); ph = skb_put(skb, total_len + sizeof(struct pppoe_hdr)); start = (char *)&ph->tag[0]; error = memcpy_from_msg(start, m, total_len); if (error < 0) { kfree_skb(skb); goto end; } error = total_len; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, total_len); memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); ph->length = htons(total_len); dev_queue_xmit(skb); end: release_sock(sk); return error; } /************************************************************************ * * xmit function for internal use. * ***********************************************************************/ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; struct pppoe_hdr *ph; int data_len = skb->len; /* The higher-level PPP code (ppp_unregister_channel()) ensures the PPP * xmit operations conclude prior to an unregistration call. Thus * sk->sk_state cannot change, so we don't need to do lock_sock(). * But, we also can't do a lock_sock since that introduces a potential * deadlock as we'd reverse the lock ordering used when calling * ppp_unregister_channel(). */ if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; if (!dev) goto abort; /* Copy the data if there is no space for the header or if it's * read-only. */ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) goto abort; __skb_push(skb, sizeof(*ph)); skb_reset_network_header(skb); ph = pppoe_hdr(skb); ph->ver = 1; ph->type = 1; ph->code = 0; ph->sid = po->num; ph->length = htons(data_len); skb->protocol = cpu_to_be16(ETH_P_PPP_SES); skb->dev = dev; dev_hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, data_len); dev_queue_xmit(skb); return 1; abort: kfree_skb(skb); return 1; } /************************************************************************ * * xmit function called by generic PPP driver * sends PPP frame over PPPoE socket * ***********************************************************************/ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = chan->private; return __pppoe_xmit(sk, skb); } static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path, const struct ppp_channel *chan) { struct sock *sk = chan->private; struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED) || !dev) return -1; path->type = DEV_PATH_PPPOE; path->encap.proto = htons(ETH_P_PPP_SES); path->encap.id = be16_to_cpu(po->num); memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN); memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN); path->dev = ctx->dev; ctx->dev = dev; return 0; } static const struct ppp_channel_ops pppoe_chan_ops = { .start_xmit = pppoe_xmit, .fill_forward_path = pppoe_fill_forward_path, }; static int pppoe_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; if (sk->sk_state & PPPOX_BOUND) return -EIO; skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; total_len = min_t(size_t, total_len, skb->len); error = skb_copy_datagram_msg(skb, 0, m, total_len); if (error == 0) { consume_skb(skb); return total_len; } kfree_skb(skb); return error; } #ifdef CONFIG_PROC_FS static int pppoe_seq_show(struct seq_file *seq, void *v) { struct pppox_sock *po; char *dev_name; if (v == SEQ_START_TOKEN) { seq_puts(seq, "Id Address Device\n"); goto out; } po = v; dev_name = po->pppoe_pa.dev; seq_printf(seq, "%08X %pM %8s\n", po->pppoe_pa.sid, po->pppoe_pa.remote, dev_name); out: return 0; } static inline struct pppox_sock *pppoe_get_idx(struct pppoe_net *pn, loff_t pos) { struct pppox_sock *po; int i; for (i = 0; i < PPPOE_HASH_SIZE; i++) { po = pn->hash_table[i]; while (po) { if (!pos--) goto out; po = po->next; } } out: return po; } static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) __acquires(pn->hash_lock) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); loff_t l = *pos; read_lock_bh(&pn->hash_lock); return l ? pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; } static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); struct pppox_sock *po; ++*pos; if (v == SEQ_START_TOKEN) { po = pppoe_get_idx(pn, 0); goto out; } po = v; if (po->next) po = po->next; else { int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); po = NULL; while (++hash < PPPOE_HASH_SIZE) { po = pn->hash_table[hash]; if (po) break; } } out: return po; } static void pppoe_seq_stop(struct seq_file *seq, void *v) __releases(pn->hash_lock) { struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); read_unlock_bh(&pn->hash_lock); } static const struct seq_operations pppoe_seq_ops = { .start = pppoe_seq_start, .next = pppoe_seq_next, .stop = pppoe_seq_stop, .show = pppoe_seq_show, }; #endif /* CONFIG_PROC_FS */ static const struct proto_ops pppoe_ops = { .family = AF_PPPOX, .owner = THIS_MODULE, .release = pppoe_release, .bind = sock_no_bind, .connect = pppoe_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = pppoe_getname, .poll = datagram_poll, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = pppoe_sendmsg, .recvmsg = pppoe_recvmsg, .mmap = sock_no_mmap, .ioctl = pppox_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = pppox_compat_ioctl, #endif }; static const struct pppox_proto pppoe_proto = { .create = pppoe_create, .ioctl = pppoe_ioctl, .owner = THIS_MODULE, }; static __net_init int pppoe_init_net(struct net *net) { struct pppoe_net *pn = pppoe_pernet(net); struct proc_dir_entry *pde; rwlock_init(&pn->hash_lock); pde = proc_create_net("pppoe", 0444, net->proc_net, &pppoe_seq_ops, sizeof(struct seq_net_private)); #ifdef CONFIG_PROC_FS if (!pde) return -ENOMEM; #endif return 0; } static __net_exit void pppoe_exit_net(struct net *net) { remove_proc_entry("pppoe", net->proc_net); } static struct pernet_operations pppoe_net_ops = { .init = pppoe_init_net, .exit = pppoe_exit_net, .id = &pppoe_net_id, .size = sizeof(struct pppoe_net), }; static int __init pppoe_init(void) { int err; err = register_pernet_device(&pppoe_net_ops); if (err) goto out; err = proto_register(&pppoe_sk_proto, 0); if (err) goto out_unregister_net_ops; err = register_pppox_proto(PX_PROTO_OE, &pppoe_proto); if (err) goto out_unregister_pppoe_proto; dev_add_pack(&pppoes_ptype); dev_add_pack(&pppoed_ptype); register_netdevice_notifier(&pppoe_notifier); return 0; out_unregister_pppoe_proto: proto_unregister(&pppoe_sk_proto); out_unregister_net_ops: unregister_pernet_device(&pppoe_net_ops); out: return err; } static void __exit pppoe_exit(void) { unregister_netdevice_notifier(&pppoe_notifier); dev_remove_pack(&pppoed_ptype); dev_remove_pack(&pppoes_ptype); unregister_pppox_proto(PX_PROTO_OE); proto_unregister(&pppoe_sk_proto); unregister_pernet_device(&pppoe_net_ops); } module_init(pppoe_init); module_exit(pppoe_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NET_PF_PROTO(PF_PPPOX, PX_PROTO_OE);
1 1 3 2 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 // SPDX-License-Identifier: GPL-2.0-only /* DVB USB compliant linux driver for MSI Mega Sky 580 DVB-T USB2.0 receiver * * Copyright (C) 2006 Aapo Tahkola (aet@rasterburn.org) * * see Documentation/driver-api/media/drivers/dvb-usb.rst for more information */ #include "m920x.h" #include "mt352.h" #include "mt352_priv.h" #include "qt1010.h" #include "tda1004x.h" #include "tda827x.h" #include "mt2060.h" #include <media/tuner.h> #include "tuner-simple.h" #include <linux/unaligned.h> /* debug */ static int dvb_usb_m920x_debug; module_param_named(debug,dvb_usb_m920x_debug, int, 0644); MODULE_PARM_DESC(debug, "set debugging level (1=rc (or-able))." DVB_USB_DEBUG_STATUS); DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr); static int m920x_set_filter(struct dvb_usb_device *d, int type, int idx, int pid); static inline int m920x_read(struct usb_device *udev, u8 request, u16 value, u16 index, void *data, int size) { int ret; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), request, USB_TYPE_VENDOR | USB_DIR_IN, value, index, data, size, 2000); if (ret < 0) { printk(KERN_INFO "m920x_read = error: %d\n", ret); return ret; } if (ret != size) { deb("m920x_read = no data\n"); return -EIO; } return 0; } static inline int m920x_write(struct usb_device *udev, u8 request, u16 value, u16 index) { return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), request, USB_TYPE_VENDOR | USB_DIR_OUT, value, index, NULL, 0, 2000); } static inline int m920x_write_seq(struct usb_device *udev, u8 request, struct m920x_inits *seq) { int ret; do { ret = m920x_write(udev, request, seq->data, seq->address); if (ret != 0) return ret; seq++; } while (seq->address); return 0; } static int m920x_init(struct dvb_usb_device *d, struct m920x_inits *rc_seq) { int ret, i, epi, flags = 0; int adap_enabled[M9206_MAX_ADAPTERS] = { 0 }; /* Remote controller init. */ if (d->props.rc.legacy.rc_query || d->props.rc.core.rc_query) { deb("Initialising remote control\n"); ret = m920x_write_seq(d->udev, M9206_CORE, rc_seq); if (ret != 0) { deb("Initialising remote control failed\n"); return ret; } deb("Initialising remote control success\n"); } for (i = 0; i < d->props.num_adapters; i++) flags |= d->adapter[i].props.fe[0].caps; /* Some devices(Dposh) might crash if we attempt touch at all. */ if (flags & DVB_USB_ADAP_HAS_PID_FILTER) { for (i = 0; i < d->props.num_adapters; i++) { epi = d->adapter[i].props.fe[0].stream.endpoint - 0x81; if (epi < 0 || epi >= M9206_MAX_ADAPTERS) { printk(KERN_INFO "m920x: Unexpected adapter endpoint!\n"); return -EINVAL; } adap_enabled[epi] = 1; } for (i = 0; i < M9206_MAX_ADAPTERS; i++) { if (adap_enabled[i]) continue; if ((ret = m920x_set_filter(d, 0x81 + i, 0, 0x0)) != 0) return ret; if ((ret = m920x_set_filter(d, 0x81 + i, 0, 0x02f5)) != 0) return ret; } } return 0; } static int m920x_init_ep(struct usb_interface *intf) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_host_interface *alt; if ((alt = usb_altnum_to_altsetting(intf, 1)) == NULL) { deb("No alt found!\n"); return -ENODEV; } return usb_set_interface(udev, alt->desc.bInterfaceNumber, alt->desc.bAlternateSetting); } static inline void m920x_parse_rc_state(struct dvb_usb_device *d, u8 rc_state, int *state) { struct m920x_state *m = d->priv; switch (rc_state) { case 0x80: *state = REMOTE_NO_KEY_PRESSED; break; case 0x88: /* framing error or "invalid code" */ case 0x99: case 0xc0: case 0xd8: *state = REMOTE_NO_KEY_PRESSED; m->rep_count = 0; break; case 0x93: case 0x92: case 0x83: /* pinnacle PCTV310e */ case 0x82: m->rep_count = 0; *state = REMOTE_KEY_PRESSED; break; case 0x91: case 0x81: /* pinnacle PCTV310e */ /* prevent immediate auto-repeat */ if (++m->rep_count > 2) *state = REMOTE_KEY_REPEAT; else *state = REMOTE_NO_KEY_PRESSED; break; default: deb("Unexpected rc state %02x\n", rc_state); *state = REMOTE_NO_KEY_PRESSED; break; } } static int m920x_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { int i, ret = 0; u8 *rc_state; rc_state = kmalloc(2, GFP_KERNEL); if (!rc_state) return -ENOMEM; ret = m920x_read(d->udev, M9206_CORE, 0x0, M9206_RC_STATE, rc_state, 1); if (ret != 0) goto out; ret = m920x_read(d->udev, M9206_CORE, 0x0, M9206_RC_KEY, rc_state + 1, 1); if (ret != 0) goto out; m920x_parse_rc_state(d, rc_state[0], state); for (i = 0; i < d->props.rc.legacy.rc_map_size; i++) if (rc5_data(&d->props.rc.legacy.rc_map_table[i]) == rc_state[1]) { *event = d->props.rc.legacy.rc_map_table[i].keycode; goto out; } if (rc_state[1] != 0) deb("Unknown rc key %02x\n", rc_state[1]); *state = REMOTE_NO_KEY_PRESSED; out: kfree(rc_state); return ret; } static int m920x_rc_core_query(struct dvb_usb_device *d) { int ret = 0; u8 *rc_state; int state; rc_state = kmalloc(2, GFP_KERNEL); if (!rc_state) return -ENOMEM; if ((ret = m920x_read(d->udev, M9206_CORE, 0x0, M9206_RC_STATE, &rc_state[0], 1)) != 0) goto out; if ((ret = m920x_read(d->udev, M9206_CORE, 0x0, M9206_RC_KEY, &rc_state[1], 1)) != 0) goto out; deb("state=0x%02x keycode=0x%02x\n", rc_state[0], rc_state[1]); m920x_parse_rc_state(d, rc_state[0], &state); if (state == REMOTE_NO_KEY_PRESSED) rc_keyup(d->rc_dev); else if (state == REMOTE_KEY_REPEAT) rc_repeat(d->rc_dev); else rc_keydown(d->rc_dev, RC_PROTO_UNKNOWN, rc_state[1], 0); out: kfree(rc_state); return ret; } /* I2C */ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int num) { struct dvb_usb_device *d = i2c_get_adapdata(adap); int i, j; int ret = 0; if (mutex_lock_interruptible(&d->i2c_mutex) < 0) return -EAGAIN; for (i = 0; i < num; i++) { if (msg[i].flags & (I2C_M_NO_RD_ACK | I2C_M_IGNORE_NAK | I2C_M_TEN) || msg[i].len == 0) { /* For a 0 byte message, I think sending the address * to index 0x80|0x40 would be the correct thing to * do. However, zero byte messages are only used for * probing, and since we don't know how to get the * slave's ack, we can't probe. */ ret = -ENOTSUPP; goto unlock; } /* Send START & address/RW bit */ if (!(msg[i].flags & I2C_M_NOSTART)) { if ((ret = m920x_write(d->udev, M9206_I2C, (msg[i].addr << 1) | (msg[i].flags & I2C_M_RD ? 0x01 : 0), 0x80)) != 0) goto unlock; /* Should check for ack here, if we knew how. */ } if (msg[i].flags & I2C_M_RD) { char *read = kmalloc(1, GFP_KERNEL); if (!read) { ret = -ENOMEM; goto unlock; } for (j = 0; j < msg[i].len; j++) { /* Last byte of transaction? * Send STOP, otherwise send ACK. */ int stop = (i+1 == num && j+1 == msg[i].len) ? 0x40 : 0x01; if ((ret = m920x_read(d->udev, M9206_I2C, 0x0, 0x20 | stop, read, 1)) != 0) { kfree(read); goto unlock; } msg[i].buf[j] = read[0]; } kfree(read); } else { for (j = 0; j < msg[i].len; j++) { /* Last byte of transaction? Then send STOP. */ int stop = (i+1 == num && j+1 == msg[i].len) ? 0x40 : 0x00; if ((ret = m920x_write(d->udev, M9206_I2C, msg[i].buf[j], stop)) != 0) goto unlock; /* Should check for ack here too. */ } } } ret = num; unlock: mutex_unlock(&d->i2c_mutex); return ret; } static u32 m920x_i2c_func(struct i2c_adapter *adapter) { return I2C_FUNC_I2C; } static struct i2c_algorithm m920x_i2c_algo = { .master_xfer = m920x_i2c_xfer, .functionality = m920x_i2c_func, }; /* pid filter */ static int m920x_set_filter(struct dvb_usb_device *d, int type, int idx, int pid) { int ret = 0; if (pid >= 0x8000) return -EINVAL; pid |= 0x8000; if ((ret = m920x_write(d->udev, M9206_FILTER, pid, (type << 8) | (idx * 4) )) != 0) return ret; if ((ret = m920x_write(d->udev, M9206_FILTER, 0, (type << 8) | (idx * 4) )) != 0) return ret; return ret; } static int m920x_update_filters(struct dvb_usb_adapter *adap) { struct m920x_state *m = adap->dev->priv; int enabled = m->filtering_enabled[adap->id]; int i, ret = 0, filter = 0; int ep = adap->props.fe[0].stream.endpoint; for (i = 0; i < M9206_MAX_FILTERS; i++) if (m->filters[adap->id][i] == 8192) enabled = 0; /* Disable all filters */ if ((ret = m920x_set_filter(adap->dev, ep, 1, enabled)) != 0) return ret; for (i = 0; i < M9206_MAX_FILTERS; i++) if ((ret = m920x_set_filter(adap->dev, ep, i + 2, 0)) != 0) return ret; /* Set */ if (enabled) { for (i = 0; i < M9206_MAX_FILTERS; i++) { if (m->filters[adap->id][i] == 0) continue; if ((ret = m920x_set_filter(adap->dev, ep, filter + 2, m->filters[adap->id][i])) != 0) return ret; filter++; } } return ret; } static int m920x_pid_filter_ctrl(struct dvb_usb_adapter *adap, int onoff) { struct m920x_state *m = adap->dev->priv; m->filtering_enabled[adap->id] = onoff ? 1 : 0; return m920x_update_filters(adap); } static int m920x_pid_filter(struct dvb_usb_adapter *adap, int index, u16 pid, int onoff) { struct m920x_state *m = adap->dev->priv; m->filters[adap->id][index] = onoff ? pid : 0; return m920x_update_filters(adap); } static int m920x_firmware_download(struct usb_device *udev, const struct firmware *fw) { u16 value, index, size; u8 *read, *buff; int i, pass, ret = 0; buff = kmalloc(65536, GFP_KERNEL); if (buff == NULL) return -ENOMEM; read = kmalloc(4, GFP_KERNEL); if (!read) { kfree(buff); return -ENOMEM; } if ((ret = m920x_read(udev, M9206_FILTER, 0x0, 0x8000, read, 4)) != 0) goto done; deb("%*ph\n", 4, read); if ((ret = m920x_read(udev, M9206_FW, 0x0, 0x0, read, 1)) != 0) goto done; deb("%x\n", read[0]); for (pass = 0; pass < 2; pass++) { for (i = 0; i + (sizeof(u16) * 3) < fw->size;) { value = get_unaligned_le16(fw->data + i); i += sizeof(u16); index = get_unaligned_le16(fw->data + i); i += sizeof(u16); size = get_unaligned_le16(fw->data + i); i += sizeof(u16); if (pass == 1) { /* Will stall if using fw->data ... */ memcpy(buff, fw->data + i, size); ret = usb_control_msg(udev, usb_sndctrlpipe(udev,0), M9206_FW, USB_TYPE_VENDOR | USB_DIR_OUT, value, index, buff, size, 20); if (ret != size) { deb("error while uploading fw!\n"); ret = -EIO; goto done; } msleep(3); } i += size; } if (i != fw->size) { deb("bad firmware file!\n"); ret = -EINVAL; goto done; } } msleep(36); /* m920x will disconnect itself from the bus after this. */ (void) m920x_write(udev, M9206_CORE, 0x01, M9206_FW_GO); deb("firmware uploaded!\n"); done: kfree(read); kfree(buff); return ret; } /* Callbacks for DVB USB */ static int m920x_identify_state(struct usb_device *udev, const struct dvb_usb_device_properties *props, const struct dvb_usb_device_description **desc, int *cold) { struct usb_host_interface *alt; alt = usb_altnum_to_altsetting(usb_ifnum_to_if(udev, 0), 1); *cold = (alt == NULL) ? 1 : 0; return 0; } /* demod configurations */ static int m920x_mt352_demod_init(struct dvb_frontend *fe) { int ret; static const u8 config[] = { CONFIG, 0x3d }; static const u8 clock[] = { CLOCK_CTL, 0x30 }; static const u8 reset[] = { RESET, 0x80 }; static const u8 adc_ctl[] = { ADC_CTL_1, 0x40 }; static const u8 agc[] = { AGC_TARGET, 0x1c, 0x20 }; static const u8 sec_agc[] = { 0x69, 0x00, 0xff, 0xff, 0x40, 0xff, 0x00, 0x40, 0x40 }; static const u8 unk1[] = { 0x93, 0x1a }; static const u8 unk2[] = { 0xb5, 0x7a }; deb("Demod init!\n"); if ((ret = mt352_write(fe, config, ARRAY_SIZE(config))) != 0) return ret; if ((ret = mt352_write(fe, clock, ARRAY_SIZE(clock))) != 0) return ret; if ((ret = mt352_write(fe, reset, ARRAY_SIZE(reset))) != 0) return ret; if ((ret = mt352_write(fe, adc_ctl, ARRAY_SIZE(adc_ctl))) != 0) return ret; if ((ret = mt352_write(fe, agc, ARRAY_SIZE(agc))) != 0) return ret; if ((ret = mt352_write(fe, sec_agc, ARRAY_SIZE(sec_agc))) != 0) return ret; if ((ret = mt352_write(fe, unk1, ARRAY_SIZE(unk1))) != 0) return ret; if ((ret = mt352_write(fe, unk2, ARRAY_SIZE(unk2))) != 0) return ret; return 0; } static struct mt352_config m920x_mt352_config = { .demod_address = 0x0f, .no_tuner = 1, .demod_init = m920x_mt352_demod_init, }; static struct tda1004x_config m920x_tda10046_08_config = { .demod_address = 0x08, .invert = 0, .invert_oclk = 0, .ts_mode = TDA10046_TS_SERIAL, .xtal_freq = TDA10046_XTAL_16M, .if_freq = TDA10046_FREQ_045, .agc_config = TDA10046_AGC_TDA827X, .gpio_config = TDA10046_GPTRI, .request_firmware = NULL, }; static struct tda1004x_config m920x_tda10046_0b_config = { .demod_address = 0x0b, .invert = 0, .invert_oclk = 0, .ts_mode = TDA10046_TS_SERIAL, .xtal_freq = TDA10046_XTAL_16M, .if_freq = TDA10046_FREQ_045, .agc_config = TDA10046_AGC_TDA827X, .gpio_config = TDA10046_GPTRI, .request_firmware = NULL, /* uses firmware EEPROM */ }; /* tuner configurations */ static struct qt1010_config m920x_qt1010_config = { .i2c_address = 0x62 }; static struct mt2060_config m920x_mt2060_config = { .i2c_address = 0x60, /* 0xc0 */ .clock_out = 0, }; /* Callbacks for DVB USB */ static int m920x_mt352_frontend_attach(struct dvb_usb_adapter *adap) { deb("%s\n",__func__); adap->fe_adap[0].fe = dvb_attach(mt352_attach, &m920x_mt352_config, &adap->dev->i2c_adap); if ((adap->fe_adap[0].fe) == NULL) return -EIO; return 0; } static int m920x_mt352_frontend_attach_vp7049(struct dvb_usb_adapter *adap) { struct m920x_inits vp7049_fe_init_seq[] = { /* XXX without these commands the frontend cannot be detected, * they must be sent BEFORE the frontend is attached */ { 0xff28, 0x00 }, { 0xff23, 0x00 }, { 0xff28, 0x00 }, { 0xff23, 0x00 }, { 0xff21, 0x20 }, { 0xff21, 0x60 }, { 0xff28, 0x00 }, { 0xff22, 0x00 }, { 0xff20, 0x30 }, { 0xff20, 0x20 }, { 0xff20, 0x30 }, { } /* terminating entry */ }; int ret; deb("%s\n", __func__); ret = m920x_write_seq(adap->dev->udev, M9206_CORE, vp7049_fe_init_seq); if (ret != 0) { deb("Initialization of vp7049 frontend failed."); return ret; } return m920x_mt352_frontend_attach(adap); } static int m920x_tda10046_08_frontend_attach(struct dvb_usb_adapter *adap) { deb("%s\n",__func__); adap->fe_adap[0].fe = dvb_attach(tda10046_attach, &m920x_tda10046_08_config, &adap->dev->i2c_adap); if ((adap->fe_adap[0].fe) == NULL) return -EIO; return 0; } static int m920x_tda10046_0b_frontend_attach(struct dvb_usb_adapter *adap) { deb("%s\n",__func__); adap->fe_adap[0].fe = dvb_attach(tda10046_attach, &m920x_tda10046_0b_config, &adap->dev->i2c_adap); if ((adap->fe_adap[0].fe) == NULL) return -EIO; return 0; } static int m920x_qt1010_tuner_attach(struct dvb_usb_adapter *adap) { deb("%s\n",__func__); if (dvb_attach(qt1010_attach, adap->fe_adap[0].fe, &adap->dev->i2c_adap, &m920x_qt1010_config) == NULL) return -ENODEV; return 0; } static int m920x_tda8275_60_tuner_attach(struct dvb_usb_adapter *adap) { deb("%s\n",__func__); if (dvb_attach(tda827x_attach, adap->fe_adap[0].fe, 0x60, &adap->dev->i2c_adap, NULL) == NULL) return -ENODEV; return 0; } static int m920x_tda8275_61_tuner_attach(struct dvb_usb_adapter *adap) { deb("%s\n",__func__); if (dvb_attach(tda827x_attach, adap->fe_adap[0].fe, 0x61, &adap->dev->i2c_adap, NULL) == NULL) return -ENODEV; return 0; } static int m920x_fmd1216me_tuner_attach(struct dvb_usb_adapter *adap) { dvb_attach(simple_tuner_attach, adap->fe_adap[0].fe, &adap->dev->i2c_adap, 0x61, TUNER_PHILIPS_FMD1216ME_MK3); return 0; } static int m920x_mt2060_tuner_attach(struct dvb_usb_adapter *adap) { deb("%s\n", __func__); if (dvb_attach(mt2060_attach, adap->fe_adap[0].fe, &adap->dev->i2c_adap, &m920x_mt2060_config, 1220) == NULL) return -ENODEV; return 0; } /* device-specific initialization */ static struct m920x_inits megasky_rc_init [] = { { M9206_RC_INIT2, 0xa8 }, { M9206_RC_INIT1, 0x51 }, { } /* terminating entry */ }; static struct m920x_inits tvwalkertwin_rc_init [] = { { M9206_RC_INIT2, 0x00 }, { M9206_RC_INIT1, 0xef }, { 0xff28, 0x00 }, { 0xff23, 0x00 }, { 0xff21, 0x30 }, { } /* terminating entry */ }; static struct m920x_inits pinnacle310e_init[] = { /* without these the tuner doesn't work */ { 0xff20, 0x9b }, { 0xff22, 0x70 }, /* rc settings */ { 0xff50, 0x80 }, { M9206_RC_INIT1, 0x00 }, { M9206_RC_INIT2, 0xff }, { } /* terminating entry */ }; static struct m920x_inits vp7049_rc_init[] = { { 0xff28, 0x00 }, { 0xff23, 0x00 }, { 0xff21, 0x70 }, { M9206_RC_INIT2, 0x00 }, { M9206_RC_INIT1, 0xff }, { } /* terminating entry */ }; /* ir keymaps */ static struct rc_map_table rc_map_megasky_table[] = { { 0x0012, KEY_POWER }, { 0x001e, KEY_CYCLEWINDOWS }, /* min/max */ { 0x0002, KEY_CHANNELUP }, { 0x0005, KEY_CHANNELDOWN }, { 0x0003, KEY_VOLUMEUP }, { 0x0006, KEY_VOLUMEDOWN }, { 0x0004, KEY_MUTE }, { 0x0007, KEY_OK }, /* TS */ { 0x0008, KEY_STOP }, { 0x0009, KEY_MENU }, /* swap */ { 0x000a, KEY_REWIND }, { 0x001b, KEY_PAUSE }, { 0x001f, KEY_FASTFORWARD }, { 0x000c, KEY_RECORD }, { 0x000d, KEY_CAMERA }, /* screenshot */ { 0x000e, KEY_COFFEE }, /* "MTS" */ }; static struct rc_map_table rc_map_tvwalkertwin_table[] = { { 0x0001, KEY_ZOOM }, /* Full Screen */ { 0x0002, KEY_CAMERA }, /* snapshot */ { 0x0003, KEY_MUTE }, { 0x0004, KEY_REWIND }, { 0x0005, KEY_PLAYPAUSE }, /* Play/Pause */ { 0x0006, KEY_FASTFORWARD }, { 0x0007, KEY_RECORD }, { 0x0008, KEY_STOP }, { 0x0009, KEY_TIME }, /* Timeshift */ { 0x000c, KEY_COFFEE }, /* Recall */ { 0x000e, KEY_CHANNELUP }, { 0x0012, KEY_POWER }, { 0x0015, KEY_MENU }, /* source */ { 0x0018, KEY_CYCLEWINDOWS }, /* TWIN PIP */ { 0x001a, KEY_CHANNELDOWN }, { 0x001b, KEY_VOLUMEDOWN }, { 0x001e, KEY_VOLUMEUP }, }; static struct rc_map_table rc_map_pinnacle310e_table[] = { { 0x16, KEY_POWER }, { 0x17, KEY_FAVORITES }, { 0x0f, KEY_TEXT }, { 0x48, KEY_PROGRAM }, /* preview */ { 0x1c, KEY_EPG }, { 0x04, KEY_LIST }, /* record list */ { 0x03, KEY_1 }, { 0x01, KEY_2 }, { 0x06, KEY_3 }, { 0x09, KEY_4 }, { 0x1d, KEY_5 }, { 0x1f, KEY_6 }, { 0x0d, KEY_7 }, { 0x19, KEY_8 }, { 0x1b, KEY_9 }, { 0x15, KEY_0 }, { 0x0c, KEY_CANCEL }, { 0x4a, KEY_CLEAR }, { 0x13, KEY_BACK }, { 0x00, KEY_TAB }, { 0x4b, KEY_UP }, { 0x4e, KEY_LEFT }, { 0x52, KEY_RIGHT }, { 0x51, KEY_DOWN }, { 0x4f, KEY_ENTER }, /* could also be KEY_OK */ { 0x1e, KEY_VOLUMEUP }, { 0x0a, KEY_VOLUMEDOWN }, { 0x05, KEY_CHANNELUP }, { 0x02, KEY_CHANNELDOWN }, { 0x11, KEY_RECORD }, { 0x14, KEY_PLAY }, { 0x4c, KEY_PAUSE }, { 0x1a, KEY_STOP }, { 0x40, KEY_REWIND }, { 0x12, KEY_FASTFORWARD }, { 0x41, KEY_PREVIOUSSONG }, /* Replay */ { 0x42, KEY_NEXTSONG }, /* Skip */ { 0x54, KEY_CAMERA }, /* Capture */ /* { 0x50, KEY_SAP }, */ /* Sap */ { 0x47, KEY_CYCLEWINDOWS }, /* Pip */ { 0x4d, KEY_SCREEN }, /* FullScreen */ { 0x08, KEY_SUBTITLE }, { 0x0e, KEY_MUTE }, /* { 0x49, KEY_LR }, */ /* L/R */ { 0x07, KEY_SLEEP }, /* Hibernate */ { 0x08, KEY_VIDEO }, /* A/V */ { 0x0e, KEY_MENU }, /* Recall */ { 0x45, KEY_ZOOMIN }, { 0x46, KEY_ZOOMOUT }, { 0x18, KEY_RED }, /* Red */ { 0x53, KEY_GREEN }, /* Green */ { 0x5e, KEY_YELLOW }, /* Yellow */ { 0x5f, KEY_BLUE }, /* Blue */ }; /* DVB USB Driver stuff */ static struct dvb_usb_device_properties megasky_properties; static struct dvb_usb_device_properties digivox_mini_ii_properties; static struct dvb_usb_device_properties tvwalkertwin_properties; static struct dvb_usb_device_properties dposh_properties; static struct dvb_usb_device_properties pinnacle_pctv310e_properties; static struct dvb_usb_device_properties vp7049_properties; static int m920x_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct dvb_usb_device *d = NULL; int ret; struct m920x_inits *rc_init_seq = NULL; int bInterfaceNumber = intf->cur_altsetting->desc.bInterfaceNumber; deb("Probing for m920x device at interface %d\n", bInterfaceNumber); if (bInterfaceNumber == 0) { /* Single-tuner device, or first interface on * multi-tuner device */ ret = dvb_usb_device_init(intf, &megasky_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { rc_init_seq = megasky_rc_init; goto found; } ret = dvb_usb_device_init(intf, &digivox_mini_ii_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { /* No remote control, so no rc_init_seq */ goto found; } /* This configures both tuners on the TV Walker Twin */ ret = dvb_usb_device_init(intf, &tvwalkertwin_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { rc_init_seq = tvwalkertwin_rc_init; goto found; } ret = dvb_usb_device_init(intf, &dposh_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { /* Remote controller not supported yet. */ goto found; } ret = dvb_usb_device_init(intf, &pinnacle_pctv310e_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { rc_init_seq = pinnacle310e_init; goto found; } ret = dvb_usb_device_init(intf, &vp7049_properties, THIS_MODULE, &d, adapter_nr); if (ret == 0) { rc_init_seq = vp7049_rc_init; goto found; } return ret; } else { /* Another interface on a multi-tuner device */ /* The LifeView TV Walker Twin gets here, but struct * tvwalkertwin_properties already configured both * tuners, so there is nothing for us to do here */ } found: if ((ret = m920x_init_ep(intf)) < 0) return ret; if (d && (ret = m920x_init(d, rc_init_seq)) != 0) return ret; return ret; } enum { MSI_MEGASKY580, ANUBIS_MSI_DIGI_VOX_MINI_II, ANUBIS_LIFEVIEW_TV_WALKER_TWIN_COLD, ANUBIS_LIFEVIEW_TV_WALKER_TWIN_WARM, DPOSH_M9206_COLD, DPOSH_M9206_WARM, VISIONPLUS_PINNACLE_PCTV310E, AZUREWAVE_TWINHAN_VP7049, }; static struct usb_device_id m920x_table[] = { DVB_USB_DEV(MSI, MSI_MEGASKY580), DVB_USB_DEV(ANUBIS_ELECTRONIC, ANUBIS_MSI_DIGI_VOX_MINI_II), DVB_USB_DEV(ANUBIS_ELECTRONIC, ANUBIS_LIFEVIEW_TV_WALKER_TWIN_COLD), DVB_USB_DEV(ANUBIS_ELECTRONIC, ANUBIS_LIFEVIEW_TV_WALKER_TWIN_WARM), DVB_USB_DEV(DPOSH, DPOSH_M9206_COLD), DVB_USB_DEV(DPOSH, DPOSH_M9206_WARM), DVB_USB_DEV(VISIONPLUS, VISIONPLUS_PINNACLE_PCTV310E), DVB_USB_DEV(AZUREWAVE, AZUREWAVE_TWINHAN_VP7049), { } }; MODULE_DEVICE_TABLE (usb, m920x_table); static struct dvb_usb_device_properties megasky_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, .firmware = "dvb-usb-megasky-02.fw", .download_firmware = m920x_firmware_download, .rc.legacy = { .rc_interval = 100, .rc_map_table = rc_map_megasky_table, .rc_map_size = ARRAY_SIZE(rc_map_megasky_table), .rc_query = m920x_rc_query, }, .size_of_priv = sizeof(struct m920x_state), .identify_state = m920x_identify_state, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 8, .pid_filter = m920x_pid_filter, .pid_filter_ctrl = m920x_pid_filter_ctrl, .frontend_attach = m920x_mt352_frontend_attach, .tuner_attach = m920x_qt1010_tuner_attach, .stream = { .type = USB_BULK, .count = 8, .endpoint = 0x81, .u = { .bulk = { .buffersize = 512, } } }, }}, }}, .i2c_algo = &m920x_i2c_algo, .num_device_descs = 1, .devices = { { "MSI Mega Sky 580 DVB-T USB2.0", { &m920x_table[MSI_MEGASKY580], NULL }, { NULL }, } } }; static struct dvb_usb_device_properties digivox_mini_ii_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, .firmware = "dvb-usb-digivox-02.fw", .download_firmware = m920x_firmware_download, .size_of_priv = sizeof(struct m920x_state), .identify_state = m920x_identify_state, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 8, .pid_filter = m920x_pid_filter, .pid_filter_ctrl = m920x_pid_filter_ctrl, .frontend_attach = m920x_tda10046_08_frontend_attach, .tuner_attach = m920x_tda8275_60_tuner_attach, .stream = { .type = USB_BULK, .count = 8, .endpoint = 0x81, .u = { .bulk = { .buffersize = 0x4000, } } }, }}, }}, .i2c_algo = &m920x_i2c_algo, .num_device_descs = 1, .devices = { { "MSI DIGI VOX mini II DVB-T USB2.0", { &m920x_table[ANUBIS_MSI_DIGI_VOX_MINI_II], NULL }, { NULL }, }, } }; /* LifeView TV Walker Twin support by Nick Andrew <nick@nick-andrew.net> * * LifeView TV Walker Twin has 1 x M9206, 2 x TDA10046, 2 x TDA8275A * TDA10046 #0 is located at i2c address 0x08 * TDA10046 #1 is located at i2c address 0x0b * TDA8275A #0 is located at i2c address 0x60 * TDA8275A #1 is located at i2c address 0x61 */ static struct dvb_usb_device_properties tvwalkertwin_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, .firmware = "dvb-usb-tvwalkert.fw", .download_firmware = m920x_firmware_download, .rc.legacy = { .rc_interval = 100, .rc_map_table = rc_map_tvwalkertwin_table, .rc_map_size = ARRAY_SIZE(rc_map_tvwalkertwin_table), .rc_query = m920x_rc_query, }, .size_of_priv = sizeof(struct m920x_state), .identify_state = m920x_identify_state, .num_adapters = 2, .adapter = {{ .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 8, .pid_filter = m920x_pid_filter, .pid_filter_ctrl = m920x_pid_filter_ctrl, .frontend_attach = m920x_tda10046_08_frontend_attach, .tuner_attach = m920x_tda8275_60_tuner_attach, .stream = { .type = USB_BULK, .count = 8, .endpoint = 0x81, .u = { .bulk = { .buffersize = 512, } } }}, }},{ .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 8, .pid_filter = m920x_pid_filter, .pid_filter_ctrl = m920x_pid_filter_ctrl, .frontend_attach = m920x_tda10046_0b_frontend_attach, .tuner_attach = m920x_tda8275_61_tuner_attach, .stream = { .type = USB_BULK, .count = 8, .endpoint = 0x82, .u = { .bulk = { .buffersize = 512, } } }}, }, }}, .i2c_algo = &m920x_i2c_algo, .num_device_descs = 1, .devices = { { .name = "LifeView TV Walker Twin DVB-T USB2.0", .cold_ids = { &m920x_table[ANUBIS_LIFEVIEW_TV_WALKER_TWIN_COLD], NULL }, .warm_ids = { &m920x_table[ANUBIS_LIFEVIEW_TV_WALKER_TWIN_WARM], NULL }, }, } }; static struct dvb_usb_device_properties dposh_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, .firmware = "dvb-usb-dposh-01.fw", .download_firmware = m920x_firmware_download, .size_of_priv = sizeof(struct m920x_state), .identify_state = m920x_identify_state, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ /* Hardware pid filters don't work with this device/firmware */ .frontend_attach = m920x_mt352_frontend_attach, .tuner_attach = m920x_qt1010_tuner_attach, .stream = { .type = USB_BULK, .count = 8, .endpoint = 0x81, .u = { .bulk = { .buffersize = 512, } } }, }}, }}, .i2c_algo = &m920x_i2c_algo, .num_device_descs = 1, .devices = { { .name = "Dposh DVB-T USB2.0", .cold_ids = { &m920x_table[DPOSH_M9206_COLD], NULL }, .warm_ids = { &m920x_table[DPOSH_M9206_WARM], NULL }, }, } }; static struct dvb_usb_device_properties pinnacle_pctv310e_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, .download_firmware = NULL, .rc.legacy = { .rc_interval = 100, .rc_map_table = rc_map_pinnacle310e_table, .rc_map_size = ARRAY_SIZE(rc_map_pinnacle310e_table), .rc_query = m920x_rc_query, }, .size_of_priv = sizeof(struct m920x_state), .identify_state = m920x_identify_state, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 8, .pid_filter = m920x_pid_filter, .pid_filter_ctrl = m920x_pid_filter_ctrl, .frontend_attach = m920x_mt352_frontend_attach, .tuner_attach = m920x_fmd1216me_tuner_attach, .stream = { .type = USB_ISOC, .count = 5, .endpoint = 0x84, .u = { .isoc = { .framesperurb = 128, .framesize = 564, .interval = 1, } } }, }}, } }, .i2c_algo = &m920x_i2c_algo, .num_device_descs = 1, .devices = { { "Pinnacle PCTV 310e", { &m920x_table[VISIONPLUS_PINNACLE_PCTV310E], NULL }, { NULL }, } } }; static struct dvb_usb_device_properties vp7049_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, .firmware = "dvb-usb-vp7049-0.95.fw", .download_firmware = m920x_firmware_download, .rc.core = { .rc_interval = 150, .rc_codes = RC_MAP_TWINHAN_VP1027_DVBS, .rc_query = m920x_rc_core_query, .allowed_protos = RC_PROTO_BIT_UNKNOWN, }, .size_of_priv = sizeof(struct m920x_state), .identify_state = m920x_identify_state, .num_adapters = 1, .adapter = {{ .num_frontends = 1, .fe = {{ .caps = DVB_USB_ADAP_HAS_PID_FILTER | DVB_USB_ADAP_PID_FILTER_CAN_BE_TURNED_OFF, .pid_filter_count = 8, .pid_filter = m920x_pid_filter, .pid_filter_ctrl = m920x_pid_filter_ctrl, .frontend_attach = m920x_mt352_frontend_attach_vp7049, .tuner_attach = m920x_mt2060_tuner_attach, .stream = { .type = USB_BULK, .count = 8, .endpoint = 0x81, .u = { .bulk = { .buffersize = 512, } } }, } }, } }, .i2c_algo = &m920x_i2c_algo, .num_device_descs = 1, .devices = { { "DTV-DVB UDTT7049", { &m920x_table[AZUREWAVE_TWINHAN_VP7049], NULL }, { NULL }, } } }; static struct usb_driver m920x_driver = { .name = "dvb_usb_m920x", .probe = m920x_probe, .disconnect = dvb_usb_device_exit, .id_table = m920x_table, }; module_usb_driver(m920x_driver); MODULE_AUTHOR("Aapo Tahkola <aet@rasterburn.org>"); MODULE_DESCRIPTION("DVB Driver for ULI M920x"); MODULE_VERSION("0.1"); MODULE_LICENSE("GPL");
11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 10 11 11 11 11 11 11 12 12 11 11 11 11 11 11 11 11 11 11 11 18 19 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 // SPDX-License-Identifier: GPL-2.0 #include "bcachefs.h" #include "bbpos.h" #include "bkey_buf.h" #include "btree_cache.h" #include "btree_io.h" #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" #include "errcode.h" #include "error.h" #include "journal.h" #include "trace.h" #include <linux/prefetch.h> #include <linux/sched/mm.h> #include <linux/swap.h> #define BTREE_CACHE_NOT_FREED_INCREMENT(counter) \ do { \ if (shrinker_counter) \ bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_##counter]++; \ } while (0) const char * const bch2_btree_node_flags[] = { #define x(f) #f, BTREE_FLAGS() #undef x NULL }; void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned reserve = 16; if (!c->btree_roots_known[0].b) reserve += 8; for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->b) reserve += min_t(unsigned, 1, r->b->c.level) * 8; } c->btree_cache.nr_reserve = reserve; } static inline size_t btree_cache_can_free(struct btree_cache_list *list) { struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); size_t can_free = list->nr; if (!list->idx) can_free = max_t(ssize_t, 0, can_free - bc->nr_reserve); return can_free; } static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) { BUG_ON(!list_empty(&b->list)); if (b->c.lock.readers) list_add(&b->list, &bc->freed_pcpu); else list_add(&b->list, &bc->freed_nonpcpu); } static void __bch2_btree_node_to_freelist(struct btree_cache *bc, struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(!b->data); bc->nr_freeable++; list_add(&b->list, &bc->freeable); } void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; mutex_lock(&bc->lock); __bch2_btree_node_to_freelist(bc, b); mutex_unlock(&bc->lock); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } static void __btree_node_data_free(struct btree_cache *bc, struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); /* * This should really be done in slub/vmalloc, but we're using the * kmalloc_large() path, so we're working around a slub bug by doing * this here: */ if (b->data) mm_account_reclaimed_pages(btree_buf_bytes(b) / PAGE_SIZE); if (b->aux_data) mm_account_reclaimed_pages(btree_aux_data_bytes(b) / PAGE_SIZE); EBUG_ON(btree_node_write_in_flight(b)); clear_btree_node_just_written(b); kvfree(b->data); b->data = NULL; #ifdef __KERNEL__ kvfree(b->aux_data); #else munmap(b->aux_data, btree_aux_data_bytes(b)); #endif b->aux_data = NULL; btree_node_to_freedlist(bc, b); } static void btree_node_data_free(struct btree_cache *bc, struct btree *b) { BUG_ON(list_empty(&b->list)); list_del_init(&b->list); --bc->nr_freeable; __btree_node_data_free(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, const void *obj) { const struct btree *b = obj; const u64 *v = arg->key; return b->hash_val == *v ? 0 : 1; } static const struct rhashtable_params bch_btree_cache_params = { .head_offset = offsetof(struct btree, hash), .key_offset = offsetof(struct btree, hash_val), .key_len = sizeof(u64), .obj_cmpfn = bch2_btree_cache_cmp_fn, .automatic_shrinking = true, }; static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp) { BUG_ON(b->data || b->aux_data); gfp |= __GFP_ACCOUNT|__GFP_RECLAIMABLE; b->data = kvmalloc(btree_buf_bytes(b), gfp); if (!b->data) return -BCH_ERR_ENOMEM_btree_node_mem_alloc; #ifdef __KERNEL__ b->aux_data = kvmalloc(btree_aux_data_bytes(b), gfp); #else b->aux_data = mmap(NULL, btree_aux_data_bytes(b), PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); if (b->aux_data == MAP_FAILED) b->aux_data = NULL; #endif if (!b->aux_data) { kvfree(b->data); b->data = NULL; return -BCH_ERR_ENOMEM_btree_node_mem_alloc; } return 0; } static struct btree *__btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) { struct btree *b; b = kzalloc(sizeof(struct btree), gfp); if (!b) return NULL; bkey_btree_ptr_init(&b->key); INIT_LIST_HEAD(&b->list); INIT_LIST_HEAD(&b->write_blocked); b->byte_order = ilog2(c->opts.btree_node_size); return b; } struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b; b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) return NULL; if (btree_node_data_alloc(c, b, GFP_KERNEL)) { kfree(b); return NULL; } bch2_btree_lock_init(&b->c, 0); __bch2_btree_node_to_freelist(bc, b); return b; } static inline bool __btree_node_pinned(struct btree_cache *bc, struct btree *b) { struct bbpos pos = BBPOS(b->c.btree_id, b->key.k.p); u64 mask = bc->pinned_nodes_mask[!!b->c.level]; return ((mask & BIT_ULL(b->c.btree_id)) && bbpos_cmp(bc->pinned_nodes_start, pos) < 0 && bbpos_cmp(bc->pinned_nodes_end, pos) >= 0); } void bch2_node_pin(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; mutex_lock(&bc->lock); BUG_ON(!__btree_node_pinned(bc, b)); if (b != btree_node_root(c, b) && !btree_node_pinned(b)) { set_btree_node_pinned(b); list_move(&b->list, &bc->live[1].list); bc->live[0].nr--; bc->live[1].nr++; } mutex_unlock(&bc->lock); } void bch2_btree_cache_unpin(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b, *n; mutex_lock(&bc->lock); c->btree_cache.pinned_nodes_mask[0] = 0; c->btree_cache.pinned_nodes_mask[1] = 0; list_for_each_entry_safe(b, n, &bc->live[1].list, list) { clear_btree_node_pinned(b); list_move(&b->list, &bc->live[0].list); bc->live[0].nr++; bc->live[1].nr--; } mutex_unlock(&bc->lock); } /* Btree in memory cache - hash table */ void __bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) { lockdep_assert_held(&bc->lock); int ret = rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); BUG_ON(ret); /* Cause future lookups for this node to fail: */ b->hash_val = 0; if (b->c.btree_id < BTREE_ID_NR) --bc->nr_by_btree[b->c.btree_id]; --bc->live[btree_node_pinned(b)].nr; list_del_init(&b->list); } void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) { __bch2_btree_node_hash_remove(bc, b); __bch2_btree_node_to_freelist(bc, b); } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) { BUG_ON(!list_empty(&b->list)); BUG_ON(b->hash_val); b->hash_val = btree_ptr_hash_val(&b->key); int ret = rhashtable_lookup_insert_fast(&bc->table, &b->hash, bch_btree_cache_params); if (ret) return ret; if (b->c.btree_id < BTREE_ID_NR) bc->nr_by_btree[b->c.btree_id]++; bool p = __btree_node_pinned(bc, b); mod_bit(BTREE_NODE_pinned, &b->flags, p); list_add_tail(&b->list, &bc->live[p].list); bc->live[p].nr++; return 0; } int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, unsigned level, enum btree_id id) { b->c.level = level; b->c.btree_id = id; mutex_lock(&bc->lock); int ret = __bch2_btree_node_hash_insert(bc, b); mutex_unlock(&bc->lock); return ret; } void bch2_btree_node_update_key_early(struct btree_trans *trans, enum btree_id btree, unsigned level, struct bkey_s_c old, struct bkey_i *new) { struct bch_fs *c = trans->c; struct btree *b; struct bkey_buf tmp; int ret; bch2_bkey_buf_init(&tmp); bch2_bkey_buf_reassemble(&tmp, c, old); b = bch2_btree_node_get_noiter(trans, tmp.k, btree, level, true); if (!IS_ERR_OR_NULL(b)) { mutex_lock(&c->btree_cache.lock); bch2_btree_node_hash_remove(&c->btree_cache, b); bkey_copy(&b->key, new); ret = __bch2_btree_node_hash_insert(&c->btree_cache, b); BUG_ON(ret); mutex_unlock(&c->btree_cache.lock); six_unlock_read(&b->c.lock); } bch2_bkey_buf_exit(&tmp, c); } __flatten static inline struct btree *btree_cache_find(struct btree_cache *bc, const struct bkey_i *k) { u64 v = btree_ptr_hash_val(k); return rhashtable_lookup_fast(&bc->table, &v, bch_btree_cache_params); } /* * this version is for btree nodes that have already been freed (we're not * reaping a real btree node) */ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush, bool shrinker_counter) { struct btree_cache *bc = &c->btree_cache; int ret = 0; lockdep_assert_held(&bc->lock); wait_on_io: if (b->flags & ((1U << BTREE_NODE_dirty)| (1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_write_in_flight))) { if (!flush) { if (btree_node_dirty(b)) BTREE_CACHE_NOT_FREED_INCREMENT(dirty); else if (btree_node_read_in_flight(b)) BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight); else if (btree_node_write_in_flight(b)) BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight); return -BCH_ERR_ENOMEM_btree_node_reclaim; } /* XXX: waiting on IO with btree cache lock held */ bch2_btree_node_wait_on_read(b); bch2_btree_node_wait_on_write(b); } if (!six_trylock_intent(&b->c.lock)) { BTREE_CACHE_NOT_FREED_INCREMENT(lock_intent); return -BCH_ERR_ENOMEM_btree_node_reclaim; } if (!six_trylock_write(&b->c.lock)) { BTREE_CACHE_NOT_FREED_INCREMENT(lock_write); goto out_unlock_intent; } /* recheck under lock */ if (b->flags & ((1U << BTREE_NODE_read_in_flight)| (1U << BTREE_NODE_write_in_flight))) { if (!flush) { if (btree_node_read_in_flight(b)) BTREE_CACHE_NOT_FREED_INCREMENT(read_in_flight); else if (btree_node_write_in_flight(b)) BTREE_CACHE_NOT_FREED_INCREMENT(write_in_flight); goto out_unlock; } six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; } if (btree_node_noevict(b)) { BTREE_CACHE_NOT_FREED_INCREMENT(noevict); goto out_unlock; } if (btree_node_write_blocked(b)) { BTREE_CACHE_NOT_FREED_INCREMENT(write_blocked); goto out_unlock; } if (btree_node_will_make_reachable(b)) { BTREE_CACHE_NOT_FREED_INCREMENT(will_make_reachable); goto out_unlock; } if (btree_node_dirty(b)) { if (!flush) { BTREE_CACHE_NOT_FREED_INCREMENT(dirty); goto out_unlock; } /* * Using the underscore version because we don't want to compact * bsets after the write, since this node is about to be evicted * - unless btree verify mode is enabled, since it runs out of * the post write cleanup: */ if (bch2_verify_btree_ondisk) bch2_btree_node_write(c, b, SIX_LOCK_intent, BTREE_WRITE_cache_reclaim); else __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; } out: if (b->hash_val && !ret) trace_and_count(c, btree_cache_reap, c, b); return ret; out_unlock: six_unlock_write(&b->c.lock); out_unlock_intent: six_unlock_intent(&b->c.lock); ret = -BCH_ERR_ENOMEM_btree_node_reclaim; goto out; } static int btree_node_reclaim(struct bch_fs *c, struct btree *b, bool shrinker_counter) { return __btree_node_reclaim(c, b, false, shrinker_counter); } static int btree_node_write_and_reclaim(struct bch_fs *c, struct btree *b) { return __btree_node_reclaim(c, b, true, false); } static unsigned long bch2_btree_cache_scan(struct shrinker *shrink, struct shrink_control *sc) { struct btree_cache_list *list = shrink->private_data; struct btree_cache *bc = container_of(list, struct btree_cache, live[list->idx]); struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache); struct btree *b, *t; unsigned long nr = sc->nr_to_scan; unsigned long can_free = 0; unsigned long freed = 0; unsigned long touched = 0; unsigned i, flags; unsigned long ret = SHRINK_STOP; bool trigger_writes = atomic_long_read(&bc->nr_dirty) + nr >= list->nr * 3 / 4; if (bch2_btree_shrinker_disabled) return SHRINK_STOP; mutex_lock(&bc->lock); flags = memalloc_nofs_save(); /* * It's _really_ critical that we don't free too many btree nodes - we * have to always leave ourselves a reserve. The reserve is how we * guarantee that allocating memory for a new btree node can always * succeed, so that inserting keys into the btree can always succeed and * IO can always make forward progress: */ can_free = btree_cache_can_free(list); nr = min_t(unsigned long, nr, can_free); i = 0; list_for_each_entry_safe(b, t, &bc->freeable, list) { /* * Leave a few nodes on the freeable list, so that a btree split * won't have to hit the system allocator: */ if (++i <= 3) continue; touched++; if (touched >= nr) goto out; if (!btree_node_reclaim(c, b, true)) { btree_node_data_free(bc, b); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); freed++; bc->nr_freed++; } } restart: list_for_each_entry_safe(b, t, &list->list, list) { touched++; if (btree_node_accessed(b)) { clear_btree_node_accessed(b); bc->not_freed[BCH_BTREE_CACHE_NOT_FREED_access_bit]++; --touched;; } else if (!btree_node_reclaim(c, b, true)) { __bch2_btree_node_hash_remove(bc, b); __btree_node_data_free(bc, b); freed++; bc->nr_freed++; six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); if (freed == nr) goto out_rotate; } else if (trigger_writes && btree_node_dirty(b) && !btree_node_will_make_reachable(b) && !btree_node_write_blocked(b) && six_trylock_read(&b->c.lock)) { list_move(&list->list, &b->list); mutex_unlock(&bc->lock); __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); six_unlock_read(&b->c.lock); if (touched >= nr) goto out_nounlock; mutex_lock(&bc->lock); goto restart; } if (touched >= nr) break; } out_rotate: if (&t->list != &list->list) list_move_tail(&list->list, &t->list); out: mutex_unlock(&bc->lock); out_nounlock: ret = freed; memalloc_nofs_restore(flags); trace_and_count(c, btree_cache_scan, sc->nr_to_scan, can_free, ret); return ret; } static unsigned long bch2_btree_cache_count(struct shrinker *shrink, struct shrink_control *sc) { struct btree_cache_list *list = shrink->private_data; if (bch2_btree_shrinker_disabled) return 0; return btree_cache_can_free(list); } void bch2_fs_btree_cache_exit(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b, *t; unsigned long flags; shrinker_free(bc->live[1].shrink); shrinker_free(bc->live[0].shrink); /* vfree() can allocate memory: */ flags = memalloc_nofs_save(); mutex_lock(&bc->lock); if (c->verify_data) list_move(&c->verify_data->list, &bc->live[0].list); kvfree(c->verify_ondisk); for (unsigned i = 0; i < btree_id_nr_alive(c); i++) { struct btree_root *r = bch2_btree_id_root(c, i); if (r->b) list_add(&r->b->list, &bc->live[0].list); } list_for_each_entry_safe(b, t, &bc->live[1].list, list) bch2_btree_node_hash_remove(bc, b); list_for_each_entry_safe(b, t, &bc->live[0].list, list) bch2_btree_node_hash_remove(bc, b); list_for_each_entry_safe(b, t, &bc->freeable, list) { BUG_ON(btree_node_read_in_flight(b) || btree_node_write_in_flight(b)); btree_node_data_free(bc, b); } BUG_ON(!bch2_journal_error(&c->journal) && atomic_long_read(&c->btree_cache.nr_dirty)); list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); list_for_each_entry_safe(b, t, &bc->freed_nonpcpu, list) { list_del(&b->list); six_lock_exit(&b->c.lock); kfree(b); } mutex_unlock(&bc->lock); memalloc_nofs_restore(flags); for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) BUG_ON(bc->nr_by_btree[i]); BUG_ON(bc->live[0].nr); BUG_ON(bc->live[1].nr); BUG_ON(bc->nr_freeable); if (bc->table_init_done) rhashtable_destroy(&bc->table); } int bch2_fs_btree_cache_init(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct shrinker *shrink; unsigned i; int ret = 0; ret = rhashtable_init(&bc->table, &bch_btree_cache_params); if (ret) goto err; bc->table_init_done = true; bch2_recalc_btree_reserve(c); for (i = 0; i < bc->nr_reserve; i++) if (!__bch2_btree_node_mem_alloc(c)) goto err; list_splice_init(&bc->live[0].list, &bc->freeable); mutex_init(&c->verify_lock); shrink = shrinker_alloc(0, "%s-btree_cache", c->name); if (!shrink) goto err; bc->live[0].shrink = shrink; shrink->count_objects = bch2_btree_cache_count; shrink->scan_objects = bch2_btree_cache_scan; shrink->seeks = 2; shrink->private_data = &bc->live[0]; shrinker_register(shrink); shrink = shrinker_alloc(0, "%s-btree_cache-pinned", c->name); if (!shrink) goto err; bc->live[1].shrink = shrink; shrink->count_objects = bch2_btree_cache_count; shrink->scan_objects = bch2_btree_cache_scan; shrink->seeks = 8; shrink->private_data = &bc->live[1]; shrinker_register(shrink); return 0; err: return -BCH_ERR_ENOMEM_fs_btree_cache_init; } void bch2_fs_btree_cache_init_early(struct btree_cache *bc) { mutex_init(&bc->lock); for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) { bc->live[i].idx = i; INIT_LIST_HEAD(&bc->live[i].list); } INIT_LIST_HEAD(&bc->freeable); INIT_LIST_HEAD(&bc->freed_pcpu); INIT_LIST_HEAD(&bc->freed_nonpcpu); } /* * We can only have one thread cannibalizing other cached btree nodes at a time, * or we'll deadlock. We use an open coded mutex to ensure that, which a * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ void bch2_btree_cache_cannibalize_unlock(struct btree_trans *trans) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; if (bc->alloc_lock == current) { trace_and_count(c, btree_cache_cannibalize_unlock, trans); bc->alloc_lock = NULL; closure_wake_up(&bc->alloc_wait); } } int bch2_btree_cache_cannibalize_lock(struct btree_trans *trans, struct closure *cl) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct task_struct *old; old = NULL; if (try_cmpxchg(&bc->alloc_lock, &old, current) || old == current) goto success; if (!cl) { trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); return -BCH_ERR_ENOMEM_btree_cache_cannibalize_lock; } closure_wait(&bc->alloc_wait, cl); /* Try again, after adding ourselves to waitlist */ old = NULL; if (try_cmpxchg(&bc->alloc_lock, &old, current) || old == current) { /* We raced */ closure_wake_up(&bc->alloc_wait); goto success; } trace_and_count(c, btree_cache_cannibalize_lock_fail, trans); return -BCH_ERR_btree_cache_cannibalize_lock_blocked; success: trace_and_count(c, btree_cache_cannibalize_lock, trans); return 0; } static struct btree *btree_node_cannibalize(struct bch_fs *c) { struct btree_cache *bc = &c->btree_cache; struct btree *b; for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) list_for_each_entry_reverse(b, &bc->live[i].list, list) if (!btree_node_reclaim(c, b, false)) return b; while (1) { for (unsigned i = 0; i < ARRAY_SIZE(bc->live); i++) list_for_each_entry_reverse(b, &bc->live[i].list, list) if (!btree_node_write_and_reclaim(c, b)) return b; /* * Rare case: all nodes were intent-locked. * Just busy-wait. */ WARN_ONCE(1, "btree cache cannibalize failed\n"); cond_resched(); } } struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_read_locks) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct list_head *freed = pcpu_read_locks ? &bc->freed_pcpu : &bc->freed_nonpcpu; struct btree *b, *b2; u64 start_time = local_clock(); mutex_lock(&bc->lock); /* * We never free struct btree itself, just the memory that holds the on * disk node. Check the freed list before allocating a new one: */ list_for_each_entry(b, freed, list) if (!btree_node_reclaim(c, b, false)) { list_del_init(&b->list); goto got_node; } b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN); if (!b) { mutex_unlock(&bc->lock); bch2_trans_unlock(trans); b = __btree_node_mem_alloc(c, GFP_KERNEL); if (!b) goto err; mutex_lock(&bc->lock); } bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0); BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); got_node: /* * btree_free() doesn't free memory; it sticks the node on the end of * the list. Check if there's any freed nodes there: */ list_for_each_entry(b2, &bc->freeable, list) if (!btree_node_reclaim(c, b2, false)) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); list_del_init(&b2->list); --bc->nr_freeable; btree_node_to_freedlist(bc, b2); mutex_unlock(&bc->lock); six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); goto got_mem; } mutex_unlock(&bc->lock); if (btree_node_data_alloc(c, b, GFP_NOWAIT|__GFP_NOWARN)) { bch2_trans_unlock(trans); if (btree_node_data_alloc(c, b, GFP_KERNEL|__GFP_NOWARN)) goto err; } got_mem: BUG_ON(!list_empty(&b->list)); BUG_ON(btree_node_hashed(b)); BUG_ON(btree_node_dirty(b)); BUG_ON(btree_node_write_in_flight(b)); out: b->flags = 0; b->written = 0; b->nsets = 0; b->sib_u64s[0] = 0; b->sib_u64s[1] = 0; b->whiteout_u64s = 0; bch2_btree_keys_init(b); set_btree_node_accessed(b); bch2_time_stats_update(&c->times[BCH_TIME_btree_node_mem_alloc], start_time); int ret = bch2_trans_relock(trans); if (unlikely(ret)) { bch2_btree_node_to_freelist(c, b); return ERR_PTR(ret); } return b; err: mutex_lock(&bc->lock); /* Try to cannibalize another cached btree node: */ if (bc->alloc_lock == current) { b2 = btree_node_cannibalize(c); clear_btree_node_just_written(b2); __bch2_btree_node_hash_remove(bc, b2); if (b) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); btree_node_to_freedlist(bc, b2); six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); } else { b = b2; } BUG_ON(!list_empty(&b->list)); mutex_unlock(&bc->lock); trace_and_count(c, btree_cache_cannibalize, trans); goto out; } mutex_unlock(&bc->lock); return ERR_PTR(-BCH_ERR_ENOMEM_btree_node_mem_alloc); } /* Slowpath, don't want it inlined into btree_iter_traverse() */ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level, enum six_lock_type lock_type, bool sync) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; if (unlikely(level >= BTREE_MAX_DEPTH)) { int ret = bch2_fs_topology_error(c, "attempting to get btree node at level %u, >= max depth %u", level, BTREE_MAX_DEPTH); return ERR_PTR(ret); } if (unlikely(!bkey_is_btree_ptr(&k->k))) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); int ret = bch2_fs_topology_error(c, "attempting to get btree node with non-btree key %s", buf.buf); printbuf_exit(&buf); return ERR_PTR(ret); } if (unlikely(k->k.u64s > BKEY_BTREE_PTR_U64s_MAX)) { struct printbuf buf = PRINTBUF; bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); int ret = bch2_fs_topology_error(c, "attempting to get btree node with too big key %s", buf.buf); printbuf_exit(&buf); return ERR_PTR(ret); } /* * Parent node must be locked, else we could read in a btree node that's * been freed: */ if (path && !bch2_btree_node_relock(trans, path, level + 1)) { trace_and_count(c, trans_restart_relock_parent_for_fill, trans, _THIS_IP_, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_relock)); } b = bch2_btree_node_mem_alloc(trans, level != 0); if (bch2_err_matches(PTR_ERR_OR_ZERO(b), ENOMEM)) { if (!path) return b; trans->memory_allocation_failure = true; trace_and_count(c, trans_restart_memory_allocation_failure, trans, _THIS_IP_, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_fill_mem_alloc_fail)); } if (IS_ERR(b)) return b; bkey_copy(&b->key, k); if (bch2_btree_node_hash_insert(bc, b, level, btree_id)) { /* raced with another fill: */ /* mark as unhashed... */ b->hash_val = 0; mutex_lock(&bc->lock); __bch2_btree_node_to_freelist(bc, b); mutex_unlock(&bc->lock); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); return NULL; } set_btree_node_read_in_flight(b); six_unlock_write(&b->c.lock); if (path) { u32 seq = six_lock_seq(&b->c.lock); /* Unlock before doing IO: */ six_unlock_intent(&b->c.lock); bch2_trans_unlock_noassert(trans); bch2_btree_node_read(trans, b, sync); int ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); if (!sync) return NULL; if (!six_relock_type(&b->c.lock, lock_type, seq)) b = NULL; } else { bch2_btree_node_read(trans, b, sync); if (lock_type == SIX_LOCK_read) six_lock_downgrade(&b->c.lock); } return b; } static noinline void btree_bad_header(struct bch_fs *c, struct btree *b) { struct printbuf buf = PRINTBUF; if (c->curr_recovery_pass <= BCH_RECOVERY_PASS_check_allocations) return; prt_printf(&buf, "btree node header doesn't match ptr\n" "btree %s level %u\n" "ptr: ", bch2_btree_id_str(b->c.btree_id), b->c.level); bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key)); prt_printf(&buf, "\nheader: btree %s level %llu\n" "min ", bch2_btree_id_str(BTREE_NODE_ID(b->data)), BTREE_NODE_LEVEL(b->data)); bch2_bpos_to_text(&buf, b->data->min_key); prt_printf(&buf, "\nmax "); bch2_bpos_to_text(&buf, b->data->max_key); bch2_fs_topology_error(c, "%s", buf.buf); printbuf_exit(&buf); } static inline void btree_check_header(struct bch_fs *c, struct btree *b) { if (b->c.btree_id != BTREE_NODE_ID(b->data) || b->c.level != BTREE_NODE_LEVEL(b->data) || !bpos_eq(b->data->max_key, b->key.k.p) || (b->key.k.type == KEY_TYPE_btree_ptr_v2 && !bpos_eq(b->data->min_key, bkey_i_to_btree_ptr_v2(&b->key)->v.min_key))) btree_bad_header(c, b); } static struct btree *__bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; bool need_relock = false; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { /* * We must have the parent locked to call bch2_btree_node_fill(), * else we could read in a btree node from disk that's been * freed: */ b = bch2_btree_node_fill(trans, path, k, path->btree_id, level, lock_type, true); need_relock = true; /* We raced and found the btree node in the cache */ if (!b) goto retry; if (IS_ERR(b)) return b; } else { if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ERR_PTR(ret); BUG_ON(ret); if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.level != level || race_fault())) { six_unlock_type(&b->c.lock, lock_type); if (bch2_btree_node_relock(trans, path, level + 1)) goto retry; trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); } /* avoid atomic set bit if it's not needed: */ if (!btree_node_accessed(b)) set_btree_node_accessed(b); } if (unlikely(btree_node_read_in_flight(b))) { u32 seq = six_lock_seq(&b->c.lock); six_unlock_type(&b->c.lock, lock_type); bch2_trans_unlock(trans); need_relock = true; bch2_btree_node_wait_on_read(b); ret = bch2_trans_relock(trans); if (ret) return ERR_PTR(ret); /* * should_be_locked is not set on this path yet, so we need to * relock it specifically: */ if (!six_relock_type(&b->c.lock, lock_type, seq)) goto retry; } if (unlikely(need_relock)) { ret = bch2_trans_relock(trans) ?: bch2_btree_path_relock_intent(trans, path); if (ret) { six_unlock_type(&b->c.lock, lock_type); return ERR_PTR(ret); } } prefetch(b->aux_data); for_each_bset(b, t) { void *p = (u64 *) b->aux_data + t->aux_data_offset; prefetch(p + L1_CACHE_BYTES * 0); prefetch(p + L1_CACHE_BYTES * 1); prefetch(p + L1_CACHE_BYTES * 2); } if (unlikely(btree_node_read_error(b))) { six_unlock_type(&b->c.lock, lock_type); return ERR_PTR(-BCH_ERR_btree_node_read_error); } EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); return b; } /** * bch2_btree_node_get - find a btree node in the cache and lock it, reading it * in from disk if necessary. * * @trans: btree transaction object * @path: btree_path being traversed * @k: pointer to btree node (generally KEY_TYPE_btree_ptr_v2) * @level: level of btree node being looked up (0 == leaf node) * @lock_type: SIX_LOCK_read or SIX_LOCK_intent * @trace_ip: ip of caller of btree iterator code (i.e. caller of bch2_btree_iter_peek()) * * The btree node will have either a read or a write lock held, depending on * the @write parameter. * * Returns: btree node or ERR_PTR() */ struct btree *bch2_btree_node_get(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, unsigned level, enum six_lock_type lock_type, unsigned long trace_ip) { struct bch_fs *c = trans->c; struct btree *b; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); b = btree_node_mem_ptr(k); /* * Check b->hash_val _before_ calling btree_node_lock() - this might not * be the node we want anymore, and trying to lock the wrong node could * cause an unneccessary transaction restart: */ if (unlikely(!c->opts.btree_node_mem_ptr_optimization || !b || b->hash_val != btree_ptr_hash_val(k))) return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip); if (btree_node_read_locked(path, level + 1)) btree_node_unlock(trans, path, level + 1); ret = btree_node_lock(trans, path, &b->c, level, lock_type, trace_ip); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ERR_PTR(ret); BUG_ON(ret); if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.level != level || race_fault())) { six_unlock_type(&b->c.lock, lock_type); if (bch2_btree_node_relock(trans, path, level + 1)) return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip); trace_and_count(c, trans_restart_btree_node_reused, trans, trace_ip, path); return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_lock_node_reused)); } if (unlikely(btree_node_read_in_flight(b))) { six_unlock_type(&b->c.lock, lock_type); return __bch2_btree_node_get(trans, path, k, level, lock_type, trace_ip); } prefetch(b->aux_data); for_each_bset(b, t) { void *p = (u64 *) b->aux_data + t->aux_data_offset; prefetch(p + L1_CACHE_BYTES * 0); prefetch(p + L1_CACHE_BYTES * 1); prefetch(p + L1_CACHE_BYTES * 2); } /* avoid atomic set bit if it's not needed: */ if (!btree_node_accessed(b)) set_btree_node_accessed(b); if (unlikely(btree_node_read_error(b))) { six_unlock_type(&b->c.lock, lock_type); return ERR_PTR(-BCH_ERR_btree_node_read_error); } EBUG_ON(b->c.btree_id != path->btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); return b; } struct btree *bch2_btree_node_get_noiter(struct btree_trans *trans, const struct bkey_i *k, enum btree_id btree_id, unsigned level, bool nofill) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; int ret; EBUG_ON(level >= BTREE_MAX_DEPTH); if (c->opts.btree_node_mem_ptr_optimization) { b = btree_node_mem_ptr(k); if (b) goto lock_node; } retry: b = btree_cache_find(bc, k); if (unlikely(!b)) { if (nofill) goto out; b = bch2_btree_node_fill(trans, NULL, k, btree_id, level, SIX_LOCK_read, true); /* We raced and found the btree node in the cache */ if (!b) goto retry; if (IS_ERR(b) && !bch2_btree_cache_cannibalize_lock(trans, NULL)) goto retry; if (IS_ERR(b)) goto out; } else { lock_node: ret = btree_node_lock_nopath(trans, &b->c, SIX_LOCK_read, _THIS_IP_); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ERR_PTR(ret); BUG_ON(ret); if (unlikely(b->hash_val != btree_ptr_hash_val(k) || b->c.btree_id != btree_id || b->c.level != level)) { six_unlock_read(&b->c.lock); goto retry; } } /* XXX: waiting on IO with btree locks held: */ __bch2_btree_node_wait_on_read(b); prefetch(b->aux_data); for_each_bset(b, t) { void *p = (u64 *) b->aux_data + t->aux_data_offset; prefetch(p + L1_CACHE_BYTES * 0); prefetch(p + L1_CACHE_BYTES * 1); prefetch(p + L1_CACHE_BYTES * 2); } /* avoid atomic set bit if it's not needed: */ if (!btree_node_accessed(b)) set_btree_node_accessed(b); if (unlikely(btree_node_read_error(b))) { six_unlock_read(&b->c.lock); b = ERR_PTR(-BCH_ERR_btree_node_read_error); goto out; } EBUG_ON(b->c.btree_id != btree_id); EBUG_ON(BTREE_NODE_LEVEL(b->data) != level); btree_check_header(c, b); out: bch2_btree_cache_cannibalize_unlock(trans); return b; } int bch2_btree_node_prefetch(struct btree_trans *trans, struct btree_path *path, const struct bkey_i *k, enum btree_id btree_id, unsigned level) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; BUG_ON(path && !btree_node_locked(path, level + 1)); BUG_ON(level >= BTREE_MAX_DEPTH); struct btree *b = btree_cache_find(bc, k); if (b) return 0; b = bch2_btree_node_fill(trans, path, k, btree_id, level, SIX_LOCK_read, false); int ret = PTR_ERR_OR_ZERO(b); if (ret) return ret; if (b) six_unlock_read(&b->c.lock); return 0; } void bch2_btree_node_evict(struct btree_trans *trans, const struct bkey_i *k) { struct bch_fs *c = trans->c; struct btree_cache *bc = &c->btree_cache; struct btree *b; b = btree_cache_find(bc, k); if (!b) return; BUG_ON(b == btree_node_root(trans->c, b)); wait_on_io: /* not allowed to wait on io with btree locks held: */ /* XXX we're called from btree_gc which will be holding other btree * nodes locked */ __bch2_btree_node_wait_on_read(b); __bch2_btree_node_wait_on_write(b); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_intent); btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_write); if (unlikely(b->hash_val != btree_ptr_hash_val(k))) goto out; if (btree_node_dirty(b)) { __bch2_btree_node_write(c, b, BTREE_WRITE_cache_reclaim); six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); goto wait_on_io; } BUG_ON(btree_node_dirty(b)); mutex_lock(&bc->lock); bch2_btree_node_hash_remove(bc, b); btree_node_data_free(bc, b); mutex_unlock(&bc->lock); out: six_unlock_write(&b->c.lock); six_unlock_intent(&b->c.lock); } const char *bch2_btree_id_str(enum btree_id btree) { return btree < BTREE_ID_NR ? __bch2_btree_ids[btree] : "(unknown)"; } void bch2_btree_id_to_text(struct printbuf *out, enum btree_id btree) { if (btree < BTREE_ID_NR) prt_str(out, __bch2_btree_ids[btree]); else prt_printf(out, "(unknown btree %u)", btree); } void bch2_btree_pos_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { prt_printf(out, "%s level %u/%u\n ", bch2_btree_id_str(b->c.btree_id), b->c.level, bch2_btree_id_root(c, b->c.btree_id)->level); bch2_bkey_val_to_text(out, c, bkey_i_to_s_c(&b->key)); } void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, const struct btree *b) { struct bset_stats stats; memset(&stats, 0, sizeof(stats)); bch2_btree_keys_stats(b, &stats); prt_printf(out, "l %u ", b->c.level); bch2_bpos_to_text(out, b->data->min_key); prt_printf(out, " - "); bch2_bpos_to_text(out, b->data->max_key); prt_printf(out, ":\n" " ptrs: "); bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); prt_newline(out); prt_printf(out, " format: "); bch2_bkey_format_to_text(out, &b->format); prt_printf(out, " unpack fn len: %u\n" " bytes used %zu/%zu (%zu%% full)\n" " sib u64s: %u, %u (merge threshold %u)\n" " nr packed keys %u\n" " nr unpacked keys %u\n" " floats %zu\n" " failed unpacked %zu\n", b->unpack_fn_len, b->nr.live_u64s * sizeof(u64), btree_buf_bytes(b) - sizeof(struct btree_node), b->nr.live_u64s * 100 / btree_max_u64s(c), b->sib_u64s[0], b->sib_u64s[1], c->btree_foreground_merge_threshold, b->nr.packed_keys, b->nr.unpacked_keys, stats.floats, stats.failed); } static void prt_btree_cache_line(struct printbuf *out, const struct bch_fs *c, const char *label, size_t nr) { prt_printf(out, "%s\t", label); prt_human_readable_u64(out, nr * c->opts.btree_node_size); prt_printf(out, " (%zu)\n", nr); } static const char * const bch2_btree_cache_not_freed_reasons_strs[] = { #define x(n) #n, BCH_BTREE_CACHE_NOT_FREED_REASONS() #undef x NULL }; void bch2_btree_cache_to_text(struct printbuf *out, const struct btree_cache *bc) { struct bch_fs *c = container_of(bc, struct bch_fs, btree_cache); if (!out->nr_tabstops) printbuf_tabstop_push(out, 32); prt_btree_cache_line(out, c, "live:", bc->live[0].nr); prt_btree_cache_line(out, c, "pinned:", bc->live[1].nr); prt_btree_cache_line(out, c, "freeable:", bc->nr_freeable); prt_btree_cache_line(out, c, "dirty:", atomic_long_read(&bc->nr_dirty)); prt_printf(out, "cannibalize lock:\t%p\n", bc->alloc_lock); prt_newline(out); for (unsigned i = 0; i < ARRAY_SIZE(bc->nr_by_btree); i++) prt_btree_cache_line(out, c, bch2_btree_id_str(i), bc->nr_by_btree[i]); prt_newline(out); prt_printf(out, "freed:\t%zu\n", bc->nr_freed); prt_printf(out, "not freed:\n"); for (unsigned i = 0; i < ARRAY_SIZE(bc->not_freed); i++) prt_printf(out, " %s\t%llu\n", bch2_btree_cache_not_freed_reasons_strs[i], bc->not_freed[i]); }
43 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 // SPDX-License-Identifier: 0BSD /* * Branch/Call/Jump (BCJ) filter decoders * * Authors: Lasse Collin <lasse.collin@tukaani.org> * Igor Pavlov <https://7-zip.org/> */ #include "xz_private.h" /* * The rest of the file is inside this ifdef. It makes things a little more * convenient when building without support for any BCJ filters. */ #ifdef XZ_DEC_BCJ struct xz_dec_bcj { /* Type of the BCJ filter being used */ enum { BCJ_X86 = 4, /* x86 or x86-64 */ BCJ_POWERPC = 5, /* Big endian only */ BCJ_IA64 = 6, /* Big or little endian */ BCJ_ARM = 7, /* Little endian only */ BCJ_ARMTHUMB = 8, /* Little endian only */ BCJ_SPARC = 9, /* Big or little endian */ BCJ_ARM64 = 10, /* AArch64 */ BCJ_RISCV = 11 /* RV32GQC_Zfh, RV64GQC_Zfh */ } type; /* * Return value of the next filter in the chain. We need to preserve * this information across calls, because we must not call the next * filter anymore once it has returned XZ_STREAM_END. */ enum xz_ret ret; /* True if we are operating in single-call mode. */ bool single_call; /* * Absolute position relative to the beginning of the uncompressed * data (in a single .xz Block). We care only about the lowest 32 * bits so this doesn't need to be uint64_t even with big files. */ uint32_t pos; /* x86 filter state */ uint32_t x86_prev_mask; /* Temporary space to hold the variables from struct xz_buf */ uint8_t *out; size_t out_pos; size_t out_size; struct { /* Amount of already filtered data in the beginning of buf */ size_t filtered; /* Total amount of data currently stored in buf */ size_t size; /* * Buffer to hold a mix of filtered and unfiltered data. This * needs to be big enough to hold Alignment + 2 * Look-ahead: * * Type Alignment Look-ahead * x86 1 4 * PowerPC 4 0 * IA-64 16 0 * ARM 4 0 * ARM-Thumb 2 2 * SPARC 4 0 */ uint8_t buf[16]; } temp; }; #ifdef XZ_DEC_X86 /* * This is used to test the most significant byte of a memory address * in an x86 instruction. */ static inline int bcj_x86_test_msbyte(uint8_t b) { return b == 0x00 || b == 0xFF; } static size_t bcj_x86(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { static const bool mask_to_allowed_status[8] = { true, true, true, false, true, false, false, false }; static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; size_t i; size_t prev_pos = (size_t)-1; uint32_t prev_mask = s->x86_prev_mask; uint32_t src; uint32_t dest; uint32_t j; uint8_t b; if (size <= 4) return 0; size -= 4; for (i = 0; i < size; ++i) { if ((buf[i] & 0xFE) != 0xE8) continue; prev_pos = i - prev_pos; if (prev_pos > 3) { prev_mask = 0; } else { prev_mask = (prev_mask << (prev_pos - 1)) & 7; if (prev_mask != 0) { b = buf[i + 4 - mask_to_bit_num[prev_mask]]; if (!mask_to_allowed_status[prev_mask] || bcj_x86_test_msbyte(b)) { prev_pos = i; prev_mask = (prev_mask << 1) | 1; continue; } } } prev_pos = i; if (bcj_x86_test_msbyte(buf[i + 4])) { src = get_unaligned_le32(buf + i + 1); while (true) { dest = src - (s->pos + (uint32_t)i + 5); if (prev_mask == 0) break; j = mask_to_bit_num[prev_mask] * 8; b = (uint8_t)(dest >> (24 - j)); if (!bcj_x86_test_msbyte(b)) break; src = dest ^ (((uint32_t)1 << (32 - j)) - 1); } dest &= 0x01FFFFFF; dest |= (uint32_t)0 - (dest & 0x01000000); put_unaligned_le32(dest, buf + i + 1); i += 4; } else { prev_mask = (prev_mask << 1) | 1; } } prev_pos = i - prev_pos; s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); return i; } #endif #ifdef XZ_DEC_POWERPC static size_t bcj_powerpc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { size_t i; uint32_t instr; size &= ~(size_t)3; for (i = 0; i < size; i += 4) { instr = get_unaligned_be32(buf + i); if ((instr & 0xFC000003) == 0x48000001) { instr &= 0x03FFFFFC; instr -= s->pos + (uint32_t)i; instr &= 0x03FFFFFC; instr |= 0x48000001; put_unaligned_be32(instr, buf + i); } } return i; } #endif #ifdef XZ_DEC_IA64 static size_t bcj_ia64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { static const uint8_t branch_table[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 6, 6, 0, 0, 7, 7, 4, 4, 0, 0, 4, 4, 0, 0 }; /* * The local variables take a little bit stack space, but it's less * than what LZMA2 decoder takes, so it doesn't make sense to reduce * stack usage here without doing that for the LZMA2 decoder too. */ /* Loop counters */ size_t i; size_t j; /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ uint32_t slot; /* Bitwise offset of the instruction indicated by slot */ uint32_t bit_pos; /* bit_pos split into byte and bit parts */ uint32_t byte_pos; uint32_t bit_res; /* Address part of an instruction */ uint32_t addr; /* Mask used to detect which instructions to convert */ uint32_t mask; /* 41-bit instruction stored somewhere in the lowest 48 bits */ uint64_t instr; /* Instruction normalized with bit_res for easier manipulation */ uint64_t norm; size &= ~(size_t)15; for (i = 0; i < size; i += 16) { mask = branch_table[buf[i] & 0x1F]; for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { if (((mask >> slot) & 1) == 0) continue; byte_pos = bit_pos >> 3; bit_res = bit_pos & 7; instr = 0; for (j = 0; j < 6; ++j) instr |= (uint64_t)(buf[i + j + byte_pos]) << (8 * j); norm = instr >> bit_res; if (((norm >> 37) & 0x0F) == 0x05 && ((norm >> 9) & 0x07) == 0) { addr = (norm >> 13) & 0x0FFFFF; addr |= ((uint32_t)(norm >> 36) & 1) << 20; addr <<= 4; addr -= s->pos + (uint32_t)i; addr >>= 4; norm &= ~((uint64_t)0x8FFFFF << 13); norm |= (uint64_t)(addr & 0x0FFFFF) << 13; norm |= (uint64_t)(addr & 0x100000) << (36 - 20); instr &= (1 << bit_res) - 1; instr |= norm << bit_res; for (j = 0; j < 6; j++) buf[i + j + byte_pos] = (uint8_t)(instr >> (8 * j)); } } } return i; } #endif #ifdef XZ_DEC_ARM static size_t bcj_arm(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { size_t i; uint32_t addr; size &= ~(size_t)3; for (i = 0; i < size; i += 4) { if (buf[i + 3] == 0xEB) { addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) | ((uint32_t)buf[i + 2] << 16); addr <<= 2; addr -= s->pos + (uint32_t)i + 8; addr >>= 2; buf[i] = (uint8_t)addr; buf[i + 1] = (uint8_t)(addr >> 8); buf[i + 2] = (uint8_t)(addr >> 16); } } return i; } #endif #ifdef XZ_DEC_ARMTHUMB static size_t bcj_armthumb(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { size_t i; uint32_t addr; if (size < 4) return 0; size -= 4; for (i = 0; i <= size; i += 2) { if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) { addr = (((uint32_t)buf[i + 1] & 0x07) << 19) | ((uint32_t)buf[i] << 11) | (((uint32_t)buf[i + 3] & 0x07) << 8) | (uint32_t)buf[i + 2]; addr <<= 1; addr -= s->pos + (uint32_t)i + 4; addr >>= 1; buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); buf[i] = (uint8_t)(addr >> 11); buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); buf[i + 2] = (uint8_t)addr; i += 2; } } return i; } #endif #ifdef XZ_DEC_SPARC static size_t bcj_sparc(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { size_t i; uint32_t instr; size &= ~(size_t)3; for (i = 0; i < size; i += 4) { instr = get_unaligned_be32(buf + i); if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { instr <<= 2; instr -= s->pos + (uint32_t)i; instr >>= 2; instr = ((uint32_t)0x40000000 - (instr & 0x400000)) | 0x40000000 | (instr & 0x3FFFFF); put_unaligned_be32(instr, buf + i); } } return i; } #endif #ifdef XZ_DEC_ARM64 static size_t bcj_arm64(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { size_t i; uint32_t instr; uint32_t addr; size &= ~(size_t)3; for (i = 0; i < size; i += 4) { instr = get_unaligned_le32(buf + i); if ((instr >> 26) == 0x25) { /* BL instruction */ addr = instr - ((s->pos + (uint32_t)i) >> 2); instr = 0x94000000 | (addr & 0x03FFFFFF); put_unaligned_le32(instr, buf + i); } else if ((instr & 0x9F000000) == 0x90000000) { /* ADRP instruction */ addr = ((instr >> 29) & 3) | ((instr >> 3) & 0x1FFFFC); /* Only convert values in the range +/-512 MiB. */ if ((addr + 0x020000) & 0x1C0000) continue; addr -= (s->pos + (uint32_t)i) >> 12; instr &= 0x9000001F; instr |= (addr & 3) << 29; instr |= (addr & 0x03FFFC) << 3; instr |= (0U - (addr & 0x020000)) & 0xE00000; put_unaligned_le32(instr, buf + i); } } return i; } #endif #ifdef XZ_DEC_RISCV static size_t bcj_riscv(struct xz_dec_bcj *s, uint8_t *buf, size_t size) { size_t i; uint32_t b1; uint32_t b2; uint32_t b3; uint32_t instr; uint32_t instr2; uint32_t instr2_rs1; uint32_t addr; if (size < 8) return 0; size -= 8; for (i = 0; i <= size; i += 2) { instr = buf[i]; if (instr == 0xEF) { /* JAL */ b1 = buf[i + 1]; if ((b1 & 0x0D) != 0) continue; b2 = buf[i + 2]; b3 = buf[i + 3]; addr = ((b1 & 0xF0) << 13) | (b2 << 9) | (b3 << 1); addr -= s->pos + (uint32_t)i; buf[i + 1] = (uint8_t)((b1 & 0x0F) | ((addr >> 8) & 0xF0)); buf[i + 2] = (uint8_t)(((addr >> 16) & 0x0F) | ((addr >> 7) & 0x10) | ((addr << 4) & 0xE0)); buf[i + 3] = (uint8_t)(((addr >> 4) & 0x7F) | ((addr >> 13) & 0x80)); i += 4 - 2; } else if ((instr & 0x7F) == 0x17) { /* AUIPC */ instr |= (uint32_t)buf[i + 1] << 8; instr |= (uint32_t)buf[i + 2] << 16; instr |= (uint32_t)buf[i + 3] << 24; if (instr & 0xE80) { /* AUIPC's rd doesn't equal x0 or x2. */ instr2 = get_unaligned_le32(buf + i + 4); if (((instr << 8) ^ (instr2 - 3)) & 0xF8003) { i += 6 - 2; continue; } addr = (instr & 0xFFFFF000) + (instr2 >> 20); instr = 0x17 | (2 << 7) | (instr2 << 12); instr2 = addr; } else { /* AUIPC's rd equals x0 or x2. */ instr2_rs1 = instr >> 27; if ((uint32_t)((instr - 0x3117) << 18) >= (instr2_rs1 & 0x1D)) { i += 4 - 2; continue; } addr = get_unaligned_be32(buf + i + 4); addr -= s->pos + (uint32_t)i; instr2 = (instr >> 12) | (addr << 20); instr = 0x17 | (instr2_rs1 << 7) | ((addr + 0x800) & 0xFFFFF000); } put_unaligned_le32(instr, buf + i); put_unaligned_le32(instr2, buf + i + 4); i += 8 - 2; } } return i; } #endif /* * Apply the selected BCJ filter. Update *pos and s->pos to match the amount * of data that got filtered. * * NOTE: This is implemented as a switch statement to avoid using function * pointers, which could be problematic in the kernel boot code, which must * avoid pointers to static data (at least on x86). */ static void bcj_apply(struct xz_dec_bcj *s, uint8_t *buf, size_t *pos, size_t size) { size_t filtered; buf += *pos; size -= *pos; switch (s->type) { #ifdef XZ_DEC_X86 case BCJ_X86: filtered = bcj_x86(s, buf, size); break; #endif #ifdef XZ_DEC_POWERPC case BCJ_POWERPC: filtered = bcj_powerpc(s, buf, size); break; #endif #ifdef XZ_DEC_IA64 case BCJ_IA64: filtered = bcj_ia64(s, buf, size); break; #endif #ifdef XZ_DEC_ARM case BCJ_ARM: filtered = bcj_arm(s, buf, size); break; #endif #ifdef XZ_DEC_ARMTHUMB case BCJ_ARMTHUMB: filtered = bcj_armthumb(s, buf, size); break; #endif #ifdef XZ_DEC_SPARC case BCJ_SPARC: filtered = bcj_sparc(s, buf, size); break; #endif #ifdef XZ_DEC_ARM64 case BCJ_ARM64: filtered = bcj_arm64(s, buf, size); break; #endif #ifdef XZ_DEC_RISCV case BCJ_RISCV: filtered = bcj_riscv(s, buf, size); break; #endif default: /* Never reached but silence compiler warnings. */ filtered = 0; break; } *pos += filtered; s->pos += filtered; } /* * Flush pending filtered data from temp to the output buffer. * Move the remaining mixture of possibly filtered and unfiltered * data to the beginning of temp. */ static void bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) { size_t copy_size; copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); memcpy(b->out + b->out_pos, s->temp.buf, copy_size); b->out_pos += copy_size; s->temp.filtered -= copy_size; s->temp.size -= copy_size; memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); } /* * The BCJ filter functions are primitive in sense that they process the * data in chunks of 1-16 bytes. To hide this issue, this function does * some buffering. */ enum xz_ret xz_dec_bcj_run(struct xz_dec_bcj *s, struct xz_dec_lzma2 *lzma2, struct xz_buf *b) { size_t out_start; /* * Flush pending already filtered data to the output buffer. Return * immediately if we couldn't flush everything, or if the next * filter in the chain had already returned XZ_STREAM_END. */ if (s->temp.filtered > 0) { bcj_flush(s, b); if (s->temp.filtered > 0) return XZ_OK; if (s->ret == XZ_STREAM_END) return XZ_STREAM_END; } /* * If we have more output space than what is currently pending in * temp, copy the unfiltered data from temp to the output buffer * and try to fill the output buffer by decoding more data from the * next filter in the chain. Apply the BCJ filter on the new data * in the output buffer. If everything cannot be filtered, copy it * to temp and rewind the output buffer position accordingly. * * This needs to be always run when temp.size == 0 to handle a special * case where the output buffer is full and the next filter has no * more output coming but hasn't returned XZ_STREAM_END yet. */ if (s->temp.size < b->out_size - b->out_pos || s->temp.size == 0) { out_start = b->out_pos; memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); b->out_pos += s->temp.size; s->ret = xz_dec_lzma2_run(lzma2, b); if (s->ret != XZ_STREAM_END && (s->ret != XZ_OK || s->single_call)) return s->ret; bcj_apply(s, b->out, &out_start, b->out_pos); /* * As an exception, if the next filter returned XZ_STREAM_END, * we can do that too, since the last few bytes that remain * unfiltered are meant to remain unfiltered. */ if (s->ret == XZ_STREAM_END) return XZ_STREAM_END; s->temp.size = b->out_pos - out_start; b->out_pos -= s->temp.size; memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); /* * If there wasn't enough input to the next filter to fill * the output buffer with unfiltered data, there's no point * to try decoding more data to temp. */ if (b->out_pos + s->temp.size < b->out_size) return XZ_OK; } /* * We have unfiltered data in temp. If the output buffer isn't full * yet, try to fill the temp buffer by decoding more data from the * next filter. Apply the BCJ filter on temp. Then we hopefully can * fill the actual output buffer by copying filtered data from temp. * A mix of filtered and unfiltered data may be left in temp; it will * be taken care on the next call to this function. */ if (b->out_pos < b->out_size) { /* Make b->out{,_pos,_size} temporarily point to s->temp. */ s->out = b->out; s->out_pos = b->out_pos; s->out_size = b->out_size; b->out = s->temp.buf; b->out_pos = s->temp.size; b->out_size = sizeof(s->temp.buf); s->ret = xz_dec_lzma2_run(lzma2, b); s->temp.size = b->out_pos; b->out = s->out; b->out_pos = s->out_pos; b->out_size = s->out_size; if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) return s->ret; bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); /* * If the next filter returned XZ_STREAM_END, we mark that * everything is filtered, since the last unfiltered bytes * of the stream are meant to be left as is. */ if (s->ret == XZ_STREAM_END) s->temp.filtered = s->temp.size; bcj_flush(s, b); if (s->temp.filtered > 0) return XZ_OK; } return s->ret; } struct xz_dec_bcj *xz_dec_bcj_create(bool single_call) { struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s != NULL) s->single_call = single_call; return s; } enum xz_ret xz_dec_bcj_reset(struct xz_dec_bcj *s, uint8_t id) { switch (id) { #ifdef XZ_DEC_X86 case BCJ_X86: #endif #ifdef XZ_DEC_POWERPC case BCJ_POWERPC: #endif #ifdef XZ_DEC_IA64 case BCJ_IA64: #endif #ifdef XZ_DEC_ARM case BCJ_ARM: #endif #ifdef XZ_DEC_ARMTHUMB case BCJ_ARMTHUMB: #endif #ifdef XZ_DEC_SPARC case BCJ_SPARC: #endif #ifdef XZ_DEC_ARM64 case BCJ_ARM64: #endif #ifdef XZ_DEC_RISCV case BCJ_RISCV: #endif break; default: /* Unsupported Filter ID */ return XZ_OPTIONS_ERROR; } s->type = id; s->ret = XZ_OK; s->pos = 0; s->x86_prev_mask = 0; s->temp.filtered = 0; s->temp.size = 0; return XZ_OK; } #endif
72 72 72 5 1 4 3 1 2 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 // SPDX-License-Identifier: GPL-2.0-or-later /* * V4L2 controls framework Request API implementation. * * Copyright (C) 2018-2021 Hans Verkuil <hverkuil-cisco@xs4all.nl> */ #define pr_fmt(fmt) "v4l2-ctrls: " fmt #include <linux/export.h> #include <linux/slab.h> #include <media/v4l2-ctrls.h> #include <media/v4l2-dev.h> #include <media/v4l2-ioctl.h> #include "v4l2-ctrls-priv.h" /* Initialize the request-related fields in a control handler */ void v4l2_ctrl_handler_init_request(struct v4l2_ctrl_handler *hdl) { INIT_LIST_HEAD(&hdl->requests); INIT_LIST_HEAD(&hdl->requests_queued); hdl->request_is_queued = false; media_request_object_init(&hdl->req_obj); } /* Free the request-related fields in a control handler */ void v4l2_ctrl_handler_free_request(struct v4l2_ctrl_handler *hdl) { struct v4l2_ctrl_handler *req, *next_req; /* * Do nothing if this isn't the main handler or the main * handler is not used in any request. * * The main handler can be identified by having a NULL ops pointer in * the request object. */ if (hdl->req_obj.ops || list_empty(&hdl->requests)) return; /* * If the main handler is freed and it is used by handler objects in * outstanding requests, then unbind and put those objects before * freeing the main handler. */ list_for_each_entry_safe(req, next_req, &hdl->requests, requests) { media_request_object_unbind(&req->req_obj); media_request_object_put(&req->req_obj); } } static int v4l2_ctrl_request_clone(struct v4l2_ctrl_handler *hdl, const struct v4l2_ctrl_handler *from) { struct v4l2_ctrl_ref *ref; int err = 0; if (WARN_ON(!hdl || hdl == from)) return -EINVAL; if (hdl->error) return hdl->error; WARN_ON(hdl->lock != &hdl->_lock); mutex_lock(from->lock); list_for_each_entry(ref, &from->ctrl_refs, node) { struct v4l2_ctrl *ctrl = ref->ctrl; struct v4l2_ctrl_ref *new_ref; /* Skip refs inherited from other devices */ if (ref->from_other_dev) continue; err = handler_new_ref(hdl, ctrl, &new_ref, false, true); if (err) break; } mutex_unlock(from->lock); return err; } static void v4l2_ctrl_request_queue(struct media_request_object *obj) { struct v4l2_ctrl_handler *hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); struct v4l2_ctrl_handler *main_hdl = obj->priv; mutex_lock(main_hdl->lock); list_add_tail(&hdl->requests_queued, &main_hdl->requests_queued); hdl->request_is_queued = true; mutex_unlock(main_hdl->lock); } static void v4l2_ctrl_request_unbind(struct media_request_object *obj) { struct v4l2_ctrl_handler *hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); struct v4l2_ctrl_handler *main_hdl = obj->priv; mutex_lock(main_hdl->lock); list_del_init(&hdl->requests); if (hdl->request_is_queued) { list_del_init(&hdl->requests_queued); hdl->request_is_queued = false; } mutex_unlock(main_hdl->lock); } static void v4l2_ctrl_request_release(struct media_request_object *obj) { struct v4l2_ctrl_handler *hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); v4l2_ctrl_handler_free(hdl); kfree(hdl); } static const struct media_request_object_ops req_ops = { .queue = v4l2_ctrl_request_queue, .unbind = v4l2_ctrl_request_unbind, .release = v4l2_ctrl_request_release, }; struct v4l2_ctrl_handler *v4l2_ctrl_request_hdl_find(struct media_request *req, struct v4l2_ctrl_handler *parent) { struct media_request_object *obj; if (WARN_ON(req->state != MEDIA_REQUEST_STATE_VALIDATING && req->state != MEDIA_REQUEST_STATE_QUEUED)) return NULL; obj = media_request_object_find(req, &req_ops, parent); if (obj) return container_of(obj, struct v4l2_ctrl_handler, req_obj); return NULL; } EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_find); struct v4l2_ctrl * v4l2_ctrl_request_hdl_ctrl_find(struct v4l2_ctrl_handler *hdl, u32 id) { struct v4l2_ctrl_ref *ref = find_ref_lock(hdl, id); return (ref && ref->p_req_valid) ? ref->ctrl : NULL; } EXPORT_SYMBOL_GPL(v4l2_ctrl_request_hdl_ctrl_find); static int v4l2_ctrl_request_bind(struct media_request *req, struct v4l2_ctrl_handler *hdl, struct v4l2_ctrl_handler *from) { int ret; ret = v4l2_ctrl_request_clone(hdl, from); if (!ret) { ret = media_request_object_bind(req, &req_ops, from, false, &hdl->req_obj); if (!ret) { mutex_lock(from->lock); list_add_tail(&hdl->requests, &from->requests); mutex_unlock(from->lock); } } return ret; } static struct media_request_object * v4l2_ctrls_find_req_obj(struct v4l2_ctrl_handler *hdl, struct media_request *req, bool set) { struct media_request_object *obj; struct v4l2_ctrl_handler *new_hdl; int ret; if (IS_ERR(req)) return ERR_CAST(req); if (set && WARN_ON(req->state != MEDIA_REQUEST_STATE_UPDATING)) return ERR_PTR(-EBUSY); obj = media_request_object_find(req, &req_ops, hdl); if (obj) return obj; /* * If there are no controls in this completed request, * then that can only happen if: * * 1) no controls were present in the queued request, and * 2) v4l2_ctrl_request_complete() could not allocate a * control handler object to store the completed state in. * * So return ENOMEM to indicate that there was an out-of-memory * error. */ if (!set) return ERR_PTR(-ENOMEM); new_hdl = kzalloc(sizeof(*new_hdl), GFP_KERNEL); if (!new_hdl) return ERR_PTR(-ENOMEM); obj = &new_hdl->req_obj; ret = v4l2_ctrl_handler_init(new_hdl, (hdl->nr_of_buckets - 1) * 8); if (!ret) ret = v4l2_ctrl_request_bind(req, new_hdl, hdl); if (ret) { v4l2_ctrl_handler_free(new_hdl); kfree(new_hdl); return ERR_PTR(ret); } media_request_object_get(obj); return obj; } int v4l2_g_ext_ctrls_request(struct v4l2_ctrl_handler *hdl, struct video_device *vdev, struct media_device *mdev, struct v4l2_ext_controls *cs) { struct media_request_object *obj = NULL; struct media_request *req = NULL; int ret; if (!mdev || cs->request_fd < 0) return -EINVAL; req = media_request_get_by_fd(mdev, cs->request_fd); if (IS_ERR(req)) return PTR_ERR(req); if (req->state != MEDIA_REQUEST_STATE_COMPLETE) { media_request_put(req); return -EACCES; } ret = media_request_lock_for_access(req); if (ret) { media_request_put(req); return ret; } obj = v4l2_ctrls_find_req_obj(hdl, req, false); if (IS_ERR(obj)) { media_request_unlock_for_access(req); media_request_put(req); return PTR_ERR(obj); } hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); ret = v4l2_g_ext_ctrls_common(hdl, cs, vdev); media_request_unlock_for_access(req); media_request_object_put(obj); media_request_put(req); return ret; } int try_set_ext_ctrls_request(struct v4l2_fh *fh, struct v4l2_ctrl_handler *hdl, struct video_device *vdev, struct media_device *mdev, struct v4l2_ext_controls *cs, bool set) { struct media_request_object *obj = NULL; struct media_request *req = NULL; int ret; if (!mdev) { dprintk(vdev, "%s: missing media device\n", video_device_node_name(vdev)); return -EINVAL; } if (cs->request_fd < 0) { dprintk(vdev, "%s: invalid request fd %d\n", video_device_node_name(vdev), cs->request_fd); return -EINVAL; } req = media_request_get_by_fd(mdev, cs->request_fd); if (IS_ERR(req)) { dprintk(vdev, "%s: cannot find request fd %d\n", video_device_node_name(vdev), cs->request_fd); return PTR_ERR(req); } ret = media_request_lock_for_update(req); if (ret) { dprintk(vdev, "%s: cannot lock request fd %d\n", video_device_node_name(vdev), cs->request_fd); media_request_put(req); return ret; } obj = v4l2_ctrls_find_req_obj(hdl, req, set); if (IS_ERR(obj)) { dprintk(vdev, "%s: cannot find request object for request fd %d\n", video_device_node_name(vdev), cs->request_fd); media_request_unlock_for_update(req); media_request_put(req); return PTR_ERR(obj); } hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); ret = try_set_ext_ctrls_common(fh, hdl, cs, vdev, set); if (ret) dprintk(vdev, "%s: try_set_ext_ctrls_common failed (%d)\n", video_device_node_name(vdev), ret); media_request_unlock_for_update(req); media_request_object_put(obj); media_request_put(req); return ret; } void v4l2_ctrl_request_complete(struct media_request *req, struct v4l2_ctrl_handler *main_hdl) { struct media_request_object *obj; struct v4l2_ctrl_handler *hdl; struct v4l2_ctrl_ref *ref; if (!req || !main_hdl) return; /* * Note that it is valid if nothing was found. It means * that this request doesn't have any controls and so just * wants to leave the controls unchanged. */ obj = media_request_object_find(req, &req_ops, main_hdl); if (!obj) { int ret; /* Create a new request so the driver can return controls */ hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return; ret = v4l2_ctrl_handler_init(hdl, (main_hdl->nr_of_buckets - 1) * 8); if (!ret) ret = v4l2_ctrl_request_bind(req, hdl, main_hdl); if (ret) { v4l2_ctrl_handler_free(hdl); kfree(hdl); return; } hdl->request_is_queued = true; obj = media_request_object_find(req, &req_ops, main_hdl); } hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); list_for_each_entry(ref, &hdl->ctrl_refs, node) { struct v4l2_ctrl *ctrl = ref->ctrl; struct v4l2_ctrl *master = ctrl->cluster[0]; unsigned int i; if (ctrl->flags & V4L2_CTRL_FLAG_VOLATILE) { v4l2_ctrl_lock(master); /* g_volatile_ctrl will update the current control values */ for (i = 0; i < master->ncontrols; i++) cur_to_new(master->cluster[i]); call_op(master, g_volatile_ctrl); new_to_req(ref); v4l2_ctrl_unlock(master); continue; } if (ref->p_req_valid) continue; /* Copy the current control value into the request */ v4l2_ctrl_lock(ctrl); cur_to_req(ref); v4l2_ctrl_unlock(ctrl); } mutex_lock(main_hdl->lock); WARN_ON(!hdl->request_is_queued); list_del_init(&hdl->requests_queued); hdl->request_is_queued = false; mutex_unlock(main_hdl->lock); media_request_object_complete(obj); media_request_object_put(obj); } EXPORT_SYMBOL(v4l2_ctrl_request_complete); int v4l2_ctrl_request_setup(struct media_request *req, struct v4l2_ctrl_handler *main_hdl) { struct media_request_object *obj; struct v4l2_ctrl_handler *hdl; struct v4l2_ctrl_ref *ref; int ret = 0; if (!req || !main_hdl) return 0; if (WARN_ON(req->state != MEDIA_REQUEST_STATE_QUEUED)) return -EBUSY; /* * Note that it is valid if nothing was found. It means * that this request doesn't have any controls and so just * wants to leave the controls unchanged. */ obj = media_request_object_find(req, &req_ops, main_hdl); if (!obj) return 0; if (obj->completed) { media_request_object_put(obj); return -EBUSY; } hdl = container_of(obj, struct v4l2_ctrl_handler, req_obj); list_for_each_entry(ref, &hdl->ctrl_refs, node) ref->req_done = false; list_for_each_entry(ref, &hdl->ctrl_refs, node) { struct v4l2_ctrl *ctrl = ref->ctrl; struct v4l2_ctrl *master = ctrl->cluster[0]; bool have_new_data = false; int i; /* * Skip if this control was already handled by a cluster. * Skip button controls and read-only controls. */ if (ref->req_done || (ctrl->flags & V4L2_CTRL_FLAG_READ_ONLY)) continue; v4l2_ctrl_lock(master); for (i = 0; i < master->ncontrols; i++) { if (master->cluster[i]) { struct v4l2_ctrl_ref *r = find_ref(hdl, master->cluster[i]->id); if (r->p_req_valid) { have_new_data = true; break; } } } if (!have_new_data) { v4l2_ctrl_unlock(master); continue; } for (i = 0; i < master->ncontrols; i++) { if (master->cluster[i]) { struct v4l2_ctrl_ref *r = find_ref(hdl, master->cluster[i]->id); ret = req_to_new(r); if (ret) { v4l2_ctrl_unlock(master); goto error; } master->cluster[i]->is_new = 1; r->req_done = true; } } /* * For volatile autoclusters that are currently in auto mode * we need to discover if it will be set to manual mode. * If so, then we have to copy the current volatile values * first since those will become the new manual values (which * may be overwritten by explicit new values from this set * of controls). */ if (master->is_auto && master->has_volatiles && !is_cur_manual(master)) { s32 new_auto_val = *master->p_new.p_s32; /* * If the new value == the manual value, then copy * the current volatile values. */ if (new_auto_val == master->manual_mode_value) update_from_auto_cluster(master); } ret = try_or_set_cluster(NULL, master, true, 0); v4l2_ctrl_unlock(master); if (ret) break; } error: media_request_object_put(obj); return ret; } EXPORT_SYMBOL(v4l2_ctrl_request_setup);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Packet RX/TX history data structures and routines for TFRC-based protocols. * * Copyright (c) 2007 The University of Aberdeen, Scotland, UK * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. * * This code has been developed by the University of Waikato WAND * research group. For further information please see https://www.wand.net.nz/ * or e-mail Ian McDonald - ian.mcdonald@jandi.co.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon * * Changes to meet Linux coding standards, to make it meet latest ccid3 draft * and to make it work as a loadable module in the DCCP stack written by * Arnaldo Carvalho de Melo <acme@conectiva.com.br>. * * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br> */ #ifndef _DCCP_PKT_HIST_ #define _DCCP_PKT_HIST_ #include <linux/list.h> #include <linux/slab.h> #include "tfrc.h" /** * tfrc_tx_hist_entry - Simple singly-linked TX history list * @next: next oldest entry (LIFO order) * @seqno: sequence number of this entry * @stamp: send time of packet with sequence number @seqno */ struct tfrc_tx_hist_entry { struct tfrc_tx_hist_entry *next; u64 seqno; ktime_t stamp; }; static inline struct tfrc_tx_hist_entry * tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno) { while (head != NULL && head->seqno != seqno) head = head->next; return head; } int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); /* Subtraction a-b modulo-16, respects circular wrap-around */ #define SUB16(a, b) (((a) + 16 - (b)) & 0xF) /* Number of packets to wait after a missing packet (RFC 4342, 6.1) */ #define TFRC_NDUPACK 3 /** * tfrc_rx_hist_entry - Store information about a single received packet * @tfrchrx_seqno: DCCP packet sequence number * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1) * @tfrchrx_ndp: the NDP count (if any) of the packet * @tfrchrx_tstamp: actual receive time of packet */ struct tfrc_rx_hist_entry { u64 tfrchrx_seqno:48, tfrchrx_ccval:4, tfrchrx_type:4; u64 tfrchrx_ndp:48; ktime_t tfrchrx_tstamp; }; /** * tfrc_rx_hist - RX history structure for TFRC-based protocols * @ring: Packet history for RTT sampling and loss detection * @loss_count: Number of entries in circular history * @loss_start: Movable index (for loss detection) * @rtt_sample_prev: Used during RTT sampling, points to candidate entry */ struct tfrc_rx_hist { struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1]; u8 loss_count:2, loss_start:2; #define rtt_sample_prev loss_start }; /** * tfrc_rx_hist_index - index to reach n-th entry after loss_start */ static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n) { return (h->loss_start + n) & TFRC_NDUPACK; } /** * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h) { return h->ring[tfrc_rx_hist_index(h, h->loss_count)]; } /** * tfrc_rx_hist_entry - return the n-th history entry after loss_start */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n) { return h->ring[tfrc_rx_hist_index(h, n)]; } /** * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected */ static inline struct tfrc_rx_hist_entry * tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h) { return h->ring[h->loss_start]; } /* indicate whether previously a packet was detected missing */ static inline bool tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h) { return h->loss_count > 0; } void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h, const struct sk_buff *skb, const u64 ndp); int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb); struct tfrc_loss_hist; int tfrc_rx_handle_loss(struct tfrc_rx_hist *h, struct tfrc_loss_hist *lh, struct sk_buff *skb, const u64 ndp, u32 (*first_li)(struct sock *sk), struct sock *sk); u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb); int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h); void tfrc_rx_hist_purge(struct tfrc_rx_hist *h); #endif /* _DCCP_PKT_HIST_ */
474 65 133 133 30 22 13 13 31 3 3 2 8 1 1 470 24 8 173 174 173 11 172 176 34 173 116 6 113 140 1 32 1 31 31 1 30 115 37 24 15 1 1 26 1 25 2 51 45 174 473 473 474 474 474 76 454 28 445 454 88 6 12 12 8 8 31 31 3 145 15 175 176 474 473 474 473 471 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 // SPDX-License-Identifier: GPL-2.0-only /* * 9P Protocol Support Code * * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> * * Base on code from Anthony Liguori <aliguori@us.ibm.com> * Copyright (C) 2008 by IBM, Corp. */ #include <linux/module.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/uaccess.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/stddef.h> #include <linux/types.h> #include <linux/uio.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include "protocol.h" #include <trace/events/9p.h> /* len[2] text[len] */ #define P9_STRLEN(s) \ (2 + min_t(size_t, s ? strlen(s) : 0, USHRT_MAX)) /** * p9_msg_buf_size - Returns a buffer size sufficiently large to hold the * intended 9p message. * @c: client * @type: message type * @fmt: format template for assembling request message * (see p9pdu_vwritef) * @ap: variable arguments to be fed to passed format template * (see p9pdu_vwritef) * * Note: Even for response types (P9_R*) the format template and variable * arguments must always be for the originating request type (P9_T*). */ size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type, const char *fmt, va_list ap) { /* size[4] type[1] tag[2] */ const int hdr = 4 + 1 + 2; /* ename[s] errno[4] */ const int rerror_size = hdr + P9_ERRMAX + 4; /* ecode[4] */ const int rlerror_size = hdr + 4; const int err_size = c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size; static_assert(NAME_MAX <= 4*1024, "p9_msg_buf_size() currently assumes " "a max. allowed directory entry name length of 4k"); switch (type) { /* message types not used at all */ case P9_TERROR: case P9_TLERROR: case P9_TAUTH: case P9_RAUTH: BUG(); /* variable length & potentially large message types */ case P9_TATTACH: BUG_ON(strcmp("ddss?u", fmt)); va_arg(ap, int32_t); va_arg(ap, int32_t); { const char *uname = va_arg(ap, const char *); const char *aname = va_arg(ap, const char *); /* fid[4] afid[4] uname[s] aname[s] n_uname[4] */ return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4; } case P9_TWALK: BUG_ON(strcmp("ddT", fmt)); va_arg(ap, int32_t); va_arg(ap, int32_t); { uint i, nwname = va_arg(ap, int); size_t wname_all; const char **wnames = va_arg(ap, const char **); for (i = 0, wname_all = 0; i < nwname; ++i) { wname_all += P9_STRLEN(wnames[i]); } /* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */ return hdr + 4 + 4 + 2 + wname_all; } case P9_RWALK: BUG_ON(strcmp("ddT", fmt)); va_arg(ap, int32_t); va_arg(ap, int32_t); { uint nwname = va_arg(ap, int); /* nwqid[2] nwqid*(wqid[13]) */ return max_t(size_t, hdr + 2 + nwname * 13, err_size); } case P9_TCREATE: BUG_ON(strcmp("dsdb?s", fmt)); va_arg(ap, int32_t); { const char *name = va_arg(ap, const char *); if (c->proto_version == p9_proto_legacy) { /* fid[4] name[s] perm[4] mode[1] */ return hdr + 4 + P9_STRLEN(name) + 4 + 1; } else { va_arg(ap, int32_t); va_arg(ap, int); { const char *ext = va_arg(ap, const char *); /* fid[4] name[s] perm[4] mode[1] extension[s] */ return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext); } } } case P9_TLCREATE: BUG_ON(strcmp("dsddg", fmt)); va_arg(ap, int32_t); { const char *name = va_arg(ap, const char *); /* fid[4] name[s] flags[4] mode[4] gid[4] */ return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4; } case P9_RREAD: case P9_RREADDIR: BUG_ON(strcmp("dqd", fmt)); va_arg(ap, int32_t); va_arg(ap, int64_t); { const int32_t count = va_arg(ap, int32_t); /* count[4] data[count] */ return max_t(size_t, hdr + 4 + count, err_size); } case P9_TWRITE: BUG_ON(strcmp("dqV", fmt)); va_arg(ap, int32_t); va_arg(ap, int64_t); { const int32_t count = va_arg(ap, int32_t); /* fid[4] offset[8] count[4] data[count] */ return hdr + 4 + 8 + 4 + count; } case P9_TRENAMEAT: BUG_ON(strcmp("dsds", fmt)); va_arg(ap, int32_t); { const char *oldname, *newname; oldname = va_arg(ap, const char *); va_arg(ap, int32_t); newname = va_arg(ap, const char *); /* olddirfid[4] oldname[s] newdirfid[4] newname[s] */ return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname); } case P9_TSYMLINK: BUG_ON(strcmp("dssg", fmt)); va_arg(ap, int32_t); { const char *name = va_arg(ap, const char *); const char *symtgt = va_arg(ap, const char *); /* fid[4] name[s] symtgt[s] gid[4] */ return hdr + 4 + P9_STRLEN(name) + P9_STRLEN(symtgt) + 4; } case P9_RERROR: return rerror_size; case P9_RLERROR: return rlerror_size; /* small message types */ case P9_TWSTAT: case P9_RSTAT: case P9_RREADLINK: case P9_TXATTRWALK: case P9_TXATTRCREATE: case P9_TLINK: case P9_TMKDIR: case P9_TMKNOD: case P9_TRENAME: case P9_TUNLINKAT: case P9_TLOCK: return 8 * 1024; /* tiny message types */ default: return 4 * 1024; } } static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); void p9stat_free(struct p9_wstat *stbuf) { kfree(stbuf->name); stbuf->name = NULL; kfree(stbuf->uid); stbuf->uid = NULL; kfree(stbuf->gid); stbuf->gid = NULL; kfree(stbuf->muid); stbuf->muid = NULL; kfree(stbuf->extension); stbuf->extension = NULL; } EXPORT_SYMBOL(p9stat_free); size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) { size_t len = min(pdu->size - pdu->offset, size); memcpy(data, &pdu->sdata[pdu->offset], len); pdu->offset += len; return size - len; } static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) { size_t len = min(pdu->capacity - pdu->size, size); memcpy(&pdu->sdata[pdu->size], data, len); pdu->size += len; return size - len; } static size_t pdu_write_u(struct p9_fcall *pdu, struct iov_iter *from, size_t size) { size_t len = min(pdu->capacity - pdu->size, size); if (!copy_from_iter_full(&pdu->sdata[pdu->size], len, from)) len = 0; pdu->size += len; return size - len; } /* b - int8_t * w - int16_t * d - int32_t * q - int64_t * s - string * u - numeric uid * g - numeric gid * S - stat * Q - qid * D - data blob (int32_t size followed by void *, results are not freed) * T - array of strings (int16_t count, followed by strings) * R - array of qids (int16_t count, followed by qids) * A - stat for 9p2000.L (p9_stat_dotl) * ? - if optional = 1, continue parsing */ static int p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap) { const char *ptr; int errcode = 0; for (ptr = fmt; *ptr; ptr++) { switch (*ptr) { case 'b':{ int8_t *val = va_arg(ap, int8_t *); if (pdu_read(pdu, val, sizeof(*val))) { errcode = -EFAULT; break; } } break; case 'w':{ int16_t *val = va_arg(ap, int16_t *); __le16 le_val; if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } *val = le16_to_cpu(le_val); } break; case 'd':{ int32_t *val = va_arg(ap, int32_t *); __le32 le_val; if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } *val = le32_to_cpu(le_val); } break; case 'q':{ int64_t *val = va_arg(ap, int64_t *); __le64 le_val; if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } *val = le64_to_cpu(le_val); } break; case 's':{ char **sptr = va_arg(ap, char **); uint16_t len; errcode = p9pdu_readf(pdu, proto_version, "w", &len); if (errcode) break; *sptr = kmalloc(len + 1, GFP_NOFS); if (*sptr == NULL) { errcode = -ENOMEM; break; } if (pdu_read(pdu, *sptr, len)) { errcode = -EFAULT; kfree(*sptr); *sptr = NULL; } else (*sptr)[len] = 0; } break; case 'u': { kuid_t *uid = va_arg(ap, kuid_t *); __le32 le_val; if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } *uid = make_kuid(&init_user_ns, le32_to_cpu(le_val)); } break; case 'g': { kgid_t *gid = va_arg(ap, kgid_t *); __le32 le_val; if (pdu_read(pdu, &le_val, sizeof(le_val))) { errcode = -EFAULT; break; } *gid = make_kgid(&init_user_ns, le32_to_cpu(le_val)); } break; case 'Q':{ struct p9_qid *qid = va_arg(ap, struct p9_qid *); errcode = p9pdu_readf(pdu, proto_version, "bdq", &qid->type, &qid->version, &qid->path); } break; case 'S':{ struct p9_wstat *stbuf = va_arg(ap, struct p9_wstat *); memset(stbuf, 0, sizeof(struct p9_wstat)); stbuf->n_uid = stbuf->n_muid = INVALID_UID; stbuf->n_gid = INVALID_GID; errcode = p9pdu_readf(pdu, proto_version, "wwdQdddqssss?sugu", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, &stbuf->mode, &stbuf->atime, &stbuf->mtime, &stbuf->length, &stbuf->name, &stbuf->uid, &stbuf->gid, &stbuf->muid, &stbuf->extension, &stbuf->n_uid, &stbuf->n_gid, &stbuf->n_muid); if (errcode) p9stat_free(stbuf); } break; case 'D':{ uint32_t *count = va_arg(ap, uint32_t *); void **data = va_arg(ap, void **); errcode = p9pdu_readf(pdu, proto_version, "d", count); if (!errcode) { *count = min_t(uint32_t, *count, pdu->size - pdu->offset); *data = &pdu->sdata[pdu->offset]; } } break; case 'T':{ uint16_t *nwname = va_arg(ap, uint16_t *); char ***wnames = va_arg(ap, char ***); *wnames = NULL; errcode = p9pdu_readf(pdu, proto_version, "w", nwname); if (!errcode) { *wnames = kmalloc_array(*nwname, sizeof(char *), GFP_NOFS); if (!*wnames) errcode = -ENOMEM; else (*wnames)[0] = NULL; } if (!errcode) { int i; for (i = 0; i < *nwname; i++) { errcode = p9pdu_readf(pdu, proto_version, "s", &(*wnames)[i]); if (errcode) { (*wnames)[i] = NULL; break; } } } if (errcode) { if (*wnames) { int i; for (i = 0; i < *nwname; i++) { if (!(*wnames)[i]) break; kfree((*wnames)[i]); } kfree(*wnames); *wnames = NULL; } } } break; case 'R':{ uint16_t *nwqid = va_arg(ap, uint16_t *); struct p9_qid **wqids = va_arg(ap, struct p9_qid **); *wqids = NULL; errcode = p9pdu_readf(pdu, proto_version, "w", nwqid); if (!errcode) { *wqids = kmalloc_array(*nwqid, sizeof(struct p9_qid), GFP_NOFS); if (*wqids == NULL) errcode = -ENOMEM; } if (!errcode) { int i; for (i = 0; i < *nwqid; i++) { errcode = p9pdu_readf(pdu, proto_version, "Q", &(*wqids)[i]); if (errcode) break; } } if (errcode) { kfree(*wqids); *wqids = NULL; } } break; case 'A': { struct p9_stat_dotl *stbuf = va_arg(ap, struct p9_stat_dotl *); memset(stbuf, 0, sizeof(struct p9_stat_dotl)); errcode = p9pdu_readf(pdu, proto_version, "qQdugqqqqqqqqqqqqqqq", &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, &stbuf->st_uid, &stbuf->st_gid, &stbuf->st_nlink, &stbuf->st_rdev, &stbuf->st_size, &stbuf->st_blksize, &stbuf->st_blocks, &stbuf->st_atime_sec, &stbuf->st_atime_nsec, &stbuf->st_mtime_sec, &stbuf->st_mtime_nsec, &stbuf->st_ctime_sec, &stbuf->st_ctime_nsec, &stbuf->st_btime_sec, &stbuf->st_btime_nsec, &stbuf->st_gen, &stbuf->st_data_version); } break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) return 0; break; default: BUG(); break; } if (errcode) break; } return errcode; } int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_list ap) { const char *ptr; int errcode = 0; for (ptr = fmt; *ptr; ptr++) { switch (*ptr) { case 'b':{ int8_t val = va_arg(ap, int); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'w':{ __le16 val = cpu_to_le16(va_arg(ap, int)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'd':{ __le32 val = cpu_to_le32(va_arg(ap, int32_t)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'q':{ __le64 val = cpu_to_le64(va_arg(ap, int64_t)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 's':{ const char *sptr = va_arg(ap, const char *); uint16_t len = 0; if (sptr) len = min_t(size_t, strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, "w", len); if (!errcode && pdu_write(pdu, sptr, len)) errcode = -EFAULT; } break; case 'u': { kuid_t uid = va_arg(ap, kuid_t); __le32 val = cpu_to_le32( from_kuid(&init_user_ns, uid)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'g': { kgid_t gid = va_arg(ap, kgid_t); __le32 val = cpu_to_le32( from_kgid(&init_user_ns, gid)); if (pdu_write(pdu, &val, sizeof(val))) errcode = -EFAULT; } break; case 'Q':{ const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); errcode = p9pdu_writef(pdu, proto_version, "bdq", qid->type, qid->version, qid->path); } break; case 'S':{ const struct p9_wstat *stbuf = va_arg(ap, const struct p9_wstat *); errcode = p9pdu_writef(pdu, proto_version, "wwdQdddqssss?sugu", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, stbuf->mode, stbuf->atime, stbuf->mtime, stbuf->length, stbuf->name, stbuf->uid, stbuf->gid, stbuf->muid, stbuf->extension, stbuf->n_uid, stbuf->n_gid, stbuf->n_muid); } break; case 'V':{ uint32_t count = va_arg(ap, uint32_t); struct iov_iter *from = va_arg(ap, struct iov_iter *); errcode = p9pdu_writef(pdu, proto_version, "d", count); if (!errcode && pdu_write_u(pdu, from, count)) errcode = -EFAULT; } break; case 'T':{ uint16_t nwname = va_arg(ap, int); const char **wnames = va_arg(ap, const char **); errcode = p9pdu_writef(pdu, proto_version, "w", nwname); if (!errcode) { int i; for (i = 0; i < nwname; i++) { errcode = p9pdu_writef(pdu, proto_version, "s", wnames[i]); if (errcode) break; } } } break; case 'R':{ uint16_t nwqid = va_arg(ap, int); struct p9_qid *wqids = va_arg(ap, struct p9_qid *); errcode = p9pdu_writef(pdu, proto_version, "w", nwqid); if (!errcode) { int i; for (i = 0; i < nwqid; i++) { errcode = p9pdu_writef(pdu, proto_version, "Q", &wqids[i]); if (errcode) break; } } } break; case 'I':{ struct p9_iattr_dotl *p9attr = va_arg(ap, struct p9_iattr_dotl *); errcode = p9pdu_writef(pdu, proto_version, "ddugqqqqq", p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, p9attr->mtime_sec, p9attr->mtime_nsec); } break; case '?': if ((proto_version != p9_proto_2000u) && (proto_version != p9_proto_2000L)) return 0; break; default: BUG(); break; } if (errcode) break; } return errcode; } int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); ret = p9pdu_vreadf(pdu, proto_version, fmt, ap); va_end(ap); return ret; } static int p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...) { va_list ap; int ret; va_start(ap, fmt); ret = p9pdu_vwritef(pdu, proto_version, fmt, ap); va_end(ap); return ret; } int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) { struct p9_fcall fake_pdu; int ret; fake_pdu.size = len; fake_pdu.capacity = len; fake_pdu.sdata = buf; fake_pdu.offset = 0; ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st); if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); return ret; } return fake_pdu.offset; } EXPORT_SYMBOL(p9stat_read); int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) { pdu->id = type; return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); } int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu) { int size = pdu->size; int err; pdu->size = 0; err = p9pdu_writef(pdu, 0, "d", size); pdu->size = size; trace_9p_protocol_dump(clnt, pdu); p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); return err; } void p9pdu_reset(struct p9_fcall *pdu) { pdu->offset = 0; pdu->size = 0; } int p9dirent_read(struct p9_client *clnt, char *buf, int len, struct p9_dirent *dirent) { struct p9_fcall fake_pdu; int ret; char *nameptr; fake_pdu.size = len; fake_pdu.capacity = len; fake_pdu.sdata = buf; fake_pdu.offset = 0; ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, &dirent->d_off, &dirent->d_type, &nameptr); if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); return ret; } ret = strscpy(dirent->d_name, nameptr, sizeof(dirent->d_name)); if (ret < 0) { p9_debug(P9_DEBUG_ERROR, "On the wire dirent name too long: %s\n", nameptr); kfree(nameptr); return ret; } kfree(nameptr); return fake_pdu.offset; } EXPORT_SYMBOL(p9dirent_read);
2 3 345 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_RTNETLINK_H #define __LINUX_RTNETLINK_H #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/wait.h> #include <linux/refcount.h> #include <uapi/linux/rtnetlink.h> extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); static inline int rtnetlink_maybe_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo) { return !skb ? 0 : rtnetlink_send(skb, net, pid, group, echo); } extern int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid); extern void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, const struct nlmsghdr *nlh, gfp_t flags); extern void rtnl_set_sk_err(struct net *net, u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, gfp_t flags, int *new_nsid, int new_ifindex); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, gfp_t flags, int *new_nsid, int new_ifindex, u32 portid, const struct nlmsghdr *nlh); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags, u32 portid, const struct nlmsghdr *nlh); /* RTNL is used as a global lock for all changes to network configuration */ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern atomic_t dev_unreg_count; extern struct rw_semaphore pernet_ops_rwsem; extern struct rw_semaphore net_rwsem; #define ASSERT_RTNL() \ WARN_ONCE(!rtnl_is_locked(), \ "RTNL: assertion failed at %s (%d)\n", __FILE__, __LINE__) #ifdef CONFIG_PROVE_LOCKING extern bool lockdep_rtnl_is_held(void); #else static inline bool lockdep_rtnl_is_held(void) { return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ /** * rcu_dereference_rtnl - rcu_dereference with debug checking * @p: The pointer to read, prior to dereferencing * * Do an rcu_dereference(p), but check caller either holds rcu_read_lock() * or RTNL. Note : Please prefer rtnl_dereference() or rcu_dereference() */ #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit * the READ_ONCE(), because caller holds RTNL. */ #define rtnl_dereference(p) \ rcu_dereference_protected(p, lockdep_rtnl_is_held()) /** * rcu_replace_pointer_rtnl - replace an RCU pointer under rtnl_lock, returning * its old value * @rp: RCU pointer, whose value is returned * @p: regular pointer * * Perform a replacement under rtnl_lock, where @rp is an RCU-annotated * pointer. The old value of @rp is returned, and @rp is set to @p */ #define rcu_replace_pointer_rtnl(rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_is_held()) #ifdef CONFIG_DEBUG_NET_SMALL_RTNL void __rtnl_net_lock(struct net *net); void __rtnl_net_unlock(struct net *net); void rtnl_net_lock(struct net *net); void rtnl_net_unlock(struct net *net); int rtnl_net_trylock(struct net *net); int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b); bool rtnl_net_is_locked(struct net *net); #define ASSERT_RTNL_NET(net) \ WARN_ONCE(!rtnl_net_is_locked(net), \ "RTNL_NET: assertion failed at %s (%d)\n", \ __FILE__, __LINE__) bool lockdep_rtnl_net_is_held(struct net *net); #define rcu_dereference_rtnl_net(net, p) \ rcu_dereference_check(p, lockdep_rtnl_net_is_held(net)) #define rtnl_net_dereference(net, p) \ rcu_dereference_protected(p, lockdep_rtnl_net_is_held(net)) #define rcu_replace_pointer_rtnl_net(net, rp, p) \ rcu_replace_pointer(rp, p, lockdep_rtnl_net_is_held(net)) #else static inline void __rtnl_net_lock(struct net *net) {} static inline void __rtnl_net_unlock(struct net *net) {} static inline void rtnl_net_lock(struct net *net) { rtnl_lock(); } static inline void rtnl_net_unlock(struct net *net) { rtnl_unlock(); } static inline int rtnl_net_trylock(struct net *net) { return rtnl_trylock(); } static inline void ASSERT_RTNL_NET(struct net *net) { ASSERT_RTNL(); } #define rcu_dereference_rtnl_net(net, p) \ rcu_dereference_rtnl(p) #define rtnl_net_dereference(net, p) \ rtnl_dereference(p) #define rcu_replace_pointer_rtnl_net(net, rp, p) \ rcu_replace_pointer_rtnl(rp, p) #endif static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) { return rtnl_dereference(dev->ingress_queue); } static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) { return rcu_dereference(dev->ingress_queue); } struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS void net_inc_ingress_queue(void); void net_dec_ingress_queue(void); #endif #ifdef CONFIG_NET_EGRESS void net_inc_egress_queue(void); void net_dec_egress_queue(void); void netdev_xmit_skip_txqueue(bool skip); #endif void rtnetlink_init(void); void __rtnl_unlock(void); void rtnl_kfree_skbs(struct sk_buff *head, struct sk_buff *tail); extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 flags); extern int ndo_dflt_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid); extern int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u16 mode, u32 flags, u32 mask, int nlflags, u32 filter_mask, int (*vlan_fill)(struct sk_buff *skb, struct net_device *dev, u32 filter_mask)); extern void rtnl_offload_xstats_notify(struct net_device *dev); static inline int rtnl_has_listeners(const struct net *net, u32 group) { struct sock *rtnl = net->rtnl; return netlink_has_listeners(rtnl, group); } /** * rtnl_notify_needed - check if notification is needed * @net: Pointer to the net namespace * @nlflags: netlink ingress message flags * @group: rtnl group * * Based on the ingress message flags and rtnl group, returns true * if a notification is needed, false otherwise. */ static inline bool rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group) { return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group); } void netdev_set_operstate(struct net_device *dev, int newstate); #endif /* __LINUX_RTNETLINK_H */
3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SECCOMP_H #define _LINUX_SECCOMP_H #include <uapi/linux/seccomp.h> #include <linux/seccomp_types.h> #define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ SECCOMP_FILTER_FLAG_LOG | \ SECCOMP_FILTER_FLAG_SPEC_ALLOW | \ SECCOMP_FILTER_FLAG_NEW_LISTENER | \ SECCOMP_FILTER_FLAG_TSYNC_ESRCH | \ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV) /* sizeof() the first published struct seccomp_notif_addfd */ #define SECCOMP_NOTIFY_ADDFD_SIZE_VER0 24 #define SECCOMP_NOTIFY_ADDFD_SIZE_LATEST SECCOMP_NOTIFY_ADDFD_SIZE_VER0 #ifdef CONFIG_SECCOMP #include <linux/thread_info.h> #include <linux/atomic.h> #include <asm/seccomp.h> #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER extern int __secure_computing(const struct seccomp_data *sd); static inline int secure_computing(void) { if (unlikely(test_syscall_work(SECCOMP))) return __secure_computing(NULL); return 0; } #else extern void secure_computing_strict(int this_syscall); static inline int __secure_computing(const struct seccomp_data *sd) { secure_computing_strict(sd->nr); return 0; } #endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, void __user *); static inline int seccomp_mode(struct seccomp *s) { return s->mode; } #else /* CONFIG_SECCOMP */ #include <linux/errno.h> struct seccomp_data; #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER static inline int secure_computing(void) { return 0; } static inline int __secure_computing(const struct seccomp_data *sd) { return 0; } #else static inline void secure_computing_strict(int this_syscall) { return; } #endif static inline long prctl_get_seccomp(void) { return -EINVAL; } static inline long prctl_set_seccomp(unsigned long arg2, char __user *arg3) { return -EINVAL; } static inline int seccomp_mode(struct seccomp *s) { return SECCOMP_MODE_DISABLED; } #endif /* CONFIG_SECCOMP */ #ifdef CONFIG_SECCOMP_FILTER extern void seccomp_filter_release(struct task_struct *tsk); extern void get_seccomp_filter(struct task_struct *tsk); #else /* CONFIG_SECCOMP_FILTER */ static inline void seccomp_filter_release(struct task_struct *tsk) { return; } static inline void get_seccomp_filter(struct task_struct *tsk) { return; } #endif /* CONFIG_SECCOMP_FILTER */ #if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) extern long seccomp_get_filter(struct task_struct *task, unsigned long filter_off, void __user *data); extern long seccomp_get_metadata(struct task_struct *task, unsigned long filter_off, void __user *data); #else static inline long seccomp_get_filter(struct task_struct *task, unsigned long n, void __user *data) { return -EINVAL; } static inline long seccomp_get_metadata(struct task_struct *task, unsigned long filter_off, void __user *data) { return -EINVAL; } #endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ #ifdef CONFIG_SECCOMP_CACHE_DEBUG struct seq_file; struct pid_namespace; struct pid; int proc_pid_seccomp_cache(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); #endif #endif /* _LINUX_SECCOMP_H */
4 1 2 1 3 3 1 2 1 1 3 1 5 1 2 17 17 2 5 17 14 21 21 21 21 21 12 18 18 8 18 16 18 23 18 3 21 14 7 4 11 11 11 1 6 11 16 2 14 7 7 7 7 11 7 7 7 6 1 1 7 7 7 7 7 7 28 1 28 28 6 1 21 22 22 22 27 24 4 24 4 23 23 21 2 3 13 7 18 7 11 18 18 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 // SPDX-License-Identifier: GPL-2.0-or-later /* * Xbox gamepad driver * * Copyright (c) 2002 Marko Friedemann <mfr@bmx-chemnitz.de> * 2004 Oliver Schwartz <Oliver.Schwartz@gmx.de>, * Steven Toth <steve@toth.demon.co.uk>, * Franz Lehner <franz@caos.at>, * Ivan Hawkes <blackhawk@ivanhawkes.com> * 2005 Dominic Cerquetti <binary1230@yahoo.com> * 2006 Adam Buchbinder <adam.buchbinder@gmail.com> * 2007 Jan Kratochvil <honza@jikos.cz> * 2010 Christoph Fritz <chf.fritz@googlemail.com> * * This driver is based on: * - information from http://euc.jp/periphs/xbox-controller.ja.html * - the iForce driver drivers/char/joystick/iforce.c * - the skeleton-driver drivers/usb/usb-skeleton.c * - Xbox 360 information http://www.free60.org/wiki/Gamepad * - Xbox One information https://github.com/quantus/xbox-one-controller-protocol * * Thanks to: * - ITO Takayuki for providing essential xpad information on his website * - Vojtech Pavlik - iforce driver / input subsystem * - Greg Kroah-Hartman - usb-skeleton driver * - Xbox Linux project - extra USB IDs * - Pekka Pöyry (quantus) - Xbox One controller reverse-engineering * * TODO: * - fine tune axes (especially trigger axes) * - fix "analog" buttons (reported as digital now) * - get rumble working * - need USB IDs for other dance pads * * History: * * 2002-06-27 - 0.0.1 : first version, just said "XBOX HID controller" * * 2002-07-02 - 0.0.2 : basic working version * - all axes and 9 of the 10 buttons work (german InterAct device) * - the black button does not work * * 2002-07-14 - 0.0.3 : rework by Vojtech Pavlik * - indentation fixes * - usb + input init sequence fixes * * 2002-07-16 - 0.0.4 : minor changes, merge with Vojtech's v0.0.3 * - verified the lack of HID and report descriptors * - verified that ALL buttons WORK * - fixed d-pad to axes mapping * * 2002-07-17 - 0.0.5 : simplified d-pad handling * * 2004-10-02 - 0.0.6 : DDR pad support * - borrowed from the Xbox Linux kernel * - USB id's for commonly used dance pads are present * - dance pads will map D-PAD to buttons, not axes * - pass the module paramater 'dpad_to_buttons' to force * the D-PAD to map to buttons if your pad is not detected * * Later changes can be tracked in SCM. */ #include <linux/bits.h> #include <linux/kernel.h> #include <linux/input.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/module.h> #include <linux/usb/input.h> #include <linux/usb/quirks.h> #define XPAD_PKT_LEN 64 /* * xbox d-pads should map to buttons, as is required for DDR pads * but we map them to axes when possible to simplify things */ #define MAP_DPAD_TO_BUTTONS (1 << 0) #define MAP_TRIGGERS_TO_BUTTONS (1 << 1) #define MAP_STICKS_TO_NULL (1 << 2) #define MAP_SELECT_BUTTON (1 << 3) #define MAP_PADDLES (1 << 4) #define MAP_PROFILE_BUTTON (1 << 5) #define DANCEPAD_MAP_CONFIG (MAP_DPAD_TO_BUTTONS | \ MAP_TRIGGERS_TO_BUTTONS | MAP_STICKS_TO_NULL) #define XTYPE_XBOX 0 #define XTYPE_XBOX360 1 #define XTYPE_XBOX360W 2 #define XTYPE_XBOXONE 3 #define XTYPE_UNKNOWN 4 /* Send power-off packet to xpad360w after holding the mode button for this many * seconds */ #define XPAD360W_POWEROFF_TIMEOUT 5 #define PKT_XB 0 #define PKT_XBE1 1 #define PKT_XBE2_FW_OLD 2 #define PKT_XBE2_FW_5_EARLY 3 #define PKT_XBE2_FW_5_11 4 static bool dpad_to_buttons; module_param(dpad_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(dpad_to_buttons, "Map D-PAD to buttons rather than axes for unknown pads"); static bool triggers_to_buttons; module_param(triggers_to_buttons, bool, S_IRUGO); MODULE_PARM_DESC(triggers_to_buttons, "Map triggers to buttons rather than axes for unknown pads"); static bool sticks_to_null; module_param(sticks_to_null, bool, S_IRUGO); MODULE_PARM_DESC(sticks_to_null, "Do not map sticks at all for unknown pads"); static bool auto_poweroff = true; module_param(auto_poweroff, bool, S_IWUSR | S_IRUGO); MODULE_PARM_DESC(auto_poweroff, "Power off wireless controllers on suspend"); static const struct xpad_device { u16 idVendor; u16 idProduct; char *name; u8 mapping; u8 xtype; } xpad_device[] = { /* Please keep this list sorted by vendor and product ID. */ { 0x0079, 0x18d4, "GPD Win 2 X-Box Controller", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff01, "Wooting One (Legacy)", 0, XTYPE_XBOX360 }, { 0x03eb, 0xff02, "Wooting Two (Legacy)", 0, XTYPE_XBOX360 }, { 0x03f0, 0x038D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wired */ { 0x03f0, 0x048D, "HyperX Clutch", 0, XTYPE_XBOX360 }, /* wireless */ { 0x03f0, 0x0495, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, { 0x03f0, 0x07A0, "HyperX Clutch Gladiate RGB", 0, XTYPE_XBOXONE }, { 0x03f0, 0x08B6, "HyperX Clutch Gladiate", 0, XTYPE_XBOXONE }, /* v2 */ { 0x03f0, 0x09B4, "HyperX Clutch Tanto", 0, XTYPE_XBOXONE }, { 0x044f, 0x0f00, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f03, "Thrustmaster Wheel", 0, XTYPE_XBOX }, { 0x044f, 0x0f07, "Thrustmaster, Inc. Controller", 0, XTYPE_XBOX }, { 0x044f, 0x0f10, "Thrustmaster Modena GT Wheel", 0, XTYPE_XBOX }, { 0x044f, 0xb326, "Thrustmaster Gamepad GP XID", 0, XTYPE_XBOX360 }, { 0x045e, 0x0202, "Microsoft X-Box pad v1 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x0285, "Microsoft X-Box pad (Japan)", 0, XTYPE_XBOX }, { 0x045e, 0x0287, "Microsoft Xbox Controller S", 0, XTYPE_XBOX }, { 0x045e, 0x0288, "Microsoft Xbox Controller S v2", 0, XTYPE_XBOX }, { 0x045e, 0x0289, "Microsoft X-Box pad v2 (US)", 0, XTYPE_XBOX }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, { 0x045e, 0x028f, "Microsoft X-Box 360 pad v2", 0, XTYPE_XBOX360 }, { 0x045e, 0x0291, "Xbox 360 Wireless Receiver (XBOX)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x02d1, "Microsoft X-Box One pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x02dd, "Microsoft X-Box One pad (Firmware 2015)", 0, XTYPE_XBOXONE }, { 0x045e, 0x02e3, "Microsoft X-Box One Elite pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x02ea, "Microsoft X-Box One S pad", 0, XTYPE_XBOXONE }, { 0x045e, 0x0719, "Xbox 360 Wireless Receiver", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360W }, { 0x045e, 0x0b00, "Microsoft X-Box One Elite 2 pad", MAP_PADDLES, XTYPE_XBOXONE }, { 0x045e, 0x0b0a, "Microsoft X-Box Adaptive Controller", MAP_PROFILE_BUTTON, XTYPE_XBOXONE }, { 0x045e, 0x0b12, "Microsoft Xbox Series S|X Controller", MAP_SELECT_BUTTON, XTYPE_XBOXONE }, { 0x046d, 0xc21d, "Logitech Gamepad F310", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21e, "Logitech Gamepad F510", 0, XTYPE_XBOX360 }, { 0x046d, 0xc21f, "Logitech Gamepad F710", 0, XTYPE_XBOX360 }, { 0x046d, 0xc242, "Logitech Chillstream Controller", 0, XTYPE_XBOX360 }, { 0x046d, 0xca84, "Logitech Xbox Cordless Controller", 0, XTYPE_XBOX }, { 0x046d, 0xca88, "Logitech Compact Controller for Xbox", 0, XTYPE_XBOX }, { 0x046d, 0xca8a, "Logitech Precision Vibration Feedback Wheel", 0, XTYPE_XBOX }, { 0x046d, 0xcaa3, "Logitech DriveFx Racing Wheel", 0, XTYPE_XBOX360 }, { 0x056e, 0x2004, "Elecom JC-U3613M", 0, XTYPE_XBOX360 }, { 0x05fd, 0x1007, "Mad Catz Controller (unverified)", 0, XTYPE_XBOX }, { 0x05fd, 0x107a, "InterAct 'PowerPad Pro' X-Box pad (Germany)", 0, XTYPE_XBOX }, { 0x05fe, 0x3030, "Chic Controller", 0, XTYPE_XBOX }, { 0x05fe, 0x3031, "Chic Controller", 0, XTYPE_XBOX }, { 0x062a, 0x0020, "Logic3 Xbox GamePad", 0, XTYPE_XBOX }, { 0x062a, 0x0033, "Competition Pro Steering Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0200, "Saitek Racing Wheel", 0, XTYPE_XBOX }, { 0x06a3, 0x0201, "Saitek Adrenalin", 0, XTYPE_XBOX }, { 0x06a3, 0xf51a, "Saitek P3600", 0, XTYPE_XBOX360 }, { 0x0738, 0x4506, "Mad Catz 4506 Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4516, "Mad Catz Control Pad", 0, XTYPE_XBOX }, { 0x0738, 0x4520, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4522, "Mad Catz LumiCON", 0, XTYPE_XBOX }, { 0x0738, 0x4526, "Mad Catz Control Pad Pro", 0, XTYPE_XBOX }, { 0x0738, 0x4530, "Mad Catz Universal MC2 Racing Wheel and Pedals", 0, XTYPE_XBOX }, { 0x0738, 0x4536, "Mad Catz MicroCON", 0, XTYPE_XBOX }, { 0x0738, 0x4540, "Mad Catz Beat Pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4556, "Mad Catz Lynx Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4586, "Mad Catz MicroCon Wireless Controller", 0, XTYPE_XBOX }, { 0x0738, 0x4588, "Mad Catz Blaster", 0, XTYPE_XBOX }, { 0x0738, 0x45ff, "Mad Catz Beat Pad (w/ Handle)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4716, "Mad Catz Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4718, "Mad Catz Street Fighter IV FightStick SE", 0, XTYPE_XBOX360 }, { 0x0738, 0x4726, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0738, 0x4728, "Mad Catz Street Fighter IV FightPad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4736, "Mad Catz MicroCon Gamepad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4738, "Mad Catz Wired Xbox 360 Controller (SFIV)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4740, "Mad Catz Beat Pad", 0, XTYPE_XBOX360 }, { 0x0738, 0x4743, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x4758, "Mad Catz Arcade Game Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0x4a01, "Mad Catz FightStick TE 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0738, 0x6040, "Mad Catz Beat Pad Pro", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0738, 0x9871, "Mad Catz Portable Drum", 0, XTYPE_XBOX360 }, { 0x0738, 0xb726, "Mad Catz Xbox controller - MW2", 0, XTYPE_XBOX360 }, { 0x0738, 0xb738, "Mad Catz MVC2TE Stick 2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0738, 0xbeef, "Mad Catz JOYTECH NEO SE Advanced GamePad", XTYPE_XBOX360 }, { 0x0738, 0xcb02, "Saitek Cyborg Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb03, "Saitek P3200 Rumble Pad - PC/Xbox 360", 0, XTYPE_XBOX360 }, { 0x0738, 0xcb29, "Saitek Aviator Stick AV8R02", 0, XTYPE_XBOX360 }, { 0x0738, 0xf738, "Super SFIV FightStick TE S", 0, XTYPE_XBOX360 }, { 0x07ff, 0xffff, "Mad Catz GamePad", 0, XTYPE_XBOX360 }, { 0x0b05, 0x1a38, "ASUS ROG RAIKIRI", 0, XTYPE_XBOXONE }, { 0x0b05, 0x1abb, "ASUS ROG RAIKIRI PRO", 0, XTYPE_XBOXONE }, { 0x0c12, 0x0005, "Intec wireless", 0, XTYPE_XBOX }, { 0x0c12, 0x8801, "Nyko Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8802, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x8809, "RedOctane Xbox Dance Pad", DANCEPAD_MAP_CONFIG, XTYPE_XBOX }, { 0x0c12, 0x880a, "Pelican Eclipse PL-2023", 0, XTYPE_XBOX }, { 0x0c12, 0x8810, "Zeroplus Xbox Controller", 0, XTYPE_XBOX }, { 0x0c12, 0x9902, "HAMA VibraX - *FAULTY HARDWARE*", 0, XTYPE_XBOX }, { 0x0d2f, 0x0002, "Andamiro Pump It Up pad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x0db0, 0x1901, "Micro Star International Xbox360 Controller for Windows", 0, XTYPE_XBOX360 }, { 0x0e4c, 0x1097, "Radica Gamester Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x1103, "Radica Gamester Reflex", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX }, { 0x0e4c, 0x2390, "Radica Games Jtech Controller", 0, XTYPE_XBOX }, { 0x0e4c, 0x3510, "Radica Gamester", 0, XTYPE_XBOX }, { 0x0e6f, 0x0003, "Logic3 Freebird wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0005, "Eclipse wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0006, "Edge wireless Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0008, "After Glow Pro Controller", 0, XTYPE_XBOX }, { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0e6f, 0x0113, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x011f, "Rock Candy Gamepad Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0131, "PDP EA Sports Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0133, "Xbox 360 Wired Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0139, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x013a, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0146, "Rock Candy Wired Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0147, "PDP Marvel Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x015c, "PDP Xbox One Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0e6f, 0x0161, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0162, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0163, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0164, "PDP Battlefield One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0165, "PDP Titanfall 2", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0201, "Pelican PL-3601 'TSZ' Wired Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0213, "Afterglow Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x021f, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0246, "Rock Candy Gamepad for Xbox One 2015", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a0, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a1, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a2, "PDP Wired Controller for Xbox One - Crimson Red", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a4, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a6, "PDP Wired Controller for Xbox One - Camo Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a7, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02a8, "PDP Xbox One Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ab, "PDP Controller for Xbox One", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02ad, "PDP Wired Controller for Xbox One - Stealth Series", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b3, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x02b8, "Afterglow Prismatic Wired Controller", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0301, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0346, "Rock Candy Gamepad for Xbox One 2016", 0, XTYPE_XBOXONE }, { 0x0e6f, 0x0401, "Logic3 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0413, "Afterglow AX.1 Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x0e6f, 0x0501, "PDP Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x0e6f, 0xf900, "PDP Afterglow AX.1", 0, XTYPE_XBOX360 }, { 0x0e8f, 0x0201, "SmartJoy Frag Xpad/PS2 adaptor", 0, XTYPE_XBOX }, { 0x0e8f, 0x3008, "Generic xbox control (dealextreme)", 0, XTYPE_XBOX }, { 0x0f0d, 0x000a, "Hori Co. DOA4 FightStick", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000c, "Hori PadEX Turbo", 0, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x001b, "Hori Real Arcade Pro VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0063, "Hori Real Arcade Pro Hayabusa (USA) Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE }, { 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX }, { 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX }, { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x1038, 0x1430, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x1038, 0x1431, "SteelSeries Stratus Duo", 0, XTYPE_XBOX360 }, { 0x11c9, 0x55f0, "Nacon GC-100XF", 0, XTYPE_XBOX360 }, { 0x11ff, 0x0511, "PXN V900", 0, XTYPE_XBOX360 }, { 0x1209, 0x2882, "Ardwiino Controller", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x0301, "PDP AFTERGLOW AX.1", 0, XTYPE_XBOX360 }, { 0x12ab, 0x0303, "Mortal Kombat Klassic FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x1430, 0xf801, "RedOctane Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x146b, 0x0604, "Bigben Interactive DAIJA Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1532, 0x0a00, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE }, { 0x1532, 0x0a03, "Razer Wildcat", 0, XTYPE_XBOXONE }, { 0x1532, 0x0a29, "Razer Wolverine V2", 0, XTYPE_XBOXONE }, { 0x15e4, 0x3f00, "Power A Mini Pro Elite", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f0a, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x15e4, 0x3f10, "Batarang Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x162e, 0xbeef, "Joytech Neo-Se Take2", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 }, { 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x17ef, 0x6182, "Lenovo Legion Controller for Windows", 0, XTYPE_XBOX360 }, { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf016, "Mad Catz Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf018, "Mad Catz Street Fighter IV SE Fighting Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf019, "Mad Catz Brawlstick for Xbox 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf021, "Mad Cats Ghost Recon FS GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf023, "MLG Pro Circuit Controller (Xbox)", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf025, "Mad Catz Call Of Duty", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf027, "Mad Catz FPS Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf028, "Street Fighter IV FightPad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf02e, "Mad Catz Fightpad", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf030, "Mad Catz Xbox 360 MC2 MicroCon Racing Wheel", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf036, "Mad Catz MicroCon GamePad Pro", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf038, "Street Fighter IV FightStick TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf039, "Mad Catz MvC2 TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03a, "Mad Catz SFxT Fightstick Pro", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03d, "Street Fighter IV Arcade Stick TE - Chun Li", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03e, "Mad Catz MLG FightStick TE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf03f, "Mad Catz FightStick SoulCaliber", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf042, "Mad Catz FightStick TES+", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf080, "Mad Catz FightStick TE2", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf501, "HoriPad EX2 Turbo", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf502, "Hori Real Arcade Pro.VX SA", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf503, "Hori Fighting Stick VX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf504, "Hori Real Arcade Pro. EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf505, "Hori Fighting Stick EX2B", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xf506, "Hori Real Arcade Pro.EX Premium VLX", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf900, "Harmonix Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf901, "Gamestop Xbox 360 Controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf903, "Tron Xbox 360 controller", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf904, "PDP Versus Fighting Pad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xf906, "MortalKombat FightStick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1bad, 0xfa01, "MadCatz GamePad", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x2345, 0xe00b, "Machenike G5 Pro Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5300, "PowerA MINI PROEX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5303, "Xbox Airflo wired controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x530a, "Xbox 360 Pro EX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x531a, "PowerA Pro Ex", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5397, "FUS1ON Tournament Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x541a, "PowerA Xbox One Mini Wired Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x542a, "Xbox ONE spectra", 0, XTYPE_XBOXONE }, { 0x24c6, 0x543a, "PowerA Xbox One wired controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5500, "Hori XBOX 360 EX 2 with Turbo", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5501, "Hori Real Arcade Pro VX-SA", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5502, "Hori Fighting Stick VX Alt", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5503, "Hori Fighting Edge", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5506, "Hori SOULCALIBUR V Stick", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550d, "Hori GEM Xbox controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x550e, "Hori Real Arcade Pro V Kai 360", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x5510, "Hori Fighting Commander ONE (Xbox 360/PC Mode)", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x24c6, 0x551a, "PowerA FUSION Pro Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x561a, "PowerA FUSION Controller", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5b00, "ThrustMaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b02, "Thrustmaster, Inc. GPX Controller", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, { 0x2563, 0x058d, "OneXPlayer Gamepad", 0, XTYPE_XBOX360 }, { 0x294b, 0x3303, "Snakebyte GAMEPAD BASE X", 0, XTYPE_XBOXONE }, { 0x294b, 0x3404, "Snakebyte GAMEPAD RGB X", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x2000, "8BitDo Pro 2 Wired Controller fox Xbox", 0, XTYPE_XBOXONE }, { 0x2dc8, 0x3106, "8BitDo Pro 2 Wired Controller", 0, XTYPE_XBOX360 }, { 0x2dc8, 0x310a, "8BitDo Ultimate 2C Wireless Controller", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x31e3, 0x1100, "Wooting One", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1200, "Wooting Two", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1210, "Wooting Lekker", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1220, "Wooting Two HE", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 }, { 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 }, { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } }; /* buttons shared with xbox and xbox360 */ static const signed short xpad_common_btn[] = { BTN_A, BTN_B, BTN_X, BTN_Y, /* "analog" buttons */ BTN_START, BTN_SELECT, BTN_THUMBL, BTN_THUMBR, /* start/back/sticks */ -1 /* terminating entry */ }; /* original xbox controllers only */ static const signed short xpad_btn[] = { BTN_C, BTN_Z, /* "analog" buttons */ -1 /* terminating entry */ }; /* used when dpad is mapped to buttons */ static const signed short xpad_btn_pad[] = { BTN_TRIGGER_HAPPY1, BTN_TRIGGER_HAPPY2, /* d-pad left, right */ BTN_TRIGGER_HAPPY3, BTN_TRIGGER_HAPPY4, /* d-pad up, down */ -1 /* terminating entry */ }; /* used when triggers are mapped to buttons */ static const signed short xpad_btn_triggers[] = { BTN_TL2, BTN_TR2, /* triggers left/right */ -1 }; static const signed short xpad360_btn[] = { /* buttons for x360 controller */ BTN_TL, BTN_TR, /* Button LB/RB */ BTN_MODE, /* The big X button */ -1 }; static const signed short xpad_abs[] = { ABS_X, ABS_Y, /* left stick */ ABS_RX, ABS_RY, /* right stick */ -1 /* terminating entry */ }; /* used when dpad is mapped to axes */ static const signed short xpad_abs_pad[] = { ABS_HAT0X, ABS_HAT0Y, /* d-pad axes */ -1 /* terminating entry */ }; /* used when triggers are mapped to axes */ static const signed short xpad_abs_triggers[] = { ABS_Z, ABS_RZ, /* triggers left/right */ -1 }; /* used when the controller has extra paddle buttons */ static const signed short xpad_btn_paddles[] = { BTN_TRIGGER_HAPPY5, BTN_TRIGGER_HAPPY6, /* paddle upper right, lower right */ BTN_TRIGGER_HAPPY7, BTN_TRIGGER_HAPPY8, /* paddle upper left, lower left */ -1 /* terminating entry */ }; /* * Xbox 360 has a vendor-specific class, so we cannot match it with only * USB_INTERFACE_INFO (also specifically refused by USB subsystem), so we * match against vendor id as well. Wired Xbox 360 devices have protocol 1, * wireless controllers have protocol 129. */ #define XPAD_XBOX360_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 93, \ .bInterfaceProtocol = (pr) #define XPAD_XBOX360_VENDOR(vend) \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 1) }, \ { XPAD_XBOX360_VENDOR_PROTOCOL((vend), 129) } /* The Xbox One controller uses subclass 71 and protocol 208. */ #define XPAD_XBOXONE_VENDOR_PROTOCOL(vend, pr) \ .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_INFO, \ .idVendor = (vend), \ .bInterfaceClass = USB_CLASS_VENDOR_SPEC, \ .bInterfaceSubClass = 71, \ .bInterfaceProtocol = (pr) #define XPAD_XBOXONE_VENDOR(vend) \ { XPAD_XBOXONE_VENDOR_PROTOCOL((vend), 208) } static const struct usb_device_id xpad_table[] = { /* * Please keep this list sorted by vendor ID. Note that there are 2 * macros - XPAD_XBOX360_VENDOR and XPAD_XBOXONE_VENDOR. */ { USB_INTERFACE_INFO('X', 'B', 0) }, /* Xbox USB-IF not-approved class */ XPAD_XBOX360_VENDOR(0x0079), /* GPD Win 2 controller */ XPAD_XBOX360_VENDOR(0x03eb), /* Wooting Keyboards (Legacy) */ XPAD_XBOX360_VENDOR(0x03f0), /* HP HyperX Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x03f0), /* HP HyperX Xbox One controllers */ XPAD_XBOX360_VENDOR(0x044f), /* Thrustmaster Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x045e), /* Microsoft Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x045e), /* Microsoft Xbox One controllers */ XPAD_XBOX360_VENDOR(0x046d), /* Logitech Xbox 360-style controllers */ XPAD_XBOX360_VENDOR(0x056e), /* Elecom JC-U3613M */ XPAD_XBOX360_VENDOR(0x06a3), /* Saitek P3600 */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz Xbox 360 controllers */ { USB_DEVICE(0x0738, 0x4540) }, /* Mad Catz Beat Pad */ XPAD_XBOXONE_VENDOR(0x0738), /* Mad Catz FightStick TE 2 */ XPAD_XBOX360_VENDOR(0x07ff), /* Mad Catz Gamepad */ XPAD_XBOXONE_VENDOR(0x0b05), /* ASUS controllers */ XPAD_XBOX360_VENDOR(0x0c12), /* Zeroplus X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0db0), /* Micro Star International X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f Xbox 360 controllers */ XPAD_XBOXONE_VENDOR(0x0e6f), /* 0x0e6f Xbox One controllers */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOXONE_VENDOR(0x0f0d), /* Hori controllers */ XPAD_XBOX360_VENDOR(0x1038), /* SteelSeries controllers */ XPAD_XBOXONE_VENDOR(0x10f5), /* Turtle Beach Controllers */ XPAD_XBOX360_VENDOR(0x11c9), /* Nacon GC100XF */ XPAD_XBOX360_VENDOR(0x11ff), /* PXN V900 */ XPAD_XBOX360_VENDOR(0x1209), /* Ardwiino Controllers */ XPAD_XBOX360_VENDOR(0x12ab), /* Xbox 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* Bigben Interactive controllers */ XPAD_XBOX360_VENDOR(0x1532), /* Razer Sabertooth */ XPAD_XBOXONE_VENDOR(0x1532), /* Razer Wildcat */ XPAD_XBOX360_VENDOR(0x15e4), /* Numark Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x162e), /* Joytech Xbox 360 controllers */ XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */ XPAD_XBOX360_VENDOR(0x17ef), /* Lenovo */ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */ XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band guitar and drums */ XPAD_XBOX360_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2345), /* Machenike Controllers */ XPAD_XBOX360_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA controllers */ XPAD_XBOX360_VENDOR(0x2563), /* OneXPlayer Gamepad */ XPAD_XBOX360_VENDOR(0x260d), /* Dareu H101 */ XPAD_XBOXONE_VENDOR(0x294b), /* Snakebyte */ XPAD_XBOX360_VENDOR(0x2c22), /* Qanba Controllers */ XPAD_XBOX360_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller */ XPAD_XBOXONE_VENDOR(0x2dc8), /* 8BitDo Pro 2 Wired Controller for Xbox */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke Xbox One pad */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */ XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */ XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */ { } }; MODULE_DEVICE_TABLE(usb, xpad_table); struct xboxone_init_packet { u16 idVendor; u16 idProduct; const u8 *data; u8 len; }; #define XBOXONE_INIT_PKT(_vid, _pid, _data) \ { \ .idVendor = (_vid), \ .idProduct = (_pid), \ .data = (_data), \ .len = ARRAY_SIZE(_data), \ } /* * starting with xbox one, the game input protocol is used * magic numbers are taken from * - https://github.com/xpadneo/gip-dissector/blob/main/src/gip-dissector.lua * - https://github.com/medusalix/xone/blob/master/bus/protocol.c */ #define GIP_CMD_ACK 0x01 #define GIP_CMD_IDENTIFY 0x04 #define GIP_CMD_POWER 0x05 #define GIP_CMD_AUTHENTICATE 0x06 #define GIP_CMD_VIRTUAL_KEY 0x07 #define GIP_CMD_RUMBLE 0x09 #define GIP_CMD_LED 0x0a #define GIP_CMD_FIRMWARE 0x0c #define GIP_CMD_INPUT 0x20 #define GIP_SEQ0 0x00 #define GIP_OPT_ACK 0x10 #define GIP_OPT_INTERNAL 0x20 /* * length of the command payload encoded with * https://en.wikipedia.org/wiki/LEB128 * which is a no-op for N < 128 */ #define GIP_PL_LEN(N) (N) /* * payload specific defines */ #define GIP_PWR_ON 0x00 #define GIP_LED_ON 0x01 #define GIP_MOTOR_R BIT(0) #define GIP_MOTOR_L BIT(1) #define GIP_MOTOR_RT BIT(2) #define GIP_MOTOR_LT BIT(3) #define GIP_MOTOR_ALL (GIP_MOTOR_R | GIP_MOTOR_L | GIP_MOTOR_RT | GIP_MOTOR_LT) #define GIP_WIRED_INTF_DATA 0 #define GIP_WIRED_INTF_AUDIO 1 /* * This packet is required for all Xbox One pads with 2015 * or later firmware installed (or present from the factory). */ static const u8 xboxone_power_on[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(1), GIP_PWR_ON }; /* * This packet is required for Xbox One S (0x045e:0x02ea) * and Xbox One Elite Series 2 (0x045e:0x0b00) pads to * initialize the controller that was previously used in * Bluetooth mode. */ static const u8 xboxone_s_init[] = { GIP_CMD_POWER, GIP_OPT_INTERNAL, GIP_SEQ0, 0x0f, 0x06 }; /* * This packet is required to get additional input data * from Xbox One Elite Series 2 (0x045e:0x0b00) pads. * We mostly do this right now to get paddle data */ static const u8 extra_input_packet_init[] = { 0x4d, 0x10, 0x01, 0x02, 0x07, 0x00 }; /* * This packet is required for the Titanfall 2 Xbox One pads * (0x0e6f:0x0165) to finish initialization and for Hori pads * (0x0f0d:0x0067) to make the analog sticks work. */ static const u8 xboxone_hori_ack_id[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_IDENTIFY, GIP_OPT_INTERNAL, 0x3a, 0x00, 0x00, 0x00, 0x80, 0x00 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_led_on[] = { GIP_CMD_LED, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(3), 0x00, GIP_LED_ON, 0x14 }; /* * This packet is required for most (all?) of the PDP pads to start * sending input reports. These pads include: (0x0e6f:0x02ab), * (0x0e6f:0x02a4), (0x0e6f:0x02a6). */ static const u8 xboxone_pdp_auth[] = { GIP_CMD_AUTHENTICATE, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(2), 0x01, 0x00 }; /* * A specific rumble packet is required for some PowerA pads to start * sending input reports. One of those pads is (0x24c6:0x543a). */ static const u8 xboxone_rumblebegin_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x1D, 0x1D, 0xFF, 0x00, 0x00 }; /* * A rumble packet with zero FF intensity will immediately * terminate the rumbling required to init PowerA pads. * This should happen fast enough that the motors don't * spin up to enough speed to actually vibrate the gamepad. */ static const u8 xboxone_rumbleend_init[] = { GIP_CMD_RUMBLE, 0x00, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_MOTOR_ALL, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; /* * This specifies the selection of init packets that a gamepad * will be sent on init *and* the order in which they will be * sent. The correct sequence number will be added when the * packet is going to be sent. */ static const struct xboxone_init_packet xboxone_init_packets[] = { XBOXONE_INIT_PKT(0x0e6f, 0x0165, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0f0d, 0x0067, xboxone_hori_ack_id), XBOXONE_INIT_PKT(0x0000, 0x0000, xboxone_power_on), XBOXONE_INIT_PKT(0x045e, 0x02ea, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, xboxone_s_init), XBOXONE_INIT_PKT(0x045e, 0x0b00, extra_input_packet_init), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_led_on), XBOXONE_INIT_PKT(0x0e6f, 0x0000, xboxone_pdp_auth), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumblebegin_init), XBOXONE_INIT_PKT(0x24c6, 0x541a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x542a, xboxone_rumbleend_init), XBOXONE_INIT_PKT(0x24c6, 0x543a, xboxone_rumbleend_init), }; struct xpad_output_packet { u8 data[XPAD_PKT_LEN]; u8 len; bool pending; }; #define XPAD_OUT_CMD_IDX 0 #define XPAD_OUT_FF_IDX 1 #define XPAD_OUT_LED_IDX (1 + IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF)) #define XPAD_NUM_OUT_PACKETS (1 + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_FF) + \ IS_ENABLED(CONFIG_JOYSTICK_XPAD_LEDS)) struct usb_xpad { struct input_dev *dev; /* input device interface */ struct input_dev __rcu *x360w_dev; struct usb_device *udev; /* usb device */ struct usb_interface *intf; /* usb interface */ bool pad_present; bool input_created; struct urb *irq_in; /* urb for interrupt in report */ unsigned char *idata; /* input data */ dma_addr_t idata_dma; struct urb *irq_out; /* urb for interrupt out report */ struct usb_anchor irq_out_anchor; bool irq_out_active; /* we must not use an active URB */ u8 odata_serial; /* serial number for xbox one protocol */ unsigned char *odata; /* output data */ dma_addr_t odata_dma; spinlock_t odata_lock; struct xpad_output_packet out_packets[XPAD_NUM_OUT_PACKETS]; int last_out_packet; int init_seq; #if defined(CONFIG_JOYSTICK_XPAD_LEDS) struct xpad_led *led; #endif char phys[64]; /* physical device path */ int mapping; /* map d-pad to buttons or to axes */ int xtype; /* type of xbox device */ int packet_type; /* type of the extended packet */ int pad_nr; /* the order x360 pads were attached */ const char *name; /* name of the device */ struct work_struct work; /* init/remove device from callback */ time64_t mode_btn_down_ts; }; static int xpad_init_input(struct usb_xpad *xpad); static void xpad_deinit_input(struct usb_xpad *xpad); static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num); static void xpad360w_poweroff_controller(struct usb_xpad *xpad); /* * xpad_process_packet * * Completes a request by converting the data into events for the * input subsystem. * * The used report descriptor was taken from ITO Takayuki's website: * http://euc.jp/periphs/xbox-controller.ja.html */ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 12))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 14))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 16))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 18))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[10]); input_report_key(dev, BTN_TR2, data[11]); } else { input_report_abs(dev, ABS_Z, data[10]); input_report_abs(dev, ABS_RZ, data[11]); } /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons and stick press left/right */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* "analog" buttons A, B, X, Y */ input_report_key(dev, BTN_A, data[4]); input_report_key(dev, BTN_B, data[5]); input_report_key(dev, BTN_X, data[6]); input_report_key(dev, BTN_Y, data[7]); /* "analog" buttons black, white */ input_report_key(dev, BTN_C, data[8]); input_report_key(dev, BTN_Z, data[9]); input_sync(dev); } /* * xpad360_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 controller * * The used report descriptor was taken from: * http://www.free60.org/wiki/Gamepad */ static void xpad360_process_packet(struct usb_xpad *xpad, struct input_dev *dev, u16 cmd, unsigned char *data) { /* valid pad data */ if (data[0] != 0x00) return; /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[2] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[2] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[2] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[2] & BIT(1)); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. */ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } /* start/back buttons */ input_report_key(dev, BTN_START, data[2] & BIT(4)); input_report_key(dev, BTN_SELECT, data[2] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[2] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[2] & BIT(7)); /* buttons A,B,X,Y,TL,TR and MODE */ input_report_key(dev, BTN_A, data[3] & BIT(4)); input_report_key(dev, BTN_B, data[3] & BIT(5)); input_report_key(dev, BTN_X, data[3] & BIT(6)); input_report_key(dev, BTN_Y, data[3] & BIT(7)); input_report_key(dev, BTN_TL, data[3] & BIT(0)); input_report_key(dev, BTN_TR, data[3] & BIT(1)); input_report_key(dev, BTN_MODE, data[3] & BIT(2)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 8))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, data[4]); input_report_key(dev, BTN_TR2, data[5]); } else { input_report_abs(dev, ABS_Z, data[4]); input_report_abs(dev, ABS_RZ, data[5]); } input_sync(dev); /* XBOX360W controllers can't be turned off without driver assistance */ if (xpad->xtype == XTYPE_XBOX360W) { if (xpad->mode_btn_down_ts > 0 && xpad->pad_present && ((ktime_get_seconds() - xpad->mode_btn_down_ts) >= XPAD360W_POWEROFF_TIMEOUT)) { xpad360w_poweroff_controller(xpad); xpad->mode_btn_down_ts = 0; return; } /* mode button down/up */ if (data[3] & BIT(2)) xpad->mode_btn_down_ts = ktime_get_seconds(); else xpad->mode_btn_down_ts = 0; } } static void xpad_presence_work(struct work_struct *work) { struct usb_xpad *xpad = container_of(work, struct usb_xpad, work); int error; if (xpad->pad_present) { error = xpad_init_input(xpad); if (error) { /* complain only, not much else we can do here */ dev_err(&xpad->dev->dev, "unable to init device: %d\n", error); } else { rcu_assign_pointer(xpad->x360w_dev, xpad->dev); } } else { RCU_INIT_POINTER(xpad->x360w_dev, NULL); synchronize_rcu(); /* * Now that we are sure xpad360w_process_packet is not * using input device we can get rid of it. */ xpad_deinit_input(xpad); } } /* * xpad360w_process_packet * * Completes a request by converting the data into events for the * input subsystem. It is version for xbox 360 wireless controller. * * Byte.Bit * 00.1 - Status change: The controller or headset has connected/disconnected * Bits 01.7 and 01.6 are valid * 01.7 - Controller present * 01.6 - Headset present * 01.1 - Pad state (Bytes 4+) valid * */ static void xpad360w_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev; bool present; /* Presence change */ if (data[0] & 0x08) { present = (data[1] & 0x80) != 0; if (xpad->pad_present != present) { xpad->pad_present = present; schedule_work(&xpad->work); } } /* Valid pad data */ if (data[1] != 0x1) return; rcu_read_lock(); dev = rcu_dereference(xpad->x360w_dev); if (dev) xpad360_process_packet(xpad, dev, cmd, &data[4]); rcu_read_unlock(); } /* * xpadone_process_packet * * Completes a request by converting the data into events for the * input subsystem. This version is for the Xbox One controller. * * The report format was gleaned from * https://github.com/kylelemons/xbox/blob/master/xbox.go */ static void xpadone_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; bool do_sync = false; /* the xbox button has its own special report */ if (data[0] == GIP_CMD_VIRTUAL_KEY) { /* * The Xbox One S controller requires these reports to be * acked otherwise it continues sending them forever and * won't report further mode button events. */ if (data[1] == (GIP_OPT_ACK | GIP_OPT_INTERNAL)) xpadone_ack_mode_report(xpad, data[2]); input_report_key(dev, BTN_MODE, data[4] & GENMASK(1, 0)); input_sync(dev); do_sync = true; } else if (data[0] == GIP_CMD_FIRMWARE) { /* Some packet formats force us to use this separate to poll paddle inputs */ if (xpad->packet_type == PKT_XBE2_FW_5_11) { /* Mute paddles if controller is in a custom profile slot * Checked by looking at the active profile slot to * verify it's the default slot */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 split packet paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); do_sync = true; } } else if (data[0] == GIP_CMD_INPUT) { /* The main valid packet type for inputs */ /* menu/view buttons */ input_report_key(dev, BTN_START, data[4] & BIT(2)); input_report_key(dev, BTN_SELECT, data[4] & BIT(3)); if (xpad->mapping & MAP_SELECT_BUTTON) input_report_key(dev, KEY_RECORD, data[22] & BIT(0)); /* buttons A,B,X,Y */ input_report_key(dev, BTN_A, data[4] & BIT(4)); input_report_key(dev, BTN_B, data[4] & BIT(5)); input_report_key(dev, BTN_X, data[4] & BIT(6)); input_report_key(dev, BTN_Y, data[4] & BIT(7)); /* digital pad */ if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (left, right, up, down) */ input_report_key(dev, BTN_TRIGGER_HAPPY1, data[5] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY2, data[5] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY3, data[5] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY4, data[5] & BIT(1)); } else { input_report_abs(dev, ABS_HAT0X, !!(data[5] & 0x08) - !!(data[5] & 0x04)); input_report_abs(dev, ABS_HAT0Y, !!(data[5] & 0x02) - !!(data[5] & 0x01)); } /* TL/TR */ input_report_key(dev, BTN_TL, data[5] & BIT(4)); input_report_key(dev, BTN_TR, data[5] & BIT(5)); /* stick press left/right */ input_report_key(dev, BTN_THUMBL, data[5] & BIT(6)); input_report_key(dev, BTN_THUMBR, data[5] & BIT(7)); if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* left stick */ input_report_abs(dev, ABS_X, (__s16) le16_to_cpup((__le16 *)(data + 10))); input_report_abs(dev, ABS_Y, ~(__s16) le16_to_cpup((__le16 *)(data + 12))); /* right stick */ input_report_abs(dev, ABS_RX, (__s16) le16_to_cpup((__le16 *)(data + 14))); input_report_abs(dev, ABS_RY, ~(__s16) le16_to_cpup((__le16 *)(data + 16))); } /* triggers left/right */ if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { input_report_key(dev, BTN_TL2, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_key(dev, BTN_TR2, (__u16) le16_to_cpup((__le16 *)(data + 8))); } else { input_report_abs(dev, ABS_Z, (__u16) le16_to_cpup((__le16 *)(data + 6))); input_report_abs(dev, ABS_RZ, (__u16) le16_to_cpup((__le16 *)(data + 8))); } /* Profile button has a value of 0-3, so it is reported as an axis */ if (xpad->mapping & MAP_PROFILE_BUTTON) input_report_abs(dev, ABS_PROFILE, data[34]); /* paddle handling */ /* based on SDL's SDL_hidapi_xboxone.c */ if (xpad->mapping & MAP_PADDLES) { if (xpad->packet_type == PKT_XBE1) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (memcmp(&data[4], &data[18], 2) != 0) data[32] = 0; /* OG Elite Series Controller paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[32] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[32] & BIT(3)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[32] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[32] & BIT(2)); } else if (xpad->packet_type == PKT_XBE2_FW_OLD) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[19] != 0) data[18] = 0; /* Elite Series 2 4.x firmware paddle bits */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[18] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[18] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[18] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[18] & BIT(3)); } else if (xpad->packet_type == PKT_XBE2_FW_5_EARLY) { /* Mute paddles if controller has a custom mapping applied. * Checked by comparing the current mapping * config against the factory mapping config */ if (data[23] != 0) data[22] = 0; /* Elite Series 2 5.x firmware paddle bits * (before the packet was split) */ input_report_key(dev, BTN_TRIGGER_HAPPY5, data[22] & BIT(0)); input_report_key(dev, BTN_TRIGGER_HAPPY6, data[22] & BIT(1)); input_report_key(dev, BTN_TRIGGER_HAPPY7, data[22] & BIT(2)); input_report_key(dev, BTN_TRIGGER_HAPPY8, data[22] & BIT(3)); } } do_sync = true; } if (do_sync) input_sync(dev); } static void xpad_irq_in(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int retval, status; status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } switch (xpad->xtype) { case XTYPE_XBOX360: xpad360_process_packet(xpad, xpad->dev, 0, xpad->idata); break; case XTYPE_XBOX360W: xpad360w_process_packet(xpad, 0, xpad->idata); break; case XTYPE_XBOXONE: xpadone_process_packet(xpad, 0, xpad->idata); break; default: xpad_process_packet(xpad, 0, xpad->idata); } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_init_packet(struct usb_xpad *xpad) { const struct xboxone_init_packet *init_packet; if (xpad->xtype != XTYPE_XBOXONE) return false; /* Perform initialization sequence for Xbox One pads that require it */ while (xpad->init_seq < ARRAY_SIZE(xboxone_init_packets)) { init_packet = &xboxone_init_packets[xpad->init_seq++]; if (init_packet->idVendor != 0 && init_packet->idVendor != xpad->dev->id.vendor) continue; if (init_packet->idProduct != 0 && init_packet->idProduct != xpad->dev->id.product) continue; /* This packet applies to our device, so prepare to send it */ memcpy(xpad->odata, init_packet->data, init_packet->len); xpad->irq_out->transfer_buffer_length = init_packet->len; /* Update packet with current sequence number */ xpad->odata[2] = xpad->odata_serial++; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static bool xpad_prepare_next_out_packet(struct usb_xpad *xpad) { struct xpad_output_packet *pkt, *packet = NULL; int i; /* We may have init packets to send before we can send user commands */ if (xpad_prepare_next_init_packet(xpad)) return true; for (i = 0; i < XPAD_NUM_OUT_PACKETS; i++) { if (++xpad->last_out_packet >= XPAD_NUM_OUT_PACKETS) xpad->last_out_packet = 0; pkt = &xpad->out_packets[xpad->last_out_packet]; if (pkt->pending) { dev_dbg(&xpad->intf->dev, "%s - found pending output packet %d\n", __func__, xpad->last_out_packet); packet = pkt; break; } } if (packet) { memcpy(xpad->odata, packet->data, packet->len); xpad->irq_out->transfer_buffer_length = packet->len; packet->pending = false; return true; } return false; } /* Callers must hold xpad->odata_lock spinlock */ static int xpad_try_sending_next_out_packet(struct usb_xpad *xpad) { int error; if (!xpad->irq_out_active && xpad_prepare_next_out_packet(xpad)) { usb_anchor_urb(xpad->irq_out, &xpad->irq_out_anchor); error = usb_submit_urb(xpad->irq_out, GFP_ATOMIC); if (error) { dev_err(&xpad->intf->dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(xpad->irq_out); return -EIO; } xpad->irq_out_active = true; } return 0; } static void xpad_irq_out(struct urb *urb) { struct usb_xpad *xpad = urb->context; struct device *dev = &xpad->intf->dev; int status = urb->status; int error; guard(spinlock_irqsave)(&xpad->odata_lock); switch (status) { case 0: /* success */ xpad->irq_out_active = xpad_prepare_next_out_packet(xpad); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, status); xpad->irq_out_active = false; break; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, status); break; } if (xpad->irq_out_active) { usb_anchor_urb(urb, &xpad->irq_out_anchor); error = usb_submit_urb(urb, GFP_ATOMIC); if (error) { dev_err(dev, "%s - usb_submit_urb failed with result %d\n", __func__, error); usb_unanchor_urb(urb); xpad->irq_out_active = false; } } } static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad, struct usb_endpoint_descriptor *ep_irq_out) { int error; if (xpad->xtype == XTYPE_UNKNOWN) return 0; init_usb_anchor(&xpad->irq_out_anchor); xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->odata_dma); if (!xpad->odata) return -ENOMEM; spin_lock_init(&xpad->odata_lock); xpad->irq_out = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_out) { error = -ENOMEM; goto err_free_coherent; } usb_fill_int_urb(xpad->irq_out, xpad->udev, usb_sndintpipe(xpad->udev, ep_irq_out->bEndpointAddress), xpad->odata, XPAD_PKT_LEN, xpad_irq_out, xpad, ep_irq_out->bInterval); xpad->irq_out->transfer_dma = xpad->odata_dma; xpad->irq_out->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; return 0; err_free_coherent: usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); return error; } static void xpad_stop_output(struct usb_xpad *xpad) { if (xpad->xtype != XTYPE_UNKNOWN) { if (!usb_wait_anchor_empty_timeout(&xpad->irq_out_anchor, 5000)) { dev_warn(&xpad->intf->dev, "timed out waiting for output URB to complete, killing\n"); usb_kill_anchored_urbs(&xpad->irq_out_anchor); } } } static void xpad_deinit_output(struct usb_xpad *xpad) { if (xpad->xtype != XTYPE_UNKNOWN) { usb_free_urb(xpad->irq_out); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); } } static int xpad_inquiry_pad_presence(struct usb_xpad *xpad) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; guard(spinlock_irqsave)(&xpad->odata_lock); packet->data[0] = 0x08; packet->data[1] = 0x00; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out presence first */ xpad->last_out_packet = -1; return xpad_try_sending_next_out_packet(xpad); } static int xpad_start_xbox_one(struct usb_xpad *xpad) { int error; if (usb_ifnum_to_if(xpad->udev, GIP_WIRED_INTF_AUDIO)) { /* * Explicitly disable the audio interface. This is needed * for some controllers, such as the PowerA Enhanced Wired * Controller for Series X|S (0x20d6:0x200e) to report the * guide button. */ error = usb_set_interface(xpad->udev, GIP_WIRED_INTF_AUDIO, 0); if (error) dev_warn(&xpad->dev->dev, "unable to disable audio interface: %d\n", error); } guard(spinlock_irqsave)(&xpad->odata_lock); /* * Begin the init sequence by attempting to send a packet. * We will cycle through the init packet sequence before * sending any packets from the output ring. */ xpad->init_seq = 0; return xpad_try_sending_next_out_packet(xpad); } static void xpadone_ack_mode_report(struct usb_xpad *xpad, u8 seq_num) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; static const u8 mode_report_ack[] = { GIP_CMD_ACK, GIP_OPT_INTERNAL, GIP_SEQ0, GIP_PL_LEN(9), 0x00, GIP_CMD_VIRTUAL_KEY, GIP_OPT_INTERNAL, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 }; guard(spinlock_irqsave)(&xpad->odata_lock); packet->len = sizeof(mode_report_ack); memcpy(packet->data, mode_report_ack, packet->len); packet->data[2] = seq_num; packet->pending = true; /* Reset the sequence so we send out the ack now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); } #ifdef CONFIG_JOYSTICK_XPAD_FF static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct usb_xpad *xpad = input_get_drvdata(dev); struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_FF_IDX]; __u16 strong; __u16 weak; if (effect->type != FF_RUMBLE) return 0; strong = effect->u.rumble.strong_magnitude; weak = effect->u.rumble.weak_magnitude; guard(spinlock_irqsave)(&xpad->odata_lock); switch (xpad->xtype) { case XTYPE_XBOX: packet->data[0] = 0x00; packet->data[1] = 0x06; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator */ packet->data[4] = 0x00; packet->data[5] = weak / 256; /* right actuator */ packet->len = 6; packet->pending = true; break; case XTYPE_XBOX360: packet->data[0] = 0x00; packet->data[1] = 0x08; packet->data[2] = 0x00; packet->data[3] = strong / 256; /* left actuator? */ packet->data[4] = weak / 256; /* right actuator? */ packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->len = 8; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x01; packet->data[2] = 0x0F; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = strong / 256; packet->data[6] = weak / 256; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; case XTYPE_XBOXONE: packet->data[0] = GIP_CMD_RUMBLE; /* activate rumble */ packet->data[1] = 0x00; packet->data[2] = xpad->odata_serial++; packet->data[3] = GIP_PL_LEN(9); packet->data[4] = 0x00; packet->data[5] = GIP_MOTOR_ALL; packet->data[6] = 0x00; /* left trigger */ packet->data[7] = 0x00; /* right trigger */ packet->data[8] = strong / 512; /* left actuator */ packet->data[9] = weak / 512; /* right actuator */ packet->data[10] = 0xFF; /* on period */ packet->data[11] = 0x00; /* off period */ packet->data[12] = 0xFF; /* repeat count */ packet->len = 13; packet->pending = true; break; default: dev_dbg(&xpad->dev->dev, "%s - rumble command sent to unsupported xpad type: %d\n", __func__, xpad->xtype); return -EINVAL; } return xpad_try_sending_next_out_packet(xpad); } static int xpad_init_ff(struct usb_xpad *xpad) { if (xpad->xtype == XTYPE_UNKNOWN) return 0; input_set_capability(xpad->dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(xpad->dev, NULL, xpad_play_effect); } #else static int xpad_init_ff(struct usb_xpad *xpad) { return 0; } #endif #if defined(CONFIG_JOYSTICK_XPAD_LEDS) #include <linux/leds.h> #include <linux/idr.h> static DEFINE_IDA(xpad_pad_seq); struct xpad_led { char name[16]; struct led_classdev led_cdev; struct usb_xpad *xpad; }; /* * set the LEDs on Xbox 360 / Wireless Controllers * @param command * 0: off * 1: all blink, then previous setting * 2: 1/top-left blink, then on * 3: 2/top-right blink, then on * 4: 3/bottom-left blink, then on * 5: 4/bottom-right blink, then on * 6: 1/top-left on * 7: 2/top-right on * 8: 3/bottom-left on * 9: 4/bottom-right on * 10: rotate * 11: blink, based on previous setting * 12: slow blink, based on previous setting * 13: rotate with two lights * 14: persistent slow all blink * 15: blink once, then previous setting */ static void xpad_send_led_command(struct usb_xpad *xpad, int command) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_LED_IDX]; command %= 16; guard(spinlock_irqsave)(&xpad->odata_lock); switch (xpad->xtype) { case XTYPE_XBOX360: packet->data[0] = 0x01; packet->data[1] = 0x03; packet->data[2] = command; packet->len = 3; packet->pending = true; break; case XTYPE_XBOX360W: packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0x40 + command; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; break; } xpad_try_sending_next_out_packet(xpad); } /* * Light up the segment corresponding to the pad number on * Xbox 360 Controllers. */ static void xpad_identify_controller(struct usb_xpad *xpad) { led_set_brightness(&xpad->led->led_cdev, (xpad->pad_nr % 4) + 2); } static void xpad_led_set(struct led_classdev *led_cdev, enum led_brightness value) { struct xpad_led *xpad_led = container_of(led_cdev, struct xpad_led, led_cdev); xpad_send_led_command(xpad_led->xpad, value); } static int xpad_led_probe(struct usb_xpad *xpad) { struct xpad_led *led; struct led_classdev *led_cdev; int error; if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX360W) return 0; xpad->led = led = kzalloc(sizeof(*led), GFP_KERNEL); if (!led) return -ENOMEM; xpad->pad_nr = ida_alloc(&xpad_pad_seq, GFP_KERNEL); if (xpad->pad_nr < 0) { error = xpad->pad_nr; goto err_free_mem; } snprintf(led->name, sizeof(led->name), "xpad%d", xpad->pad_nr); led->xpad = xpad; led_cdev = &led->led_cdev; led_cdev->name = led->name; led_cdev->brightness_set = xpad_led_set; led_cdev->flags = LED_CORE_SUSPENDRESUME; error = led_classdev_register(&xpad->udev->dev, led_cdev); if (error) goto err_free_id; xpad_identify_controller(xpad); return 0; err_free_id: ida_free(&xpad_pad_seq, xpad->pad_nr); err_free_mem: kfree(led); xpad->led = NULL; return error; } static void xpad_led_disconnect(struct usb_xpad *xpad) { struct xpad_led *xpad_led = xpad->led; if (xpad_led) { led_classdev_unregister(&xpad_led->led_cdev); ida_free(&xpad_pad_seq, xpad->pad_nr); kfree(xpad_led); } } #else static int xpad_led_probe(struct usb_xpad *xpad) { return 0; } static void xpad_led_disconnect(struct usb_xpad *xpad) { } #endif static int xpad_start_input(struct usb_xpad *xpad) { int error; if (usb_submit_urb(xpad->irq_in, GFP_KERNEL)) return -EIO; if (xpad->xtype == XTYPE_XBOXONE) { error = xpad_start_xbox_one(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } } if (xpad->xtype == XTYPE_XBOX360) { /* * Some third-party controllers Xbox 360-style controllers * require this message to finish initialization. */ u8 dummy[20]; error = usb_control_msg_recv(xpad->udev, 0, /* bRequest */ 0x01, /* bmRequestType */ USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, /* wValue */ 0x100, /* wIndex */ 0x00, dummy, sizeof(dummy), 25, GFP_KERNEL); if (error) dev_warn(&xpad->dev->dev, "unable to receive magic message: %d\n", error); } return 0; } static void xpad_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); } static void xpad360w_poweroff_controller(struct usb_xpad *xpad) { struct xpad_output_packet *packet = &xpad->out_packets[XPAD_OUT_CMD_IDX]; guard(spinlock_irqsave)(&xpad->odata_lock); packet->data[0] = 0x00; packet->data[1] = 0x00; packet->data[2] = 0x08; packet->data[3] = 0xC0; packet->data[4] = 0x00; packet->data[5] = 0x00; packet->data[6] = 0x00; packet->data[7] = 0x00; packet->data[8] = 0x00; packet->data[9] = 0x00; packet->data[10] = 0x00; packet->data[11] = 0x00; packet->len = 12; packet->pending = true; /* Reset the sequence so we send out poweroff now */ xpad->last_out_packet = -1; xpad_try_sending_next_out_packet(xpad); } static int xpad360w_start_input(struct usb_xpad *xpad) { int error; error = usb_submit_urb(xpad->irq_in, GFP_KERNEL); if (error) return -EIO; /* * Send presence packet. * This will force the controller to resend connection packets. * This is useful in the case we activate the module after the * adapter has been plugged in, as it won't automatically * send us info about the controllers. */ error = xpad_inquiry_pad_presence(xpad); if (error) { usb_kill_urb(xpad->irq_in); return error; } return 0; } static void xpad360w_stop_input(struct usb_xpad *xpad) { usb_kill_urb(xpad->irq_in); /* Make sure we are done with presence work if it was scheduled */ flush_work(&xpad->work); } static int xpad_open(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); return xpad_start_input(xpad); } static void xpad_close(struct input_dev *dev) { struct usb_xpad *xpad = input_get_drvdata(dev); xpad_stop_input(xpad); } static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) { struct usb_xpad *xpad = input_get_drvdata(input_dev); switch (abs) { case ABS_X: case ABS_Y: case ABS_RX: case ABS_RY: /* the two sticks */ input_set_abs_params(input_dev, abs, -32768, 32767, 16, 128); break; case ABS_Z: case ABS_RZ: /* the triggers (if mapped to axes) */ if (xpad->xtype == XTYPE_XBOXONE) input_set_abs_params(input_dev, abs, 0, 1023, 0, 0); else input_set_abs_params(input_dev, abs, 0, 255, 0, 0); break; case ABS_HAT0X: case ABS_HAT0Y: /* the d-pad (only if dpad is mapped to axes */ input_set_abs_params(input_dev, abs, -1, 1, 0, 0); break; case ABS_PROFILE: /* 4 value profile button (such as on XAC) */ input_set_abs_params(input_dev, abs, 0, 4, 0, 0); break; default: input_set_abs_params(input_dev, abs, 0, 0, 0, 0); break; } } static void xpad_deinit_input(struct usb_xpad *xpad) { if (xpad->input_created) { xpad->input_created = false; xpad_led_disconnect(xpad); input_unregister_device(xpad->dev); } } static int xpad_init_input(struct usb_xpad *xpad) { struct input_dev *input_dev; int i, error; input_dev = input_allocate_device(); if (!input_dev) return -ENOMEM; xpad->dev = input_dev; input_dev->name = xpad->name; input_dev->phys = xpad->phys; usb_to_input_id(xpad->udev, &input_dev->id); if (xpad->xtype == XTYPE_XBOX360W) { /* x360w controllers and the receiver have different ids */ input_dev->id.product = 0x02a1; } input_dev->dev.parent = &xpad->intf->dev; input_set_drvdata(input_dev, xpad); if (xpad->xtype != XTYPE_XBOX360W) { input_dev->open = xpad_open; input_dev->close = xpad_close; } if (!(xpad->mapping & MAP_STICKS_TO_NULL)) { /* set up axes */ for (i = 0; xpad_abs[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs[i]); } /* set up standard buttons */ for (i = 0; xpad_common_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_common_btn[i]); /* set up model-specific ones */ if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX360W || xpad->xtype == XTYPE_XBOXONE) { for (i = 0; xpad360_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad360_btn[i]); if (xpad->mapping & MAP_SELECT_BUTTON) input_set_capability(input_dev, EV_KEY, KEY_RECORD); } else { for (i = 0; xpad_btn[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn[i]); } if (xpad->mapping & MAP_DPAD_TO_BUTTONS) { for (i = 0; xpad_btn_pad[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_pad[i]); } /* set up paddles if the controller has them */ if (xpad->mapping & MAP_PADDLES) { for (i = 0; xpad_btn_paddles[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_paddles[i]); } /* * This should be a simple else block. However historically * xbox360w has mapped DPAD to buttons while xbox360 did not. This * made no sense, but now we can not just switch back and have to * support both behaviors. */ if (!(xpad->mapping & MAP_DPAD_TO_BUTTONS) || xpad->xtype == XTYPE_XBOX360W) { for (i = 0; xpad_abs_pad[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_pad[i]); } if (xpad->mapping & MAP_TRIGGERS_TO_BUTTONS) { for (i = 0; xpad_btn_triggers[i] >= 0; i++) input_set_capability(input_dev, EV_KEY, xpad_btn_triggers[i]); } else { for (i = 0; xpad_abs_triggers[i] >= 0; i++) xpad_set_up_abs(input_dev, xpad_abs_triggers[i]); } /* setup profile button as an axis with 4 possible values */ if (xpad->mapping & MAP_PROFILE_BUTTON) xpad_set_up_abs(input_dev, ABS_PROFILE); error = xpad_init_ff(xpad); if (error) goto err_free_input; error = xpad_led_probe(xpad); if (error) goto err_destroy_ff; error = input_register_device(xpad->dev); if (error) goto err_disconnect_led; xpad->input_created = true; return 0; err_disconnect_led: xpad_led_disconnect(xpad); err_destroy_ff: input_ff_destroy(input_dev); err_free_input: input_free_device(input_dev); return error; } static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *udev = interface_to_usbdev(intf); struct usb_xpad *xpad; struct usb_endpoint_descriptor *ep_irq_in, *ep_irq_out; int i, error; if (intf->cur_altsetting->desc.bNumEndpoints != 2) return -ENODEV; for (i = 0; xpad_device[i].idVendor; i++) { if ((le16_to_cpu(udev->descriptor.idVendor) == xpad_device[i].idVendor) && (le16_to_cpu(udev->descriptor.idProduct) == xpad_device[i].idProduct)) break; } xpad = kzalloc(sizeof(*xpad), GFP_KERNEL); if (!xpad) return -ENOMEM; usb_make_path(udev, xpad->phys, sizeof(xpad->phys)); strlcat(xpad->phys, "/input0", sizeof(xpad->phys)); xpad->idata = usb_alloc_coherent(udev, XPAD_PKT_LEN, GFP_KERNEL, &xpad->idata_dma); if (!xpad->idata) { error = -ENOMEM; goto err_free_mem; } xpad->irq_in = usb_alloc_urb(0, GFP_KERNEL); if (!xpad->irq_in) { error = -ENOMEM; goto err_free_idata; } xpad->udev = udev; xpad->intf = intf; xpad->mapping = xpad_device[i].mapping; xpad->xtype = xpad_device[i].xtype; xpad->name = xpad_device[i].name; xpad->packet_type = PKT_XB; INIT_WORK(&xpad->work, xpad_presence_work); if (xpad->xtype == XTYPE_UNKNOWN) { if (intf->cur_altsetting->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC) { if (intf->cur_altsetting->desc.bInterfaceProtocol == 129) xpad->xtype = XTYPE_XBOX360W; else if (intf->cur_altsetting->desc.bInterfaceProtocol == 208) xpad->xtype = XTYPE_XBOXONE; else xpad->xtype = XTYPE_XBOX360; } else { xpad->xtype = XTYPE_XBOX; } if (dpad_to_buttons) xpad->mapping |= MAP_DPAD_TO_BUTTONS; if (triggers_to_buttons) xpad->mapping |= MAP_TRIGGERS_TO_BUTTONS; if (sticks_to_null) xpad->mapping |= MAP_STICKS_TO_NULL; } if (xpad->xtype == XTYPE_XBOXONE && intf->cur_altsetting->desc.bInterfaceNumber != GIP_WIRED_INTF_DATA) { /* * The Xbox One controller lists three interfaces all with the * same interface class, subclass and protocol. Differentiate by * interface number. */ error = -ENODEV; goto err_free_in_urb; } ep_irq_in = ep_irq_out = NULL; for (i = 0; i < 2; i++) { struct usb_endpoint_descriptor *ep = &intf->cur_altsetting->endpoint[i].desc; if (usb_endpoint_xfer_int(ep)) { if (usb_endpoint_dir_in(ep)) ep_irq_in = ep; else ep_irq_out = ep; } } if (!ep_irq_in || !ep_irq_out) { error = -ENODEV; goto err_free_in_urb; } error = xpad_init_output(intf, xpad, ep_irq_out); if (error) goto err_free_in_urb; usb_fill_int_urb(xpad->irq_in, udev, usb_rcvintpipe(udev, ep_irq_in->bEndpointAddress), xpad->idata, XPAD_PKT_LEN, xpad_irq_in, xpad, ep_irq_in->bInterval); xpad->irq_in->transfer_dma = xpad->idata_dma; xpad->irq_in->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_set_intfdata(intf, xpad); /* Packet type detection */ if (le16_to_cpu(udev->descriptor.idVendor) == 0x045e) { /* Microsoft controllers */ if (le16_to_cpu(udev->descriptor.idProduct) == 0x02e3) { /* The original elite controller always uses the oldest * type of extended packet */ xpad->packet_type = PKT_XBE1; } else if (le16_to_cpu(udev->descriptor.idProduct) == 0x0b00) { /* The elite 2 controller has seen multiple packet * revisions. These are tied to specific firmware * versions */ if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x0500) { /* This is the format that the Elite 2 used * prior to the BLE update */ xpad->packet_type = PKT_XBE2_FW_OLD; } else if (le16_to_cpu(udev->descriptor.bcdDevice) < 0x050b) { /* This is the format that the Elite 2 used * prior to the update that split the packet */ xpad->packet_type = PKT_XBE2_FW_5_EARLY; } else { /* The split packet format that was introduced * in firmware v5.11 */ xpad->packet_type = PKT_XBE2_FW_5_11; } } } if (xpad->xtype == XTYPE_XBOX360W) { /* * Submit the int URB immediately rather than waiting for open * because we get status messages from the device whether * or not any controllers are attached. In fact, it's * exactly the message that a controller has arrived that * we're waiting for. */ error = xpad360w_start_input(xpad); if (error) goto err_deinit_output; /* * Wireless controllers require RESET_RESUME to work properly * after suspend. Ideally this quirk should be in usb core * quirk list, but we have too many vendors producing these * controllers and we'd need to maintain 2 identical lists * here in this driver and in usb core. */ udev->quirks |= USB_QUIRK_RESET_RESUME; } else { error = xpad_init_input(xpad); if (error) goto err_deinit_output; } return 0; err_deinit_output: xpad_deinit_output(xpad); err_free_in_urb: usb_free_urb(xpad->irq_in); err_free_idata: usb_free_coherent(udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); err_free_mem: kfree(xpad); return error; } static void xpad_disconnect(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); if (xpad->xtype == XTYPE_XBOX360W) xpad360w_stop_input(xpad); xpad_deinit_input(xpad); /* * Now that both input device and LED device are gone we can * stop output URB. */ xpad_stop_output(xpad); xpad_deinit_output(xpad); usb_free_urb(xpad->irq_in); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->idata, xpad->idata_dma); kfree(xpad); usb_set_intfdata(intf, NULL); } static int xpad_suspend(struct usb_interface *intf, pm_message_t message) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; if (xpad->xtype == XTYPE_XBOX360W) { /* * Wireless controllers always listen to input so * they are notified when controller shows up * or goes away. */ xpad360w_stop_input(xpad); /* * The wireless adapter is going off now, so the * gamepads are going to become disconnected. * Unless explicitly disabled, power them down * so they don't just sit there flashing. */ if (auto_poweroff && xpad->pad_present) xpad360w_poweroff_controller(xpad); } else { guard(mutex)(&input->mutex); if (input_device_enabled(input)) xpad_stop_input(xpad); } xpad_stop_output(xpad); return 0; } static int xpad_resume(struct usb_interface *intf) { struct usb_xpad *xpad = usb_get_intfdata(intf); struct input_dev *input = xpad->dev; if (xpad->xtype == XTYPE_XBOX360W) return xpad360w_start_input(xpad); guard(mutex)(&input->mutex); if (input_device_enabled(input)) return xpad_start_input(xpad); if (xpad->xtype == XTYPE_XBOXONE) { /* * Even if there are no users, we'll send Xbox One pads * the startup sequence so they don't sit there and * blink until somebody opens the input device again. */ return xpad_start_xbox_one(xpad); } return 0; } static struct usb_driver xpad_driver = { .name = "xpad", .probe = xpad_probe, .disconnect = xpad_disconnect, .suspend = xpad_suspend, .resume = xpad_resume, .id_table = xpad_table, }; module_usb_driver(xpad_driver); MODULE_AUTHOR("Marko Friedemann <mfr@bmx-chemnitz.de>"); MODULE_DESCRIPTION("Xbox pad driver"); MODULE_LICENSE("GPL");
1 30 39 1 1 36 1 1 7 29 30 6 24 24 28 29 29 29 1 1 1 1 1 1 1 1 2 2 18 17 2 2 2 2 1 1 10 4 6 5 1 2 6 5 14 3 11 11 11 2 4 6 3 2 7 28 28 7 22 2 5 15 5 39 33 3 1 1 43 10 7 4 7 7 27 28 24 1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth address family and sockets. */ #include <linux/module.h> #include <linux/debugfs.h> #include <linux/stringify.h> #include <linux/sched/signal.h> #include <asm/ioctls.h> #include <net/bluetooth/bluetooth.h> #include <linux/proc_fs.h> #include "leds.h" #include "selftest.h" /* Bluetooth sockets */ #define BT_MAX_PROTO (BTPROTO_LAST + 1) static const struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; static const char *const bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP", "sk_lock-AF_BLUETOOTH-BTPROTO_HCI", "sk_lock-AF_BLUETOOTH-BTPROTO_SCO", "sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM", "sk_lock-AF_BLUETOOTH-BTPROTO_BNEP", "sk_lock-AF_BLUETOOTH-BTPROTO_CMTP", "sk_lock-AF_BLUETOOTH-BTPROTO_HIDP", "sk_lock-AF_BLUETOOTH-BTPROTO_AVDTP", "sk_lock-AF_BLUETOOTH-BTPROTO_ISO", }; static struct lock_class_key bt_slock_key[BT_MAX_PROTO]; static const char *const bt_slock_key_strings[BT_MAX_PROTO] = { "slock-AF_BLUETOOTH-BTPROTO_L2CAP", "slock-AF_BLUETOOTH-BTPROTO_HCI", "slock-AF_BLUETOOTH-BTPROTO_SCO", "slock-AF_BLUETOOTH-BTPROTO_RFCOMM", "slock-AF_BLUETOOTH-BTPROTO_BNEP", "slock-AF_BLUETOOTH-BTPROTO_CMTP", "slock-AF_BLUETOOTH-BTPROTO_HIDP", "slock-AF_BLUETOOTH-BTPROTO_AVDTP", "slock-AF_BLUETOOTH-BTPROTO_ISO", }; void bt_sock_reclassify_lock(struct sock *sk, int proto) { BUG_ON(!sk); BUG_ON(!sock_allow_reclassification(sk)); sock_lock_init_class_and_name(sk, bt_slock_key_strings[proto], &bt_slock_key[proto], bt_key_strings[proto], &bt_lock_key[proto]); } EXPORT_SYMBOL(bt_sock_reclassify_lock); int bt_sock_register(int proto, const struct net_proto_family *ops) { int err = 0; if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; write_lock(&bt_proto_lock); if (bt_proto[proto]) err = -EEXIST; else bt_proto[proto] = ops; write_unlock(&bt_proto_lock); return err; } EXPORT_SYMBOL(bt_sock_register); void bt_sock_unregister(int proto) { if (proto < 0 || proto >= BT_MAX_PROTO) return; write_lock(&bt_proto_lock); bt_proto[proto] = NULL; write_unlock(&bt_proto_lock); } EXPORT_SYMBOL(bt_sock_unregister); static int bt_sock_create(struct net *net, struct socket *sock, int proto, int kern) { int err; if (net != &init_net) return -EAFNOSUPPORT; if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; if (!bt_proto[proto]) request_module("bt-proto-%d", proto); err = -EPROTONOSUPPORT; read_lock(&bt_proto_lock); if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) { err = bt_proto[proto]->create(net, sock, proto, kern); if (!err) bt_sock_reclassify_lock(sock->sk, proto); module_put(bt_proto[proto]->owner); } read_unlock(&bt_proto_lock); return err; } struct sock *bt_sock_alloc(struct net *net, struct socket *sock, struct proto *prot, int proto, gfp_t prio, int kern) { struct sock *sk; sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern); if (!sk) return NULL; sock_init_data(sock, sk); INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sock_reset_flag(sk, SOCK_ZAPPED); sk->sk_protocol = proto; sk->sk_state = BT_OPEN; /* Init peer information so it can be properly monitored */ if (!kern) { spin_lock(&sk->sk_peer_lock); sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); spin_unlock(&sk->sk_peer_lock); } return sk; } EXPORT_SYMBOL(bt_sock_alloc); void bt_sock_link(struct bt_sock_list *l, struct sock *sk) { write_lock(&l->lock); sk_add_node(sk, &l->head); write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_link); void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk) { write_lock(&l->lock); sk_del_node_init(sk); write_unlock(&l->lock); } EXPORT_SYMBOL(bt_sock_unlink); bool bt_sock_linked(struct bt_sock_list *l, struct sock *s) { struct sock *sk; if (!l || !s) return false; read_lock(&l->lock); sk_for_each(sk, &l->head) { if (s == sk) { read_unlock(&l->lock); return true; } } read_unlock(&l->lock); return false; } EXPORT_SYMBOL(bt_sock_linked); void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh) { const struct cred *old_cred; struct pid *old_pid; BT_DBG("parent %p, sk %p", parent, sk); sock_hold(sk); if (bh) bh_lock_sock_nested(sk); else lock_sock_nested(sk, SINGLE_DEPTH_NESTING); list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q); bt_sk(sk)->parent = parent; /* Copy credentials from parent since for incoming connections the * socket is allocated by the kernel. */ spin_lock(&sk->sk_peer_lock); old_pid = sk->sk_peer_pid; old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(parent->sk_peer_pid); sk->sk_peer_cred = get_cred(parent->sk_peer_cred); spin_unlock(&sk->sk_peer_lock); put_pid(old_pid); put_cred(old_cred); if (bh) bh_unlock_sock(sk); else release_sock(sk); sk_acceptq_added(parent); } EXPORT_SYMBOL(bt_accept_enqueue); /* Calling function must hold the sk lock. * bt_sk(sk)->parent must be non-NULL meaning sk is in the parent list. */ void bt_accept_unlink(struct sock *sk) { BT_DBG("sk %p state %d", sk, sk->sk_state); list_del_init(&bt_sk(sk)->accept_q); sk_acceptq_removed(bt_sk(sk)->parent); bt_sk(sk)->parent = NULL; sock_put(sk); } EXPORT_SYMBOL(bt_accept_unlink); struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) { struct bt_sock *s, *n; struct sock *sk; BT_DBG("parent %p", parent); restart: list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; /* Prevent early freeing of sk due to unlink and sock_kill */ sock_hold(sk); lock_sock(sk); /* Check sk has not already been unlinked via * bt_accept_unlink() due to serialisation caused by sk locking */ if (!bt_sk(sk)->parent) { BT_DBG("sk %p, already unlinked", sk); release_sock(sk); sock_put(sk); /* Restart the loop as sk is no longer in the list * and also avoid a potential infinite loop because * list_for_each_entry_safe() is not thread safe. */ goto restart; } /* sk is safely in the parent list so reduce reference count */ sock_put(sk); /* FIXME: Is this check still needed */ if (sk->sk_state == BT_CLOSED) { bt_accept_unlink(sk); release_sock(sk); continue; } if (sk->sk_state == BT_CONNECTED || !newsock || test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) { bt_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); release_sock(sk); return sk; } release_sock(sk); } return NULL; } EXPORT_SYMBOL(bt_accept_dequeue); int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; size_t copied; size_t skblen; int err; BT_DBG("sock %p sk %p len %zu", sock, sk, len); if (flags & MSG_OOB) return -EOPNOTSUPP; skb = skb_recv_datagram(sk, flags, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) err = 0; return err; } skblen = skb->len; copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; copied = len; } skb_reset_transport_header(skb); err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err == 0) { sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); if (test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags)) { u8 pkt_status = hci_skb_pkt_status(skb); put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS, sizeof(pkt_status), &pkt_status); } } skb_free_datagram(sk, skb); if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; } EXPORT_SYMBOL(bt_sock_recvmsg); static long bt_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue)) break; if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN)) break; if (signal_pending(current) || !timeo) break; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return timeo; } int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; size_t target, copied = 0; long timeo; if (flags & MSG_OOB) return -EOPNOTSUPP; BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb; int chunk; skb = skb_dequeue(&sk->sk_receive_queue); if (!skb) { if (copied >= target) break; err = sock_error(sk); if (err) break; if (sk->sk_shutdown & RCV_SHUTDOWN) break; err = -EAGAIN; if (!timeo) break; timeo = bt_sock_data_wait(sk, timeo); if (signal_pending(current)) { err = sock_intr_errno(timeo); goto out; } continue; } chunk = min_t(unsigned int, skb->len, size); if (skb_copy_datagram_msg(skb, 0, msg, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); if (!copied) copied = -EFAULT; break; } copied += chunk; size -= chunk; sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { int skb_len = skb_headlen(skb); if (chunk <= skb_len) { __skb_pull(skb, chunk); } else { struct sk_buff *frag; __skb_pull(skb, skb_len); chunk -= skb_len; skb_walk_frags(skb, frag) { if (chunk <= frag->len) { /* Pulling partial data */ skb->len -= chunk; skb->data_len -= chunk; __skb_pull(frag, chunk); break; } else if (frag->len) { /* Pulling all frag data */ chunk -= frag->len; skb->len -= frag->len; skb->data_len -= frag->len; __skb_pull(frag, frag->len); } } } if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; } kfree_skb(skb); } else { /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); break; } } while (size); out: release_sock(sk); return copied ? : err; } EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline __poll_t bt_accept_poll(struct sock *parent) { struct bt_sock *s, *n; struct sock *sk; list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; if (sk->sk_state == BT_CONNECTED || (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && sk->sk_state == BT_CONNECT2)) return EPOLLIN | EPOLLRDNORM; } return 0; } __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == BT_CLOSED) mask |= EPOLLHUP; if (sk->sk_state == BT_CONNECT || sk->sk_state == BT_CONNECT2 || sk->sk_state == BT_CONFIG) return mask; if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } EXPORT_SYMBOL(bt_sock_poll); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct sk_buff *skb; long amount; int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); switch (cmd) { case TIOCOUTQ: if (sk->sk_state == BT_LISTEN) return -EINVAL; amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *)arg); break; case TIOCINQ: if (sk->sk_state == BT_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; spin_unlock(&sk->sk_receive_queue.lock); err = put_user(amount, (int __user *)arg); break; default: err = -ENOIOCTLCMD; break; } return err; } EXPORT_SYMBOL(bt_sock_ioctl); /* This function expects the sk lock to be held when called */ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { if (!timeo) { err = -EINPROGRESS; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); /* This function expects the sk lock to be held when called */ int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags) { DECLARE_WAITQUEUE(wait, current); unsigned long timeo; int err = 0; BT_DBG("sk %p", sk); timeo = sock_sndtimeo(sk, !!(msg_flags & MSG_DONTWAIT)); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while (test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags)) { if (!timeo) { err = -EAGAIN; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_ready); #ifdef CONFIG_PROC_FS static void *bt_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq->private->l->lock) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_lock(&l->lock); return seq_hlist_start_head(&l->head, *pos); } static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); return seq_hlist_next(v, &l->head, pos); } static void bt_seq_stop(struct seq_file *seq, void *v) __releases(seq->private->l->lock) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_unlock(&l->lock); } static int bt_seq_show(struct seq_file *seq, void *v) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); if (v == SEQ_START_TOKEN) { seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent"); if (l->custom_seq_show) { seq_putc(seq, ' '); l->custom_seq_show(seq, v); } seq_putc(seq, '\n'); } else { struct sock *sk = sk_entry(v); struct bt_sock *bt = bt_sk(sk); seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu", sk, refcount_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), from_kuid(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), bt->parent ? sock_i_ino(bt->parent) : 0LU); if (l->custom_seq_show) { seq_putc(seq, ' '); l->custom_seq_show(seq, v); } seq_putc(seq, '\n'); } return 0; } static const struct seq_operations bt_seq_ops = { .start = bt_seq_start, .next = bt_seq_next, .stop = bt_seq_stop, .show = bt_seq_show, }; int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list)) return -ENOMEM; return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); } #else int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)) { return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { } #endif EXPORT_SYMBOL(bt_procfs_init); EXPORT_SYMBOL(bt_procfs_cleanup); static const struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, .create = bt_sock_create, }; struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); #define VERSION __stringify(BT_SUBSYS_VERSION) "." \ __stringify(BT_SUBSYS_REVISION) static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) return err; bt_debugfs = debugfs_create_dir("bluetooth", NULL); bt_leds_init(); err = bt_sysfs_init(); if (err < 0) goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) goto cleanup_sysfs; BT_INFO("HCI device and connection manager initialized"); err = hci_sock_init(); if (err) goto unregister_socket; err = l2cap_init(); if (err) goto cleanup_socket; err = sco_init(); if (err) goto cleanup_cap; err = mgmt_init(); if (err) goto cleanup_sco; return 0; cleanup_sco: sco_exit(); cleanup_cap: l2cap_exit(); cleanup_socket: hci_sock_cleanup(); unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); cleanup_led: bt_leds_cleanup(); debugfs_remove_recursive(bt_debugfs); return err; } static void __exit bt_exit(void) { iso_exit(); mgmt_exit(); sco_exit(); l2cap_exit(); hci_sock_cleanup(); sock_unregister(PF_BLUETOOTH); bt_sysfs_cleanup(); bt_leds_cleanup(); debugfs_remove_recursive(bt_debugfs); } subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH);
39 28 1 11 8 3 9 1 3 1 4 3 2 2 3 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 // SPDX-License-Identifier: GPL-2.0-only #include <net/ip.h> #include <net/tcp.h> #include <net/netfilter/nf_tables.h> #include <linux/netfilter/nfnetlink_osf.h> struct nft_osf { u8 dreg; u8 ttl; u32 flags; }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { [NFTA_OSF_DREG] = { .type = NLA_U32 }, [NFTA_OSF_TTL] = { .type = NLA_U8 }, [NFTA_OSF_FLAGS] = { .type = NLA_U32 }, }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_osf *priv = nft_expr_priv(expr); u32 *dest = &regs->data[priv->dreg]; struct sk_buff *skb = pkt->skb; char os_match[NFT_OSF_MAXGENRELEN]; const struct tcphdr *tcp; struct nf_osf_data data; struct tcphdr _tcph; if (pkt->tprot != IPPROTO_TCP) { regs->verdict.code = NFT_BREAK; return; } tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); if (!tcp) { regs->verdict.code = NFT_BREAK; return; } if (!tcp->syn) { regs->verdict.code = NFT_BREAK; return; } if (!nf_osf_find(skb, nf_osf_fingers, priv->ttl, &data)) { strscpy_pad((char *)dest, "unknown", NFT_OSF_MAXGENRELEN); } else { if (priv->flags & NFT_OSF_F_VERSION) snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s", data.genre, data.version); else strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN); strscpy_pad((char *)dest, os_match, NFT_OSF_MAXGENRELEN); } } static int nft_osf_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_osf *priv = nft_expr_priv(expr); u32 flags; u8 ttl; if (!tb[NFTA_OSF_DREG]) return -EINVAL; if (tb[NFTA_OSF_TTL]) { ttl = nla_get_u8(tb[NFTA_OSF_TTL]); if (ttl > 2) return -EINVAL; priv->ttl = ttl; } if (tb[NFTA_OSF_FLAGS]) { flags = ntohl(nla_get_be32(tb[NFTA_OSF_FLAGS])); if (flags != NFT_OSF_F_VERSION) return -EINVAL; priv->flags = flags; } return nft_parse_register_store(ctx, tb[NFTA_OSF_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, NFT_OSF_MAXGENRELEN); } static int nft_osf_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_osf *priv = nft_expr_priv(expr); if (nla_put_u8(skb, NFTA_OSF_TTL, priv->ttl)) goto nla_put_failure; if (nla_put_u32(skb, NFTA_OSF_FLAGS, ntohl((__force __be32)priv->flags))) goto nla_put_failure; if (nft_dump_register(skb, NFTA_OSF_DREG, priv->dreg)) goto nla_put_failure; return 0; nla_put_failure: return -1; } static int nft_osf_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { unsigned int hooks; switch (ctx->family) { case NFPROTO_IPV4: case NFPROTO_IPV6: case NFPROTO_INET: hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_FORWARD); break; default: return -EOPNOTSUPP; } return nft_chain_validate_hooks(ctx->chain, hooks); } static bool nft_osf_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { struct nft_osf *priv = nft_expr_priv(expr); struct nft_osf *osf; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); return false; } osf = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->flags != osf->flags || priv->ttl != osf->ttl) { nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); return false; } if (!track->regs[priv->dreg].bitwise) return true; return false; } static struct nft_expr_type nft_osf_type; static const struct nft_expr_ops nft_osf_op = { .eval = nft_osf_eval, .size = NFT_EXPR_SIZE(sizeof(struct nft_osf)), .init = nft_osf_init, .dump = nft_osf_dump, .type = &nft_osf_type, .validate = nft_osf_validate, .reduce = nft_osf_reduce, }; static struct nft_expr_type nft_osf_type __read_mostly = { .ops = &nft_osf_op, .name = "osf", .owner = THIS_MODULE, .policy = nft_osf_policy, .maxattr = NFTA_OSF_MAX, }; static int __init nft_osf_module_init(void) { return nft_register_expr(&nft_osf_type); } static void __exit nft_osf_module_exit(void) { return nft_unregister_expr(&nft_osf_type); } module_init(nft_osf_module_init); module_exit(nft_osf_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez <ffmancera@riseup.net>"); MODULE_ALIAS_NFT_EXPR("osf"); MODULE_DESCRIPTION("nftables passive OS fingerprint support");
32 14 18 2 14 2 2 1 2 2 9 1 1 3 1 4 7 1 1 6 6 17 3 2 1 1 1 1 2 20 2 1 13 5 12 4 2 14 6 1 5 1 1 1 24 2 15 8 14 8 2 18 1 4 4 2 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 // SPDX-License-Identifier: GPL-2.0-or-later /* * NetLabel Management Support * * This file defines the management functions for the NetLabel system. The * NetLabel system manages static and dynamic label mappings for network * protocols such as CIPSO and RIPSO. * * Author: Paul Moore <paul@paul-moore.com> */ /* * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 */ #include <linux/types.h> #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/slab.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <net/calipso.h> #include <linux/atomic.h> #include "netlabel_calipso.h" #include "netlabel_domainhash.h" #include "netlabel_user.h" #include "netlabel_mgmt.h" /* NetLabel configured protocol counter */ atomic_t netlabel_mgmt_protocount = ATOMIC_INIT(0); /* Argument struct for netlbl_domhsh_walk() */ struct netlbl_domhsh_walk_arg { struct netlink_callback *nl_cb; struct sk_buff *skb; u32 seq; }; /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_mgmt_gnl_family; /* NetLabel Netlink attribute policy */ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { [NLBL_MGMT_A_DOMAIN] = { .type = NLA_NUL_STRING }, [NLBL_MGMT_A_PROTOCOL] = { .type = NLA_U32 }, [NLBL_MGMT_A_VERSION] = { .type = NLA_U32 }, [NLBL_MGMT_A_CV4DOI] = { .type = NLA_U32 }, [NLBL_MGMT_A_FAMILY] = { .type = NLA_U16 }, [NLBL_MGMT_A_CLPDOI] = { .type = NLA_U32 }, }; /* * Helper Functions */ /** * netlbl_mgmt_add_common - Handle an ADD message * @info: the Generic NETLINK info block * @audit_info: NetLabel audit information * * Description: * Helper function for the ADD and ADDDEF messages to add the domain mappings * from the message to the hash table. See netlabel.h for a description of the * message format. Returns zero on success, negative values on failure. * */ static int netlbl_mgmt_add_common(struct genl_info *info, struct netlbl_audit *audit_info) { void *pmap = NULL; int ret_val = -EINVAL; struct netlbl_domaddr_map *addrmap = NULL; struct cipso_v4_doi *cipsov4 = NULL; #if IS_ENABLED(CONFIG_IPV6) struct calipso_doi *calipso = NULL; #endif u32 tmp_val; struct netlbl_dom_map *entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return -ENOMEM; entry->def.type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); if (info->attrs[NLBL_MGMT_A_DOMAIN]) { size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); entry->domain = kmalloc(tmp_size, GFP_KERNEL); if (entry->domain == NULL) { ret_val = -ENOMEM; goto add_free_entry; } nla_strscpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); } /* NOTE: internally we allow/use a entry->def.type value of * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users * to pass that as a protocol value because we need to know the * "real" protocol */ switch (entry->def.type) { case NETLBL_NLTYPE_UNLABELED: entry->family = nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY], AF_UNSPEC); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto add_free_domain; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); cipsov4 = cipso_v4_doi_getdef(tmp_val); if (cipsov4 == NULL) goto add_free_domain; entry->family = AF_INET; entry->def.cipso = cipsov4; break; #if IS_ENABLED(CONFIG_IPV6) case NETLBL_NLTYPE_CALIPSO: if (!info->attrs[NLBL_MGMT_A_CLPDOI]) goto add_free_domain; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CLPDOI]); calipso = calipso_doi_getdef(tmp_val); if (calipso == NULL) goto add_free_domain; entry->family = AF_INET6; entry->def.calipso = calipso; break; #endif /* IPv6 */ default: goto add_free_domain; } if ((entry->family == AF_INET && info->attrs[NLBL_MGMT_A_IPV6ADDR]) || (entry->family == AF_INET6 && info->attrs[NLBL_MGMT_A_IPV4ADDR])) goto add_doi_put_def; if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { struct in_addr *addr; struct in_addr *mask; struct netlbl_domaddr4_map *map; addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != sizeof(struct in_addr)) { ret_val = -EINVAL; goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != sizeof(struct in_addr)) { ret_val = -EINVAL; goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; goto add_free_addrmap; } pmap = map; map->list.addr = addr->s_addr & mask->s_addr; map->list.mask = mask->s_addr; map->list.valid = 1; map->def.type = entry->def.type; if (cipsov4) map->def.cipso = cipsov4; ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); if (ret_val != 0) goto add_free_map; entry->family = AF_INET; entry->def.type = NETLBL_NLTYPE_ADDRSELECT; entry->def.addrsel = addrmap; #if IS_ENABLED(CONFIG_IPV6) } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { struct in6_addr *addr; struct in6_addr *mask; struct netlbl_domaddr6_map *map; addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); if (addrmap == NULL) { ret_val = -ENOMEM; goto add_doi_put_def; } INIT_LIST_HEAD(&addrmap->list4); INIT_LIST_HEAD(&addrmap->list6); if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; goto add_free_addrmap; } if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != sizeof(struct in6_addr)) { ret_val = -EINVAL; goto add_free_addrmap; } addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); map = kzalloc(sizeof(*map), GFP_KERNEL); if (map == NULL) { ret_val = -ENOMEM; goto add_free_addrmap; } pmap = map; map->list.addr = *addr; map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; map->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; map->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; map->list.mask = *mask; map->list.valid = 1; map->def.type = entry->def.type; if (calipso) map->def.calipso = calipso; ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); if (ret_val != 0) goto add_free_map; entry->family = AF_INET6; entry->def.type = NETLBL_NLTYPE_ADDRSELECT; entry->def.addrsel = addrmap; #endif /* IPv6 */ } ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) goto add_free_map; return 0; add_free_map: kfree(pmap); add_free_addrmap: kfree(addrmap); add_doi_put_def: cipso_v4_doi_putdef(cipsov4); #if IS_ENABLED(CONFIG_IPV6) calipso_doi_putdef(calipso); #endif add_free_domain: kfree(entry->domain); add_free_entry: kfree(entry); return ret_val; } /** * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry * @skb: the NETLINK buffer * @entry: the map entry * * Description: * This function is a helper function used by the LISTALL and LISTDEF command * handlers. The caller is responsible for ensuring that the RCU read lock * is held. Returns zero on success, negative values on failure. * */ static int netlbl_mgmt_listentry(struct sk_buff *skb, struct netlbl_dom_map *entry) { int ret_val = 0; struct nlattr *nla_a; struct nlattr *nla_b; struct netlbl_af4list *iter4; #if IS_ENABLED(CONFIG_IPV6) struct netlbl_af6list *iter6; #endif if (entry->domain != NULL) { ret_val = nla_put_string(skb, NLBL_MGMT_A_DOMAIN, entry->domain); if (ret_val != 0) return ret_val; } ret_val = nla_put_u16(skb, NLBL_MGMT_A_FAMILY, entry->family); if (ret_val != 0) return ret_val; switch (entry->def.type) { case NETLBL_NLTYPE_ADDRSELECT: nla_a = nla_nest_start_noflag(skb, NLBL_MGMT_A_SELECTORLIST); if (nla_a == NULL) return -ENOMEM; netlbl_af4list_foreach_rcu(iter4, &entry->def.addrsel->list4) { struct netlbl_domaddr4_map *map4; struct in_addr addr_struct; nla_b = nla_nest_start_noflag(skb, NLBL_MGMT_A_ADDRSELECTOR); if (nla_b == NULL) return -ENOMEM; addr_struct.s_addr = iter4->addr; ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4ADDR, addr_struct.s_addr); if (ret_val != 0) return ret_val; addr_struct.s_addr = iter4->mask; ret_val = nla_put_in_addr(skb, NLBL_MGMT_A_IPV4MASK, addr_struct.s_addr); if (ret_val != 0) return ret_val; map4 = netlbl_domhsh_addr4_entry(iter4); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, map4->def.type); if (ret_val != 0) return ret_val; switch (map4->def.type) { case NETLBL_NLTYPE_CIPSOV4: ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, map4->def.cipso->doi); if (ret_val != 0) return ret_val; break; } nla_nest_end(skb, nla_b); } #if IS_ENABLED(CONFIG_IPV6) netlbl_af6list_foreach_rcu(iter6, &entry->def.addrsel->list6) { struct netlbl_domaddr6_map *map6; nla_b = nla_nest_start_noflag(skb, NLBL_MGMT_A_ADDRSELECTOR); if (nla_b == NULL) return -ENOMEM; ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6ADDR, &iter6->addr); if (ret_val != 0) return ret_val; ret_val = nla_put_in6_addr(skb, NLBL_MGMT_A_IPV6MASK, &iter6->mask); if (ret_val != 0) return ret_val; map6 = netlbl_domhsh_addr6_entry(iter6); ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, map6->def.type); if (ret_val != 0) return ret_val; switch (map6->def.type) { case NETLBL_NLTYPE_CALIPSO: ret_val = nla_put_u32(skb, NLBL_MGMT_A_CLPDOI, map6->def.calipso->doi); if (ret_val != 0) return ret_val; break; } nla_nest_end(skb, nla_b); } #endif /* IPv6 */ nla_nest_end(skb, nla_a); break; case NETLBL_NLTYPE_UNLABELED: ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->def.type); break; case NETLBL_NLTYPE_CIPSOV4: ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->def.type); if (ret_val != 0) return ret_val; ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, entry->def.cipso->doi); break; case NETLBL_NLTYPE_CALIPSO: ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->def.type); if (ret_val != 0) return ret_val; ret_val = nla_put_u32(skb, NLBL_MGMT_A_CLPDOI, entry->def.calipso->doi); break; } return ret_val; } /* * NetLabel Command Handlers */ /** * netlbl_mgmt_add - Handle an ADD message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated ADD message and add the domains from the message * to the hash table. See netlabel.h for a description of the message format. * Returns zero on success, negative values on failure. * */ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) { struct netlbl_audit audit_info; if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) || (!info->attrs[NLBL_MGMT_A_PROTOCOL]) || (info->attrs[NLBL_MGMT_A_IPV4ADDR] && info->attrs[NLBL_MGMT_A_IPV6ADDR]) || (info->attrs[NLBL_MGMT_A_IPV4MASK] && info->attrs[NLBL_MGMT_A_IPV6MASK]) || ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); return netlbl_mgmt_add_common(info, &audit_info); } /** * netlbl_mgmt_remove - Handle a REMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated REMOVE message and remove the specified domain * mappings. Returns zero on success, negative values on failure. * */ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) { char *domain; struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN]) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); return netlbl_domhsh_remove(domain, AF_UNSPEC, &audit_info); } /** * netlbl_mgmt_listall_cb - netlbl_domhsh_walk() callback for LISTALL * @entry: the domain mapping hash table entry * @arg: the netlbl_domhsh_walk_arg structure * * Description: * This function is designed to be used as a callback to the * netlbl_domhsh_walk() function for use in generating a response for a LISTALL * message. Returns the size of the message on success, negative values on * failure. * */ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) { int ret_val = -ENOMEM; struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) goto listall_cb_failure; ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry); if (ret_val != 0) goto listall_cb_failure; cb_arg->seq++; genlmsg_end(cb_arg->skb, data); return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); return ret_val; } /** * netlbl_mgmt_listall - Handle a LISTALL message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated LISTALL message and dumps the domain hash table in * a form suitable for use in a kernel generated LISTALL message. Returns zero * on success, negative values on failure. * */ static int netlbl_mgmt_listall(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_domhsh_walk_arg cb_arg; u32 skip_bkt = cb->args[0]; u32 skip_chain = cb->args[1]; cb_arg.nl_cb = cb; cb_arg.skb = skb; cb_arg.seq = cb->nlh->nlmsg_seq; netlbl_domhsh_walk(&skip_bkt, &skip_chain, netlbl_mgmt_listall_cb, &cb_arg); cb->args[0] = skip_bkt; cb->args[1] = skip_chain; return skb->len; } /** * netlbl_mgmt_adddef - Handle an ADDDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated ADDDEF message and respond accordingly. Returns * zero on success, negative values on failure. * */ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) { struct netlbl_audit audit_info; if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) || (info->attrs[NLBL_MGMT_A_IPV4ADDR] && info->attrs[NLBL_MGMT_A_IPV6ADDR]) || (info->attrs[NLBL_MGMT_A_IPV4MASK] && info->attrs[NLBL_MGMT_A_IPV6MASK]) || ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) return -EINVAL; netlbl_netlink_auditinfo(&audit_info); return netlbl_mgmt_add_common(info, &audit_info); } /** * netlbl_mgmt_removedef - Handle a REMOVEDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated REMOVEDEF message and remove the default domain * mapping. Returns zero on success, negative values on failure. * */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { struct netlbl_audit audit_info; netlbl_netlink_auditinfo(&audit_info); return netlbl_domhsh_remove_default(AF_UNSPEC, &audit_info); } /** * netlbl_mgmt_listdef - Handle a LISTDEF message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated LISTDEF message and dumps the default domain * mapping in a form suitable for use in a kernel generated LISTDEF message. * Returns zero on success, negative values on failure. * */ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) { int ret_val = -ENOMEM; struct sk_buff *ans_skb = NULL; void *data; struct netlbl_dom_map *entry; u16 family; family = nla_get_u16_default(info->attrs[NLBL_MGMT_A_FAMILY], AF_INET); ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) return -ENOMEM; data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family, 0, NLBL_MGMT_C_LISTDEF); if (data == NULL) goto listdef_failure; rcu_read_lock(); entry = netlbl_domhsh_getentry(NULL, family); if (entry == NULL) { ret_val = -ENOENT; goto listdef_failure_lock; } ret_val = netlbl_mgmt_listentry(ans_skb, entry); rcu_read_unlock(); if (ret_val != 0) goto listdef_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); listdef_failure_lock: rcu_read_unlock(); listdef_failure: kfree_skb(ans_skb); return ret_val; } /** * netlbl_mgmt_protocols_cb - Write an individual PROTOCOL message response * @skb: the skb to write to * @cb: the NETLINK callback * @protocol: the NetLabel protocol to use in the message * * Description: * This function is to be used in conjunction with netlbl_mgmt_protocols() to * answer a application's PROTOCOLS message. Returns the size of the message * on success, negative values on failure. * */ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, struct netlink_callback *cb, u32 protocol) { int ret_val = -ENOMEM; void *data; data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_PROTOCOLS); if (data == NULL) goto protocols_cb_failure; ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, protocol); if (ret_val != 0) goto protocols_cb_failure; genlmsg_end(skb, data); return 0; protocols_cb_failure: genlmsg_cancel(skb, data); return ret_val; } /** * netlbl_mgmt_protocols - Handle a PROTOCOLS message * @skb: the NETLINK buffer * @cb: the NETLINK callback * * Description: * Process a user generated PROTOCOLS message and respond accordingly. * */ static int netlbl_mgmt_protocols(struct sk_buff *skb, struct netlink_callback *cb) { u32 protos_sent = cb->args[0]; if (protos_sent == 0) { if (netlbl_mgmt_protocols_cb(skb, cb, NETLBL_NLTYPE_UNLABELED) < 0) goto protocols_return; protos_sent++; } if (protos_sent == 1) { if (netlbl_mgmt_protocols_cb(skb, cb, NETLBL_NLTYPE_CIPSOV4) < 0) goto protocols_return; protos_sent++; } #if IS_ENABLED(CONFIG_IPV6) if (protos_sent == 2) { if (netlbl_mgmt_protocols_cb(skb, cb, NETLBL_NLTYPE_CALIPSO) < 0) goto protocols_return; protos_sent++; } #endif protocols_return: cb->args[0] = protos_sent; return skb->len; } /** * netlbl_mgmt_version - Handle a VERSION message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block * * Description: * Process a user generated VERSION message and respond accordingly. Returns * zero on success, negative values on failure. * */ static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) { int ret_val = -ENOMEM; struct sk_buff *ans_skb = NULL; void *data; ans_skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (ans_skb == NULL) return -ENOMEM; data = genlmsg_put_reply(ans_skb, info, &netlbl_mgmt_gnl_family, 0, NLBL_MGMT_C_VERSION); if (data == NULL) goto version_failure; ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_VERSION, NETLBL_PROTO_VERSION); if (ret_val != 0) goto version_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); version_failure: kfree_skb(ans_skb); return ret_val; } /* * NetLabel Generic NETLINK Command Definitions */ static const struct genl_small_ops netlbl_mgmt_genl_ops[] = { { .cmd = NLBL_MGMT_C_ADD, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_add, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_remove, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTALL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_mgmt_listall, }, { .cmd = NLBL_MGMT_C_ADDDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_adddef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_REMOVEDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_ADMIN_PERM, .doit = netlbl_mgmt_removedef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_LISTDEF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_mgmt_listdef, .dumpit = NULL, }, { .cmd = NLBL_MGMT_C_PROTOCOLS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = NULL, .dumpit = netlbl_mgmt_protocols, }, { .cmd = NLBL_MGMT_C_VERSION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = 0, .doit = netlbl_mgmt_version, .dumpit = NULL, }, }; static struct genl_family netlbl_mgmt_gnl_family __ro_after_init = { .hdrsize = 0, .name = NETLBL_NLTYPE_MGMT_NAME, .version = NETLBL_PROTO_VERSION, .maxattr = NLBL_MGMT_A_MAX, .policy = netlbl_mgmt_genl_policy, .module = THIS_MODULE, .small_ops = netlbl_mgmt_genl_ops, .n_small_ops = ARRAY_SIZE(netlbl_mgmt_genl_ops), .resv_start_op = NLBL_MGMT_C_VERSION + 1, }; /* * NetLabel Generic NETLINK Protocol Functions */ /** * netlbl_mgmt_genl_init - Register the NetLabel management component * * Description: * Register the NetLabel management component with the Generic NETLINK * mechanism. Returns zero on success, negative values on failure. * */ int __init netlbl_mgmt_genl_init(void) { return genl_register_family(&netlbl_mgmt_gnl_family); }
1892 2029 1884 2029 1339 1 1 17 17 1 185 185 185 184 186 186 10 10 10 10 10 2862 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 /* CPU control. * (C) 2001, 2002, 2003, 2004 Rusty Russell * * This code is licenced under the GPL. */ #include <linux/sched/mm.h> #include <linux/proc_fs.h> #include <linux/smp.h> #include <linux/init.h> #include <linux/notifier.h> #include <linux/sched/signal.h> #include <linux/sched/hotplug.h> #include <linux/sched/isolation.h> #include <linux/sched/task.h> #include <linux/sched/smt.h> #include <linux/unistd.h> #include <linux/cpu.h> #include <linux/oom.h> #include <linux/rcupdate.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/bug.h> #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/suspend.h> #include <linux/lockdep.h> #include <linux/tick.h> #include <linux/irq.h> #include <linux/nmi.h> #include <linux/smpboot.h> #include <linux/relay.h> #include <linux/slab.h> #include <linux/scs.h> #include <linux/percpu-rwsem.h> #include <linux/cpuset.h> #include <linux/random.h> #include <linux/cc_platform.h> #include <trace/events/power.h> #define CREATE_TRACE_POINTS #include <trace/events/cpuhp.h> #include "smpboot.h" /** * struct cpuhp_cpu_state - Per cpu hotplug state storage * @state: The current cpu state * @target: The target state * @fail: Current CPU hotplug callback state * @thread: Pointer to the hotplug thread * @should_run: Thread should execute * @rollback: Perform a rollback * @single: Single callback invocation * @bringup: Single callback bringup or teardown selector * @node: Remote CPU node; for multi-instance, do a * single entry callback for install/remove * @last: For multi-instance rollback, remember how far we got * @cb_state: The state for a single callback (install/uninstall) * @result: Result of the operation * @ap_sync_state: State for AP synchronization * @done_up: Signal completion to the issuer of the task for cpu-up * @done_down: Signal completion to the issuer of the task for cpu-down */ struct cpuhp_cpu_state { enum cpuhp_state state; enum cpuhp_state target; enum cpuhp_state fail; #ifdef CONFIG_SMP struct task_struct *thread; bool should_run; bool rollback; bool single; bool bringup; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; int result; atomic_t ap_sync_state; struct completion done_up; struct completion done_down; #endif }; static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { .fail = CPUHP_INVALID, }; #ifdef CONFIG_SMP cpumask_t cpus_booted_once_mask; #endif #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) static struct lockdep_map cpuhp_state_up_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); static inline void cpuhp_lock_acquire(bool bringup) { lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } static inline void cpuhp_lock_release(bool bringup) { lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } #else static inline void cpuhp_lock_acquire(bool bringup) { } static inline void cpuhp_lock_release(bool bringup) { } #endif /** * struct cpuhp_step - Hotplug state machine step * @name: Name of the step * @startup: Startup function of the step * @teardown: Teardown function of the step * @cant_stop: Bringup/teardown can't be stopped at this step * @multi_instance: State has multiple instances which get added afterwards */ struct cpuhp_step { const char *name; union { int (*single)(unsigned int cpu); int (*multi)(unsigned int cpu, struct hlist_node *node); } startup; union { int (*single)(unsigned int cpu); int (*multi)(unsigned int cpu, struct hlist_node *node); } teardown; /* private: */ struct hlist_head list; /* public: */ bool cant_stop; bool multi_instance; }; static DEFINE_MUTEX(cpuhp_state_mutex); static struct cpuhp_step cpuhp_hp_states[]; static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) { return cpuhp_hp_states + state; } static bool cpuhp_step_empty(bool bringup, struct cpuhp_step *step) { return bringup ? !step->startup.single : !step->teardown.single; } /** * cpuhp_invoke_callback - Invoke the callbacks for a given state * @cpu: The cpu for which the callback should be invoked * @state: The state to do callbacks for * @bringup: True if the bringup callback should be invoked * @node: For multi-instance, do a single entry callback for install/remove * @lastp: For multi-instance rollback, remember how far we got * * Called from cpu hotplug and from the state register machinery. * * Return: %0 on success or a negative errno code */ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node, struct hlist_node **lastp) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct cpuhp_step *step = cpuhp_get_step(state); int (*cbm)(unsigned int cpu, struct hlist_node *node); int (*cb)(unsigned int cpu); int ret, cnt; if (st->fail == state) { st->fail = CPUHP_INVALID; return -EAGAIN; } if (cpuhp_step_empty(bringup, step)) { WARN_ON_ONCE(1); return 0; } if (!step->multi_instance) { WARN_ON_ONCE(lastp && *lastp); cb = bringup ? step->startup.single : step->teardown.single; trace_cpuhp_enter(cpu, st->target, state, cb); ret = cb(cpu); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } cbm = bringup ? step->startup.multi : step->teardown.multi; /* Single invocation for instance add/remove */ if (node) { WARN_ON_ONCE(lastp && *lastp); trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } /* State transition. Invoke on all instances */ cnt = 0; hlist_for_each(node, &step->list) { if (lastp && node == *lastp) break; trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); if (ret) { if (!lastp) goto err; *lastp = node; return ret; } cnt++; } if (lastp) *lastp = NULL; return 0; err: /* Rollback the instances if one failed */ cbm = !bringup ? step->startup.multi : step->teardown.multi; if (!cbm) return ret; hlist_for_each(node, &step->list) { if (!cnt--) break; trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); /* * Rollback must not fail, */ WARN_ON_ONCE(ret); } return ret; } #ifdef CONFIG_SMP static bool cpuhp_is_ap_state(enum cpuhp_state state) { /* * The extra check for CPUHP_TEARDOWN_CPU is only for documentation * purposes as that state is handled explicitly in cpu_down. */ return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU; } static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup) { struct completion *done = bringup ? &st->done_up : &st->done_down; wait_for_completion(done); } static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup) { struct completion *done = bringup ? &st->done_up : &st->done_down; complete(done); } /* * The former STARTING/DYING states, ran with IRQs disabled and must not fail. */ static bool cpuhp_is_atomic_state(enum cpuhp_state state) { return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } /* Synchronization state management */ enum cpuhp_sync_state { SYNC_STATE_DEAD, SYNC_STATE_KICKED, SYNC_STATE_SHOULD_DIE, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE, SYNC_STATE_ONLINE, }; #ifdef CONFIG_HOTPLUG_CORE_SYNC /** * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown * @state: The synchronization state to set * * No synchronization point. Just update of the synchronization state, but implies * a full barrier so that the AP changes are visible before the control CPU proceeds. */ static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); (void)atomic_xchg(st, state); } void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); } static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state, enum cpuhp_sync_state next_state) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); ktime_t now, end, start = ktime_get(); int sync; end = start + 10ULL * NSEC_PER_SEC; sync = atomic_read(st); while (1) { if (sync == state) { if (!atomic_try_cmpxchg(st, &sync, next_state)) continue; return true; } now = ktime_get(); if (now > end) { /* Timeout. Leave the state unchanged */ return false; } else if (now - start < NSEC_PER_MSEC) { /* Poll for one millisecond */ arch_cpuhp_sync_state_poll(); } else { usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); } sync = atomic_read(st); } return true; } #else /* CONFIG_HOTPLUG_CORE_SYNC */ static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { } #endif /* !CONFIG_HOTPLUG_CORE_SYNC */ #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD /** * cpuhp_ap_report_dead - Update synchronization state to DEAD * * No synchronization point. Just update of the synchronization state. */ void cpuhp_ap_report_dead(void) { cpuhp_ap_update_sync_state(SYNC_STATE_DEAD); } void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } /* * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down * because the AP cannot issue complete() at this stage. */ static void cpuhp_bp_sync_dead(unsigned int cpu) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); int sync = atomic_read(st); do { /* CPU can have reported dead already. Don't overwrite that! */ if (sync == SYNC_STATE_DEAD) break; } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE)); if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) { /* CPU reached dead state. Invoke the cleanup function */ arch_cpuhp_cleanup_dead_cpu(cpu); return; } /* No further action possible. Emit message and give up. */ pr_err("CPU%u failed to report dead state\n", cpu); } #else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */ static inline void cpuhp_bp_sync_dead(unsigned int cpu) { } #endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */ #ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL /** * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive * * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits * for the BP to release it. */ void cpuhp_ap_sync_alive(void) { atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE); /* Wait for the control CPU to release it. */ while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE) cpu_relax(); } static bool cpuhp_can_boot_ap(unsigned int cpu) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); int sync = atomic_read(st); again: switch (sync) { case SYNC_STATE_DEAD: /* CPU is properly dead */ break; case SYNC_STATE_KICKED: /* CPU did not come up in previous attempt */ break; case SYNC_STATE_ALIVE: /* CPU is stuck cpuhp_ap_sync_alive(). */ break; default: /* CPU failed to report online or dead and is in limbo state. */ return false; } /* Prepare for booting */ if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED)) goto again; return true; } void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { } /* * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up * because the AP cannot issue complete() so early in the bringup. */ static int cpuhp_bp_sync_alive(unsigned int cpu) { int ret = 0; if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL)) return 0; if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) { pr_err("CPU%u failed to report alive state\n", cpu); ret = -EIO; } /* Let the architecture cleanup the kick alive mechanics. */ arch_cpuhp_cleanup_kick_cpu(cpu); return ret; } #else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */ static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; } static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; } #endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */ /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); bool cpuhp_tasks_frozen; EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen); /* * The following two APIs (cpu_maps_update_begin/done) must be used when * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } /* * If set, cpu_up and cpu_down will return -EBUSY and do nothing. * Should always be manipulated under cpu_add_remove_lock */ static int cpu_hotplug_disabled; #ifdef CONFIG_HOTPLUG_CPU DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); static bool cpu_hotplug_offline_disabled __ro_after_init; void cpus_read_lock(void) { percpu_down_read(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_lock); int cpus_read_trylock(void) { return percpu_down_read_trylock(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_trylock); void cpus_read_unlock(void) { percpu_up_read(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_unlock); void cpus_write_lock(void) { percpu_down_write(&cpu_hotplug_lock); } void cpus_write_unlock(void) { percpu_up_write(&cpu_hotplug_lock); } void lockdep_assert_cpus_held(void) { /* * We can't have hotplug operations before userspace starts running, * and some init codepaths will knowingly not take the hotplug lock. * This is all valid, so mute lockdep until it makes sense to report * unheld locks. */ if (system_state < SYSTEM_RUNNING) return; percpu_rwsem_assert_held(&cpu_hotplug_lock); } #ifdef CONFIG_LOCKDEP int lockdep_is_cpus_held(void) { return percpu_rwsem_is_held(&cpu_hotplug_lock); } #endif static void lockdep_acquire_cpus_lock(void) { rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); } static void lockdep_release_cpus_lock(void) { rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); } /* Declare CPU offlining not supported */ void cpu_hotplug_disable_offlining(void) { cpu_maps_update_begin(); cpu_hotplug_offline_disabled = true; cpu_maps_update_done(); } /* * Wait for currently running CPU hotplug operations to complete (if any) and * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the * hotplug path before performing hotplug operations. So acquiring that lock * guarantees mutual exclusion from any currently running hotplug operations. */ void cpu_hotplug_disable(void) { cpu_maps_update_begin(); cpu_hotplug_disabled++; cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_disable); static void __cpu_hotplug_enable(void) { if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n")) return; cpu_hotplug_disabled--; } void cpu_hotplug_enable(void) { cpu_maps_update_begin(); __cpu_hotplug_enable(); cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #else static void lockdep_acquire_cpus_lock(void) { } static void lockdep_release_cpus_lock(void) { } #endif /* CONFIG_HOTPLUG_CPU */ /* * Architectures that need SMT-specific errata handling during SMT hotplug * should override this. */ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; static unsigned int cpu_smt_max_threads __ro_after_init; unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX; void __init cpu_smt_disable(bool force) { if (!cpu_smt_possible()) return; if (force) { pr_info("SMT: Force disabled\n"); cpu_smt_control = CPU_SMT_FORCE_DISABLED; } else { pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } cpu_smt_num_threads = 1; } /* * The decision whether SMT is supported can only be done after the full * CPU identification. Called from architecture code. */ void __init cpu_smt_set_num_threads(unsigned int num_threads, unsigned int max_threads) { WARN_ON(!num_threads || (num_threads > max_threads)); if (max_threads == 1) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; cpu_smt_max_threads = max_threads; /* * If SMT has been disabled via the kernel command line or SMT is * not supported, set cpu_smt_num_threads to 1 for consistency. * If enabled, take the architecture requested number of threads * to bring up into account. */ if (cpu_smt_control != CPU_SMT_ENABLED) cpu_smt_num_threads = 1; else if (num_threads < cpu_smt_num_threads) cpu_smt_num_threads = num_threads; } static int __init smt_cmdline_disable(char *str) { cpu_smt_disable(str && !strcmp(str, "force")); return 0; } early_param("nosmt", smt_cmdline_disable); /* * For Archicture supporting partial SMT states check if the thread is allowed. * Otherwise this has already been checked through cpu_smt_max_threads when * setting the SMT level. */ static inline bool cpu_smt_thread_allowed(unsigned int cpu) { #ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC return topology_smt_thread_allowed(cpu); #else return true; #endif } static inline bool cpu_bootable(unsigned int cpu) { if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) return true; /* All CPUs are bootable if controls are not configured */ if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED) return true; /* All CPUs are bootable if CPU is not SMT capable */ if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return true; if (topology_is_primary_thread(cpu)) return true; /* * On x86 it's required to boot all logical CPUs at least once so * that the init code can get a chance to set CR4.MCE on each * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any * core will shutdown the machine. */ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask); } /* Returns true if SMT is supported and not forcefully (irreversibly) disabled */ bool cpu_smt_possible(void) { return cpu_smt_control != CPU_SMT_FORCE_DISABLED && cpu_smt_control != CPU_SMT_NOT_SUPPORTED; } EXPORT_SYMBOL_GPL(cpu_smt_possible); #else static inline bool cpu_bootable(unsigned int cpu) { return true; } #endif static inline enum cpuhp_state cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; bool bringup = st->state < target; st->rollback = false; st->last = NULL; st->target = target; st->single = false; st->bringup = bringup; if (cpu_dying(cpu) != !bringup) set_cpu_dying(cpu, !bringup); return prev_state; } static inline void cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) { bool bringup = !st->bringup; st->target = prev_state; /* * Already rolling back. No need invert the bringup value or to change * the current state. */ if (st->rollback) return; st->rollback = true; /* * If we have st->last we need to undo partial multi_instance of this * state first. Otherwise start undo at the previous state. */ if (!st->last) { if (st->bringup) st->state--; else st->state++; } st->bringup = bringup; if (cpu_dying(cpu) != !bringup) set_cpu_dying(cpu, !bringup); } /* Regular hotplug invocation of the AP hotplug thread */ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) { if (!st->single && st->state == st->target) return; st->result = 0; /* * Make sure the above stores are visible before should_run becomes * true. Paired with the mb() above in cpuhp_thread_fun() */ smp_mb(); st->should_run = true; wake_up_process(st->thread); wait_for_ap_thread(st, st->bringup); } static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state; int ret; prev_state = cpuhp_set_state(cpu, st, target); __cpuhp_kick_ap(st); if ((ret = st->result)) { cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } return ret; } static int bringup_wait_for_ap_online(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */ wait_for_ap_thread(st, true); if (WARN_ON_ONCE((!cpu_online(cpu)))) return -ECANCELED; /* Unpark the hotplug thread of the target cpu */ kthread_unpark(st->thread); /* * SMT soft disabling on X86 requires to bring the CPU out of the * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The * CPU marked itself as booted_once in notify_cpu_starting() so the * cpu_bootable() check will now return false if this is not the * primary sibling. */ if (!cpu_bootable(cpu)) return -ECANCELED; return 0; } #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP static int cpuhp_kick_ap_alive(unsigned int cpu) { if (!cpuhp_can_boot_ap(cpu)) return -EAGAIN; return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu)); } static int cpuhp_bringup_ap(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int ret; /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. * Prevent irq alloc/free across the bringup. */ irq_lock_sparse(); ret = cpuhp_bp_sync_alive(cpu); if (ret) goto out_unlock; ret = bringup_wait_for_ap_online(cpu); if (ret) goto out_unlock; irq_unlock_sparse(); if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; return cpuhp_kick_ap(cpu, st, st->target); out_unlock: irq_unlock_sparse(); return ret; } #else static int bringup_cpu(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct task_struct *idle = idle_thread_get(cpu); int ret; if (!cpuhp_can_boot_ap(cpu)) return -EAGAIN; /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. * * Prevent irq alloc/free across the bringup by acquiring the * sparse irq lock. Hold it until the upcoming CPU completes the * startup in cpuhp_online_idle() which allows to avoid * intermediate synchronization points in the architecture code. */ irq_lock_sparse(); ret = __cpu_up(cpu, idle); if (ret) goto out_unlock; ret = cpuhp_bp_sync_alive(cpu); if (ret) goto out_unlock; ret = bringup_wait_for_ap_online(cpu); if (ret) goto out_unlock; irq_unlock_sparse(); if (st->target <= CPUHP_AP_ONLINE_IDLE) return 0; return cpuhp_kick_ap(cpu, st, st->target); out_unlock: irq_unlock_sparse(); return ret; } #endif static int finish_cpu(unsigned int cpu) { struct task_struct *idle = idle_thread_get(cpu); struct mm_struct *mm = idle->active_mm; /* * idle_task_exit() will have switched to &init_mm, now * clean up any remaining active_mm state. */ if (mm != &init_mm) idle->active_mm = &init_mm; mmdrop_lazy_tlb(mm); return 0; } /* * Hotplug state machine related functions */ /* * Get the next state to run. Empty ones will be skipped. Returns true if a * state must be run. * * st->state will be modified ahead of time, to match state_to_run, as if it * has already ran. */ static bool cpuhp_next_state(bool bringup, enum cpuhp_state *state_to_run, struct cpuhp_cpu_state *st, enum cpuhp_state target) { do { if (bringup) { if (st->state >= target) return false; *state_to_run = ++st->state; } else { if (st->state <= target) return false; *state_to_run = st->state--; } if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run))) break; } while (true); return true; } static int __cpuhp_invoke_callback_range(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target, bool nofail) { enum cpuhp_state state; int ret = 0; while (cpuhp_next_state(bringup, &state, st, target)) { int err; err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL); if (!err) continue; if (nofail) { pr_warn("CPU %u %s state %s (%d) failed (%d)\n", cpu, bringup ? "UP" : "DOWN", cpuhp_get_step(st->state)->name, st->state, err); ret = -1; } else { ret = err; break; } } return ret; } static inline int cpuhp_invoke_callback_range(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false); } static inline void cpuhp_invoke_callback_range_nofail(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { __cpuhp_invoke_callback_range(bringup, cpu, st, target, true); } static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) { if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) return true; /* * When CPU hotplug is disabled, then taking the CPU down is not * possible because takedown_cpu() and the architecture and * subsystem specific mechanisms are not available. So the CPU * which would be completely unplugged again needs to stay around * in the current state. */ return st->state <= CPUHP_BRINGUP_CPU; } static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; int ret = 0; ret = cpuhp_invoke_callback_range(true, cpu, st, target); if (ret) { pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n", ret, cpu, cpuhp_get_step(st->state)->name, st->state); cpuhp_reset_state(cpu, st, prev_state); if (can_rollback_cpu(st)) WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, prev_state)); } return ret; } /* * The cpu hotplug threads manage the bringup and teardown of the cpus */ static int cpuhp_should_run(unsigned int cpu) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); return st->should_run; } /* * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke * callbacks when a state gets [un]installed at runtime. * * Each invocation of this function by the smpboot thread does a single AP * state callback. * * It has 3 modes of operation: * - single: runs st->cb_state * - up: runs ++st->state, while st->state < st->target * - down: runs st->state--, while st->state > st->target * * When complete or on error, should_run is cleared and the completion is fired. */ static void cpuhp_thread_fun(unsigned int cpu) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); bool bringup = st->bringup; enum cpuhp_state state; if (WARN_ON_ONCE(!st->should_run)) return; /* * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures * that if we see ->should_run we also see the rest of the state. */ smp_mb(); /* * The BP holds the hotplug lock, but we're now running on the AP, * ensure that anybody asserting the lock is held, will actually find * it so. */ lockdep_acquire_cpus_lock(); cpuhp_lock_acquire(bringup); if (st->single) { state = st->cb_state; st->should_run = false; } else { st->should_run = cpuhp_next_state(bringup, &state, st, st->target); if (!st->should_run) goto end; } WARN_ON_ONCE(!cpuhp_is_ap_state(state)); if (cpuhp_is_atomic_state(state)) { local_irq_disable(); st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); local_irq_enable(); /* * STARTING/DYING must not fail! */ WARN_ON_ONCE(st->result); } else { st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last); } if (st->result) { /* * If we fail on a rollback, we're up a creek without no * paddle, no way forward, no way back. We loose, thanks for * playing. */ WARN_ON_ONCE(st->rollback); st->should_run = false; } end: cpuhp_lock_release(bringup); lockdep_release_cpus_lock(); if (!st->should_run) complete_ap_thread(st, bringup); } /* Invoke a single callback on a remote cpu */ static int cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int ret; if (!cpu_online(cpu)) return 0; cpuhp_lock_acquire(false); cpuhp_lock_release(false); cpuhp_lock_acquire(true); cpuhp_lock_release(true); /* * If we are up and running, use the hotplug thread. For early calls * we invoke the thread function directly. */ if (!st->thread) return cpuhp_invoke_callback(cpu, state, bringup, node, NULL); st->rollback = false; st->last = NULL; st->node = node; st->bringup = bringup; st->cb_state = state; st->single = true; __cpuhp_kick_ap(st); /* * If we failed and did a partial, do a rollback. */ if ((ret = st->result) && st->last) { st->rollback = true; st->bringup = !bringup; __cpuhp_kick_ap(st); } /* * Clean up the leftovers so the next hotplug operation wont use stale * data. */ st->node = st->last = NULL; return ret; } static int cpuhp_kick_ap_work(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); enum cpuhp_state prev_state = st->state; int ret; cpuhp_lock_acquire(false); cpuhp_lock_release(false); cpuhp_lock_acquire(true); cpuhp_lock_release(true); trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work); ret = cpuhp_kick_ap(cpu, st, st->target); trace_cpuhp_exit(cpu, st->state, prev_state, ret); return ret; } static struct smp_hotplug_thread cpuhp_threads = { .store = &cpuhp_state.thread, .thread_should_run = cpuhp_should_run, .thread_fn = cpuhp_thread_fun, .thread_comm = "cpuhp/%u", .selfparking = true, }; static __init void cpuhp_init_state(void) { struct cpuhp_cpu_state *st; int cpu; for_each_possible_cpu(cpu) { st = per_cpu_ptr(&cpuhp_state, cpu); init_completion(&st->done_up); init_completion(&st->done_down); } } void __init cpuhp_threads_init(void) { cpuhp_init_state(); BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads)); kthread_unpark(this_cpu_read(cpuhp_state.thread)); } #ifdef CONFIG_HOTPLUG_CPU #ifndef arch_clear_mm_cpumask_cpu #define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm)) #endif /** * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU * @cpu: a CPU id * * This function walks all processes, finds a valid mm struct for each one and * then clears a corresponding bit in mm's cpumask. While this all sounds * trivial, there are various non-obvious corner cases, which this function * tries to solve in a safe manner. * * Also note that the function uses a somewhat relaxed locking scheme, so it may * be called only for an already offlined CPU. */ void clear_tasks_mm_cpumask(int cpu) { struct task_struct *p; /* * This function is called after the cpu is taken down and marked * offline, so its not like new tasks will ever get this cpu set in * their mm mask. -- Peter Zijlstra * Thus, we may use rcu_read_lock() here, instead of grabbing * full-fledged tasklist_lock. */ WARN_ON(cpu_online(cpu)); rcu_read_lock(); for_each_process(p) { struct task_struct *t; /* * Main thread might exit, but other threads may still have * a valid mm. Find one. */ t = find_lock_task_mm(p); if (!t) continue; arch_clear_mm_cpumask_cpu(cpu, t->mm); task_unlock(t); } rcu_read_unlock(); } /* Take this CPU down. */ static int take_cpu_down(void *_param) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE); int err, cpu = smp_processor_id(); /* Ensure this CPU doesn't handle any more interrupts. */ err = __cpu_disable(); if (err < 0) return err; /* * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going * down, that the current state is CPUHP_TEARDOWN_CPU - 1. */ WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1)); /* * Invoke the former CPU_DYING callbacks. DYING must not fail! */ cpuhp_invoke_callback_range_nofail(false, cpu, st, target); /* Park the stopper thread */ stop_machine_park(cpu); return 0; } static int takedown_cpu(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int err; /* Park the smpboot threads */ kthread_park(st->thread); /* * Prevent irq alloc/free while the dying cpu reorganizes the * interrupt affinities. */ irq_lock_sparse(); /* * So now all preempt/rcu users must observe !cpu_active(). */ err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu)); if (err) { /* CPU refused to die */ irq_unlock_sparse(); /* Unpark the hotplug thread so we can rollback there */ kthread_unpark(st->thread); return err; } BUG_ON(cpu_online(cpu)); /* * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed * all runnable tasks from the CPU, there's only the idle task left now * that the migration thread is done doing the stop_machine thing. * * Wait for the stop thread to go away. */ wait_for_ap_thread(st, false); BUG_ON(st->state != CPUHP_AP_IDLE_DEAD); /* Interrupts are moved away from the dying cpu, reenable alloc/free */ irq_unlock_sparse(); hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); cpuhp_bp_sync_dead(cpu); lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu)); /* * Callbacks must be re-integrated right away to the RCU state machine. * Otherwise an RCU callback could block a further teardown function * waiting for its completion. */ rcutree_migrate_callbacks(cpu); return 0; } static void cpuhp_complete_idle_dead(void *arg) { struct cpuhp_cpu_state *st = arg; complete_ap_thread(st, false); } void cpuhp_report_idle_dead(void) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); BUG_ON(st->state != CPUHP_AP_OFFLINE); tick_assert_timekeeping_handover(); rcutree_report_cpu_dead(); st->state = CPUHP_AP_IDLE_DEAD; /* * We cannot call complete after rcutree_report_cpu_dead() so we delegate it * to an online cpu. */ smp_call_function_single(cpumask_first(cpu_online_mask), cpuhp_complete_idle_dead, st, 0); } static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; int ret = 0; ret = cpuhp_invoke_callback_range(false, cpu, st, target); if (ret) { pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n", ret, cpu, cpuhp_get_step(st->state)->name, st->state); cpuhp_reset_state(cpu, st, prev_state); if (st->state < prev_state) WARN_ON(cpuhp_invoke_callback_range(true, cpu, st, prev_state)); } return ret; } /* Requires cpu_add_remove_lock to be held */ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int prev_state, ret = 0; if (num_online_cpus() == 1) return -EBUSY; if (!cpu_present(cpu)) return -EINVAL; cpus_write_lock(); cpuhp_tasks_frozen = tasks_frozen; prev_state = cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread. */ if (st->state > CPUHP_TEARDOWN_CPU) { st->target = max((int)target, CPUHP_TEARDOWN_CPU); ret = cpuhp_kick_ap_work(cpu); /* * The AP side has done the error rollback already. Just * return the error code.. */ if (ret) goto out; /* * We might have stopped still in the range of the AP hotplug * thread. Nothing to do anymore. */ if (st->state > CPUHP_TEARDOWN_CPU) goto out; st->target = target; } /* * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need * to do the further cleanups. */ ret = cpuhp_down_callbacks(cpu, st, target); if (ret && st->state < prev_state) { if (st->state == CPUHP_TEARDOWN_CPU) { cpuhp_reset_state(cpu, st, prev_state); __cpuhp_kick_ap(st); } else { WARN(1, "DEAD callback error for CPU%d", cpu); } } out: cpus_write_unlock(); /* * Do post unplug cleanup. This is still protected against * concurrent CPU hotplug via cpu_add_remove_lock. */ lockup_detector_cleanup(); arch_smt_update(); return ret; } struct cpu_down_work { unsigned int cpu; enum cpuhp_state target; }; static long __cpu_down_maps_locked(void *arg) { struct cpu_down_work *work = arg; return _cpu_down(work->cpu, 0, work->target); } static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target) { struct cpu_down_work work = { .cpu = cpu, .target = target, }; /* * If the platform does not support hotplug, report it explicitly to * differentiate it from a transient offlining failure. */ if (cpu_hotplug_offline_disabled) return -EOPNOTSUPP; if (cpu_hotplug_disabled) return -EBUSY; /* * Ensure that the control task does not run on the to be offlined * CPU to prevent a deadlock against cfs_b->period_timer. * Also keep at least one housekeeping cpu onlined to avoid generating * an empty sched_domain span. */ for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) { if (cpu != work.cpu) return work_on_cpu(cpu, __cpu_down_maps_locked, &work); } return -EBUSY; } static int cpu_down(unsigned int cpu, enum cpuhp_state target) { int err; cpu_maps_update_begin(); err = cpu_down_maps_locked(cpu, target); cpu_maps_update_done(); return err; } /** * cpu_device_down - Bring down a cpu device * @dev: Pointer to the cpu device to offline * * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use remove_cpu() instead. * * Return: %0 on success or a negative errno code */ int cpu_device_down(struct device *dev) { return cpu_down(dev->id, CPUHP_OFFLINE); } int remove_cpu(unsigned int cpu) { int ret; lock_device_hotplug(); ret = device_offline(get_cpu_device(cpu)); unlock_device_hotplug(); return ret; } EXPORT_SYMBOL_GPL(remove_cpu); void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { unsigned int cpu; int error; cpu_maps_update_begin(); /* * Make certain the cpu I'm about to reboot on is online. * * This is inline to what migrate_to_reboot_cpu() already do. */ if (!cpu_online(primary_cpu)) primary_cpu = cpumask_first(cpu_online_mask); for_each_online_cpu(cpu) { if (cpu == primary_cpu) continue; error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (error) { pr_err("Failed to offline CPU%d - error=%d", cpu, error); break; } } /* * Ensure all but the reboot CPU are offline. */ BUG_ON(num_online_cpus() > 1); /* * Make sure the CPUs won't be enabled by someone else after this * point. Kexec will reboot to a new kernel shortly resetting * everything along the way. */ cpu_hotplug_disabled++; cpu_maps_update_done(); } #else #define takedown_cpu NULL #endif /*CONFIG_HOTPLUG_CPU*/ /** * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU * @cpu: cpu that just started * * It must be called by the arch code on the new cpu, before the new cpu * enables interrupts and before the "boot" cpu returns from __cpu_up(). */ void notify_cpu_starting(unsigned int cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE); rcutree_report_cpu_starting(cpu); /* Enables RCU usage on this CPU. */ cpumask_set_cpu(cpu, &cpus_booted_once_mask); /* * STARTING must not fail! */ cpuhp_invoke_callback_range_nofail(true, cpu, st, target); } /* * Called from the idle task. Wake up the controlling task which brings the * hotplug thread of the upcoming CPU up and then delegates the rest of the * online bringup to the hotplug thread. */ void cpuhp_online_idle(enum cpuhp_state state) { struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); /* Happens for the boot cpu */ if (state != CPUHP_AP_ONLINE_IDLE) return; cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE); /* * Unpark the stopper thread before we start the idle loop (and start * scheduling); this ensures the stopper task is always available. */ stop_machine_unpark(smp_processor_id()); st->state = CPUHP_AP_ONLINE_IDLE; complete_ap_thread(st, true); } /* Requires cpu_add_remove_lock to be held */ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct task_struct *idle; int ret = 0; cpus_write_lock(); if (!cpu_present(cpu)) { ret = -EINVAL; goto out; } /* * The caller of cpu_up() might have raced with another * caller. Nothing to do. */ if (st->state >= target) goto out; if (st->state == CPUHP_OFFLINE) { /* Let it fail before we try to bring the cpu up */ idle = idle_thread_get(cpu); if (IS_ERR(idle)) { ret = PTR_ERR(idle); goto out; } /* * Reset stale stack state from the last time this CPU was online. */ scs_task_reset(idle); kasan_unpoison_task_stack(idle); } cpuhp_tasks_frozen = tasks_frozen; cpuhp_set_state(cpu, st, target); /* * If the current CPU state is in the range of the AP hotplug thread, * then we need to kick the thread once more. */ if (st->state > CPUHP_BRINGUP_CPU) { ret = cpuhp_kick_ap_work(cpu); /* * The AP side has done the error rollback already. Just * return the error code.. */ if (ret) goto out; } /* * Try to reach the target state. We max out on the BP at * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is * responsible for bringing it up to the target state. */ target = min((int)target, CPUHP_BRINGUP_CPU); ret = cpuhp_up_callbacks(cpu, st, target); out: cpus_write_unlock(); arch_smt_update(); return ret; } static int cpu_up(unsigned int cpu, enum cpuhp_state target) { int err = 0; if (!cpu_possible(cpu)) { pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu); return -EINVAL; } err = try_online_node(cpu_to_node(cpu)); if (err) return err; cpu_maps_update_begin(); if (cpu_hotplug_disabled) { err = -EBUSY; goto out; } if (!cpu_bootable(cpu)) { err = -EPERM; goto out; } err = _cpu_up(cpu, 0, target); out: cpu_maps_update_done(); return err; } /** * cpu_device_up - Bring up a cpu device * @dev: Pointer to the cpu device to online * * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use add_cpu() instead. * * Return: %0 on success or a negative errno code */ int cpu_device_up(struct device *dev) { return cpu_up(dev->id, CPUHP_ONLINE); } int add_cpu(unsigned int cpu) { int ret; lock_device_hotplug(); ret = device_online(get_cpu_device(cpu)); unlock_device_hotplug(); return ret; } EXPORT_SYMBOL_GPL(add_cpu); /** * bringup_hibernate_cpu - Bring up the CPU that we hibernated on * @sleep_cpu: The cpu we hibernated on and should be brought up. * * On some architectures like arm64, we can hibernate on any CPU, but on * wake up the CPU we hibernated on might be offline as a side effect of * using maxcpus= for example. * * Return: %0 on success or a negative errno code */ int bringup_hibernate_cpu(unsigned int sleep_cpu) { int ret; if (!cpu_online(sleep_cpu)) { pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n"); ret = cpu_up(sleep_cpu, CPUHP_ONLINE); if (ret) { pr_err("Failed to bring hibernate-CPU up!\n"); return ret; } } return 0; } static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus, enum cpuhp_state target) { unsigned int cpu; for_each_cpu(cpu, mask) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); if (cpu_up(cpu, target) && can_rollback_cpu(st)) { /* * If this failed then cpu_up() might have only * rolled back to CPUHP_BP_KICK_AP for the final * online. Clean it up. NOOP if already rolled back. */ WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE)); } if (!--ncpus) break; } } #ifdef CONFIG_HOTPLUG_PARALLEL static bool __cpuhp_parallel_bringup __ro_after_init = true; static int __init parallel_bringup_parse_param(char *arg) { return kstrtobool(arg, &__cpuhp_parallel_bringup); } early_param("cpuhp.parallel", parallel_bringup_parse_param); #ifdef CONFIG_HOTPLUG_SMT static inline bool cpuhp_smt_aware(void) { return cpu_smt_max_threads > 1; } static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) { return cpu_primary_thread_mask; } #else static inline bool cpuhp_smt_aware(void) { return false; } static inline const struct cpumask *cpuhp_get_primary_thread_mask(void) { return cpu_none_mask; } #endif bool __weak arch_cpuhp_init_parallel_bringup(void) { return true; } /* * On architectures which have enabled parallel bringup this invokes all BP * prepare states for each of the to be onlined APs first. The last state * sends the startup IPI to the APs. The APs proceed through the low level * bringup code in parallel and then wait for the control CPU to release * them one by one for the final onlining procedure. * * This avoids waiting for each AP to respond to the startup IPI in * CPUHP_BRINGUP_CPU. */ static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus) { const struct cpumask *mask = cpu_present_mask; if (__cpuhp_parallel_bringup) __cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup(); if (!__cpuhp_parallel_bringup) return false; if (cpuhp_smt_aware()) { const struct cpumask *pmask = cpuhp_get_primary_thread_mask(); static struct cpumask tmp_mask __initdata; /* * X86 requires to prevent that SMT siblings stopped while * the primary thread does a microcode update for various * reasons. Bring the primary threads up first. */ cpumask_and(&tmp_mask, mask, pmask); cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP); cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE); /* Account for the online CPUs */ ncpus -= num_online_cpus(); if (!ncpus) return true; /* Create the mask for secondary CPUs */ cpumask_andnot(&tmp_mask, mask, pmask); mask = &tmp_mask; } /* Bring the not-yet started CPUs up */ cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP); cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE); return true; } #else static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; } #endif /* CONFIG_HOTPLUG_PARALLEL */ void __init bringup_nonboot_cpus(unsigned int max_cpus) { if (!max_cpus) return; /* Try parallel bringup optimization if enabled */ if (cpuhp_bringup_cpus_parallel(max_cpus)) return; /* Full per CPU serialized bringup */ cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE); } #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; int freeze_secondary_cpus(int primary) { int cpu, error = 0; cpu_maps_update_begin(); if (primary == -1) { primary = cpumask_first(cpu_online_mask); if (!housekeeping_cpu(primary, HK_TYPE_TIMER)) primary = housekeeping_any_cpu(HK_TYPE_TIMER); } else { if (!cpu_online(primary)) primary = cpumask_first(cpu_online_mask); } /* * We take down all of the non-boot CPUs in one shot to avoid races * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); pr_info("Disabling non-boot CPUs ...\n"); for (cpu = nr_cpu_ids - 1; cpu >= 0; cpu--) { if (!cpu_online(cpu) || cpu == primary) continue; if (pm_wakeup_pending()) { pr_info("Wakeup pending. Abort CPU freeze\n"); error = -EBUSY; break; } trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1, CPUHP_OFFLINE); trace_suspend_resume(TPS("CPU_OFF"), cpu, false); if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { pr_err("Error taking CPU%d down: %d\n", cpu, error); break; } } if (!error) BUG_ON(num_online_cpus() > 1); else pr_err("Non-boot CPUs are not disabled\n"); /* * Make sure the CPUs won't be enabled by someone else. We need to do * this even in case of failure as all freeze_secondary_cpus() users are * supposed to do thaw_secondary_cpus() on the failure path. */ cpu_hotplug_disabled++; cpu_maps_update_done(); return error; } void __weak arch_thaw_secondary_cpus_begin(void) { } void __weak arch_thaw_secondary_cpus_end(void) { } void thaw_secondary_cpus(void) { int cpu, error; /* Allow everyone to use the CPU hotplug again */ cpu_maps_update_begin(); __cpu_hotplug_enable(); if (cpumask_empty(frozen_cpus)) goto out; pr_info("Enabling non-boot CPUs ...\n"); arch_thaw_secondary_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { trace_suspend_resume(TPS("CPU_ON"), cpu, true); error = _cpu_up(cpu, 1, CPUHP_ONLINE); trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { pr_info("CPU%d is up\n", cpu); continue; } pr_warn("Error taking CPU%d up: %d\n", cpu, error); } arch_thaw_secondary_cpus_end(); cpumask_clear(frozen_cpus); out: cpu_maps_update_done(); } static int __init alloc_frozen_cpus(void) { if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) return -ENOMEM; return 0; } core_initcall(alloc_frozen_cpus); /* * When callbacks for CPU hotplug notifications are being executed, we must * ensure that the state of the system with respect to the tasks being frozen * or not, as reported by the notification, remains unchanged *throughout the * duration* of the execution of the callbacks. * Hence we need to prevent the freezer from racing with regular CPU hotplug. * * This synchronization is implemented by mutually excluding regular CPU * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/ * Hibernate notifications. */ static int cpu_hotplug_pm_callback(struct notifier_block *nb, unsigned long action, void *ptr) { switch (action) { case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: cpu_hotplug_disable(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: cpu_hotplug_enable(); break; default: return NOTIFY_DONE; } return NOTIFY_OK; } static int __init cpu_hotplug_pm_sync_init(void) { /* * cpu_hotplug_pm_callback has higher priority than x86 * bsp_pm_callback which depends on cpu_hotplug_pm_callback * to disable cpu hotplug to avoid cpu hotplug race. */ pm_notifier(cpu_hotplug_pm_callback, 0); return 0; } core_initcall(cpu_hotplug_pm_sync_init); #endif /* CONFIG_PM_SLEEP_SMP */ int __boot_cpu_id; #endif /* CONFIG_SMP */ /* Boot processor state steps */ static struct cpuhp_step cpuhp_hp_states[] = { [CPUHP_OFFLINE] = { .name = "offline", .startup.single = NULL, .teardown.single = NULL, }, #ifdef CONFIG_SMP [CPUHP_CREATE_THREADS]= { .name = "threads:prepare", .startup.single = smpboot_create_threads, .teardown.single = NULL, .cant_stop = true, }, [CPUHP_PERF_PREPARE] = { .name = "perf:prepare", .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, [CPUHP_RANDOM_PREPARE] = { .name = "random:prepare", .startup.single = random_prepare_cpu, .teardown.single = NULL, }, [CPUHP_WORKQUEUE_PREP] = { .name = "workqueue:prepare", .startup.single = workqueue_prepare_cpu, .teardown.single = NULL, }, [CPUHP_HRTIMERS_PREPARE] = { .name = "hrtimers:prepare", .startup.single = hrtimers_prepare_cpu, .teardown.single = NULL, }, [CPUHP_SMPCFD_PREPARE] = { .name = "smpcfd:prepare", .startup.single = smpcfd_prepare_cpu, .teardown.single = smpcfd_dead_cpu, }, [CPUHP_RELAY_PREPARE] = { .name = "relay:prepare", .startup.single = relay_prepare_cpu, .teardown.single = NULL, }, [CPUHP_RCUTREE_PREP] = { .name = "RCU/tree:prepare", .startup.single = rcutree_prepare_cpu, .teardown.single = rcutree_dead_cpu, }, /* * On the tear-down path, timers_dead_cpu() must be invoked * before blk_mq_queue_reinit_notify() from notify_dead(), * otherwise a RCU stall occurs. */ [CPUHP_TIMERS_PREPARE] = { .name = "timers:prepare", .startup.single = timers_prepare_cpu, .teardown.single = timers_dead_cpu, }, #ifdef CONFIG_HOTPLUG_SPLIT_STARTUP /* * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until * the next step will release it. */ [CPUHP_BP_KICK_AP] = { .name = "cpu:kick_ap", .startup.single = cpuhp_kick_ap_alive, }, /* * Waits for the AP to reach cpuhp_ap_sync_alive() and then * releases it for the complete bringup. */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", .startup.single = cpuhp_bringup_ap, .teardown.single = finish_cpu, .cant_stop = true, }, #else /* * All-in-one CPU bringup state which includes the kick alive. */ [CPUHP_BRINGUP_CPU] = { .name = "cpu:bringup", .startup.single = bringup_cpu, .teardown.single = finish_cpu, .cant_stop = true, }, #endif /* Final state before CPU kills itself */ [CPUHP_AP_IDLE_DEAD] = { .name = "idle:dead", }, /* * Last state before CPU enters the idle loop to die. Transient state * for synchronization. */ [CPUHP_AP_OFFLINE] = { .name = "ap:offline", .cant_stop = true, }, /* First state is scheduler control. Interrupts are disabled */ [CPUHP_AP_SCHED_STARTING] = { .name = "sched:starting", .startup.single = sched_cpu_starting, .teardown.single = sched_cpu_dying, }, [CPUHP_AP_RCUTREE_DYING] = { .name = "RCU/tree:dying", .startup.single = NULL, .teardown.single = rcutree_dying_cpu, }, [CPUHP_AP_SMPCFD_DYING] = { .name = "smpcfd:dying", .startup.single = NULL, .teardown.single = smpcfd_dying_cpu, }, [CPUHP_AP_HRTIMERS_DYING] = { .name = "hrtimers:dying", .startup.single = NULL, .teardown.single = hrtimers_cpu_dying, }, [CPUHP_AP_TICK_DYING] = { .name = "tick:dying", .startup.single = NULL, .teardown.single = tick_cpu_dying, }, /* Entry state on starting. Interrupts enabled from here on. Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { .name = "ap:online", }, /* * Handled on control processor until the plugged processor manages * this itself. */ [CPUHP_TEARDOWN_CPU] = { .name = "cpu:teardown", .startup.single = NULL, .teardown.single = takedown_cpu, .cant_stop = true, }, [CPUHP_AP_SCHED_WAIT_EMPTY] = { .name = "sched:waitempty", .startup.single = NULL, .teardown.single = sched_cpu_wait_empty, }, /* Handle smpboot threads park/unpark */ [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot/threads:online", .startup.single = smpboot_unpark_threads, .teardown.single = smpboot_park_threads, }, [CPUHP_AP_IRQ_AFFINITY_ONLINE] = { .name = "irq/affinity:online", .startup.single = irq_affinity_online_cpu, .teardown.single = NULL, }, [CPUHP_AP_PERF_ONLINE] = { .name = "perf:online", .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, [CPUHP_AP_WATCHDOG_ONLINE] = { .name = "lockup_detector:online", .startup.single = lockup_detector_online_cpu, .teardown.single = lockup_detector_offline_cpu, }, [CPUHP_AP_WORKQUEUE_ONLINE] = { .name = "workqueue:online", .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, [CPUHP_AP_RANDOM_ONLINE] = { .name = "random:online", .startup.single = random_online_cpu, .teardown.single = NULL, }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, .teardown.single = rcutree_offline_cpu, }, #endif /* * The dynamically registered state space is here */ #ifdef CONFIG_SMP /* Last state is scheduler control setting the cpu active */ [CPUHP_AP_ACTIVE] = { .name = "sched:active", .startup.single = sched_cpu_activate, .teardown.single = sched_cpu_deactivate, }, #endif /* CPU is fully up and running. */ [CPUHP_ONLINE] = { .name = "online", .startup.single = NULL, .teardown.single = NULL, }, }; /* Sanity check for callbacks */ static int cpuhp_cb_check(enum cpuhp_state state) { if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE) return -EINVAL; return 0; } /* * Returns a free for dynamic slot assignment of the Online state. The states * are protected by the cpuhp_slot_states mutex and an empty slot is identified * by having no name assigned. */ static int cpuhp_reserve_state(enum cpuhp_state state) { enum cpuhp_state i, end; struct cpuhp_step *step; switch (state) { case CPUHP_AP_ONLINE_DYN: step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN; end = CPUHP_AP_ONLINE_DYN_END; break; case CPUHP_BP_PREPARE_DYN: step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN; end = CPUHP_BP_PREPARE_DYN_END; break; default: return -EINVAL; } for (i = state; i <= end; i++, step++) { if (!step->name) return i; } WARN(1, "No more dynamic states available for CPU hotplug\n"); return -ENOSPC; } static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; int ret = 0; /* * If name is NULL, then the state gets removed. * * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on * the first allocation from these dynamic ranges, so the removal * would trigger a new allocation and clear the wrong (already * empty) state, leaving the callbacks of the to be cleared state * dangling, which causes wreckage on the next hotplug operation. */ if (name && (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN)) { ret = cpuhp_reserve_state(state); if (ret < 0) return ret; state = ret; } sp = cpuhp_get_step(state); if (name && sp->name) return -EBUSY; sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); return ret; } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) { return cpuhp_get_step(state)->teardown.single; } /* * Call the startup/teardown function for a step either on the AP or * on the current CPU. */ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node) { struct cpuhp_step *sp = cpuhp_get_step(state); int ret; /* * If there's nothing to do, we done. * Relies on the union for multi_instance. */ if (cpuhp_step_empty(bringup, sp)) return 0; /* * The non AP bound callbacks can fail on bringup. On teardown * e.g. module removal we crash for now. */ #ifdef CONFIG_SMP if (cpuhp_is_ap_state(state)) ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); else ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); #else ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); #endif BUG_ON(ret && !bringup); return ret; } /* * Called from __cpuhp_setup_state on a recoverable failure. * * Note: The teardown callbacks for rollback are not allowed to fail! */ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, struct hlist_node *node) { int cpu; /* Roll back the already executed steps on the other cpus */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpu >= failedcpu) break; /* Did we invoke the startup call on that cpu ? */ if (cpustate >= state) cpuhp_issue_call(cpu, state, false, node); } } int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, struct hlist_node *node, bool invoke) { struct cpuhp_step *sp; int cpu; int ret; lockdep_assert_cpus_held(); sp = cpuhp_get_step(state); if (sp->multi_instance == false) return -EINVAL; mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate < state) continue; ret = cpuhp_issue_call(cpu, state, true, node); if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); goto unlock; } } add_node: ret = 0; hlist_add_head(node, &sp->list); unlock: mutex_unlock(&cpuhp_state_mutex); return ret; } int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { int ret; cpus_read_lock(); ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state * @state: The state to setup * @name: Name of the step * @invoke: If true, the startup function is invoked for cpus where * cpu state >= @state * @startup: startup callback function * @teardown: teardown callback function * @multi_instance: State is set up for multiple instances which get * added afterwards. * * The caller needs to hold cpus read locked while calling this function. * Return: * On success: * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN; * 0 for all other states * On failure: proper (negative) error code */ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { int cpu, ret = 0; bool dynstate; lockdep_assert_cpus_held(); if (cpuhp_cb_check(state) || !name) return -EINVAL; mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN; if (ret > 0 && dynstate) { state = ret; ret = 0; } if (ret || !invoke || !startup) goto out; /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate < state) continue; ret = cpuhp_issue_call(cpu, state, true, NULL); if (ret) { if (teardown) cpuhp_rollback_install(cpu, state, NULL); cpuhp_store_callbacks(state, NULL, NULL, NULL, false); goto out; } } out: mutex_unlock(&cpuhp_state_mutex); /* * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN, * return the dynamically allocated state in case of success. */ if (!ret && dynstate) return state; return ret; } EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked); int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { int ret; cpus_read_lock(); ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, teardown, multi_instance); cpus_read_unlock(); return ret; } EXPORT_SYMBOL(__cpuhp_setup_state); int __cpuhp_state_remove_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); if (!sp->multi_instance) return -EINVAL; cpus_read_lock(); mutex_lock(&cpuhp_state_mutex); if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* * Call the teardown callback for each present cpu depending * on the hotplug state of the cpu. This function is not * allowed to fail currently! */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate >= state) cpuhp_issue_call(cpu, state, false, node); } remove: hlist_del(node); mutex_unlock(&cpuhp_state_mutex); cpus_read_unlock(); return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); /** * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state * @state: The state to remove * @invoke: If true, the teardown function is invoked for cpus where * cpu state >= @state * * The caller needs to hold cpus read locked while calling this function. * The teardown callback is currently not allowed to fail. Think * about module removal! */ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); lockdep_assert_cpus_held(); mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", state); goto remove; } if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* * Call the teardown callback for each present cpu depending * on the hotplug state of the cpu. This function is not * allowed to fail currently! */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate >= state) cpuhp_issue_call(cpu, state, false, NULL); } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); mutex_unlock(&cpuhp_state_mutex); } EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked); void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) { cpus_read_lock(); __cpuhp_remove_state_cpuslocked(state, invoke); cpus_read_unlock(); } EXPORT_SYMBOL(__cpuhp_remove_state); #ifdef CONFIG_HOTPLUG_SMT static void cpuhp_offline_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); dev->offline = true; /* Tell user space about the state change */ kobject_uevent(&dev->kobj, KOBJ_OFFLINE); } static void cpuhp_online_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); dev->offline = false; /* Tell user space about the state change */ kobject_uevent(&dev->kobj, KOBJ_ONLINE); } int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; cpu_maps_update_begin(); for_each_online_cpu(cpu) { if (topology_is_primary_thread(cpu)) continue; /* * Disable can be called with CPU_SMT_ENABLED when changing * from a higher to lower number of SMT threads per core. */ if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) continue; ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (ret) break; /* * As this needs to hold the cpu maps lock it's impossible * to call device_offline() because that ends up calling * cpu_down() which takes cpu maps lock. cpu maps lock * needs to be held as this might race against in kernel * abusers of the hotplug machinery (thermal management). * * So nothing would update device:offline state. That would * leave the sysfs entry stale and prevent onlining after * smt control has been changed to 'off' again. This is * called under the sysfs hotplug lock, so it is properly * serialized against the regular offline usage. */ cpuhp_offline_cpu_device(cpu); } if (!ret) cpu_smt_control = ctrlval; cpu_maps_update_done(); return ret; } /* Check if the core a CPU belongs to is online */ #if !defined(topology_is_core_online) static inline bool topology_is_core_online(unsigned int cpu) { return true; } #endif int cpuhp_smt_enable(void) { int cpu, ret = 0; cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) break; /* See comment in cpuhp_smt_disable() */ cpuhp_online_cpu_device(cpu); } cpu_maps_update_done(); return ret; } #endif #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU) static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->state); } static DEVICE_ATTR_RO(state); static ssize_t target_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; int target, ret; ret = kstrtoint(buf, 10, &target); if (ret) return ret; #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE) return -EINVAL; #else if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE) return -EINVAL; #endif ret = lock_device_hotplug_sysfs(); if (ret) return ret; mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(target); ret = !sp->name || sp->cant_stop ? -EINVAL : 0; mutex_unlock(&cpuhp_state_mutex); if (ret) goto out; if (st->state < target) ret = cpu_up(dev->id, target); else if (st->state > target) ret = cpu_down(dev->id, target); else if (WARN_ON(st->target != target)) st->target = target; out: unlock_device_hotplug(); return ret ? ret : count; } static ssize_t target_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->target); } static DEVICE_ATTR_RW(target); static ssize_t fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; int fail, ret; ret = kstrtoint(buf, 10, &fail); if (ret) return ret; if (fail == CPUHP_INVALID) { st->fail = fail; return count; } if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) return -EINVAL; /* * Cannot fail STARTING/DYING callbacks. */ if (cpuhp_is_atomic_state(fail)) return -EINVAL; /* * DEAD callbacks cannot fail... * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter * triggering STARTING callbacks, a failure in this state would * hinder rollback. */ if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU) return -EINVAL; /* * Cannot fail anything that doesn't have callbacks. */ mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(fail); if (!sp->startup.single && !sp->teardown.single) ret = -EINVAL; mutex_unlock(&cpuhp_state_mutex); if (ret) return ret; st->fail = fail; return count; } static ssize_t fail_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->fail); } static DEVICE_ATTR_RW(fail); static struct attribute *cpuhp_cpu_attrs[] = { &dev_attr_state.attr, &dev_attr_target.attr, &dev_attr_fail.attr, NULL }; static const struct attribute_group cpuhp_cpu_attr_group = { .attrs = cpuhp_cpu_attrs, .name = "hotplug", }; static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t cur, res = 0; int i; mutex_lock(&cpuhp_state_mutex); for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) { struct cpuhp_step *sp = cpuhp_get_step(i); if (sp->name) { cur = sprintf(buf, "%3d: %s\n", i, sp->name); buf += cur; res += cur; } } mutex_unlock(&cpuhp_state_mutex); return res; } static DEVICE_ATTR_RO(states); static struct attribute *cpuhp_cpu_root_attrs[] = { &dev_attr_states.attr, NULL }; static const struct attribute_group cpuhp_cpu_root_attr_group = { .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", }; #ifdef CONFIG_HOTPLUG_SMT static bool cpu_smt_num_threads_valid(unsigned int threads) { if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC)) return threads >= 1 && threads <= cpu_smt_max_threads; return threads == 1 || threads == cpu_smt_max_threads; } static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ctrlval, ret, num_threads, orig_threads; bool force_off; if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) return -EPERM; if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return -ENODEV; if (sysfs_streq(buf, "on")) { ctrlval = CPU_SMT_ENABLED; num_threads = cpu_smt_max_threads; } else if (sysfs_streq(buf, "off")) { ctrlval = CPU_SMT_DISABLED; num_threads = 1; } else if (sysfs_streq(buf, "forceoff")) { ctrlval = CPU_SMT_FORCE_DISABLED; num_threads = 1; } else if (kstrtoint(buf, 10, &num_threads) == 0) { if (num_threads == 1) ctrlval = CPU_SMT_DISABLED; else if (cpu_smt_num_threads_valid(num_threads)) ctrlval = CPU_SMT_ENABLED; else return -EINVAL; } else { return -EINVAL; } ret = lock_device_hotplug_sysfs(); if (ret) return ret; orig_threads = cpu_smt_num_threads; cpu_smt_num_threads = num_threads; force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED; if (num_threads > orig_threads) ret = cpuhp_smt_enable(); else if (num_threads < orig_threads || force_off) ret = cpuhp_smt_disable(ctrlval); unlock_device_hotplug(); return ret ? ret : count; } #else /* !CONFIG_HOTPLUG_SMT */ static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return -ENODEV; } #endif /* CONFIG_HOTPLUG_SMT */ static const char *smt_states[] = { [CPU_SMT_ENABLED] = "on", [CPU_SMT_DISABLED] = "off", [CPU_SMT_FORCE_DISABLED] = "forceoff", [CPU_SMT_NOT_SUPPORTED] = "notsupported", [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", }; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *state = smt_states[cpu_smt_control]; #ifdef CONFIG_HOTPLUG_SMT /* * If SMT is enabled but not all threads are enabled then show the * number of threads. If all threads are enabled show "on". Otherwise * show the state name. */ if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_num_threads != cpu_smt_max_threads) return sysfs_emit(buf, "%d\n", cpu_smt_num_threads); #endif return sysfs_emit(buf, "%s\n", state); } static ssize_t control_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return __store_smt_control(dev, attr, buf, count); } static DEVICE_ATTR_RW(control); static ssize_t active_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", sched_smt_active()); } static DEVICE_ATTR_RO(active); static struct attribute *cpuhp_smt_attrs[] = { &dev_attr_control.attr, &dev_attr_active.attr, NULL }; static const struct attribute_group cpuhp_smt_attr_group = { .attrs = cpuhp_smt_attrs, .name = "smt", }; static int __init cpu_smt_sysfs_init(void) { struct device *dev_root; int ret = -ENODEV; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group); put_device(dev_root); } return ret; } static int __init cpuhp_sysfs_init(void) { struct device *dev_root; int cpu, ret; ret = cpu_smt_sysfs_init(); if (ret) return ret; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group); put_device(dev_root); if (ret) return ret; } for_each_possible_cpu(cpu) { struct device *dev = get_cpu_device(cpu); if (!dev) continue; ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group); if (ret) return ret; } return 0; } device_initcall(cpuhp_sysfs_init); #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */ /* * cpu_bit_bitmap[] is a special, "compressed" data structure that * represents all NR_CPUS bits binary values of 1<<nr. * * It is used by cpumask_of() to get a constant address to a CPU * mask value that has a single bit set only. */ /* cpu_bit_bitmap[0] is empty - so we can back into it */ #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x)) #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { MASK_DECLARE_8(0), MASK_DECLARE_8(8), MASK_DECLARE_8(16), MASK_DECLARE_8(24), #if BITS_PER_LONG > 32 MASK_DECLARE_8(32), MASK_DECLARE_8(40), MASK_DECLARE_8(48), MASK_DECLARE_8(56), #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); #ifdef CONFIG_INIT_ALL_POSSIBLE struct cpumask __cpu_possible_mask __ro_after_init = {CPU_BITS_ALL}; #else struct cpumask __cpu_possible_mask __ro_after_init; #endif EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); struct cpumask __cpu_enabled_mask __read_mostly; EXPORT_SYMBOL(__cpu_enabled_mask); struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); struct cpumask __cpu_dying_mask __read_mostly; EXPORT_SYMBOL(__cpu_dying_mask); atomic_t __num_online_cpus __read_mostly; EXPORT_SYMBOL(__num_online_cpus); void init_cpu_present(const struct cpumask *src) { cpumask_copy(&__cpu_present_mask, src); } void init_cpu_possible(const struct cpumask *src) { cpumask_copy(&__cpu_possible_mask, src); } void init_cpu_online(const struct cpumask *src) { cpumask_copy(&__cpu_online_mask, src); } void set_cpu_online(unsigned int cpu, bool online) { /* * atomic_inc/dec() is required to handle the horrid abuse of this * function by the reboot and kexec code which invoke it from * IPI/NMI broadcasts when shutting down CPUs. Invocation from * regular CPU hotplug is properly serialized. * * Note, that the fact that __num_online_cpus is of type atomic_t * does not protect readers which are not serialized against * concurrent hotplug operations. */ if (online) { if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask)) atomic_inc(&__num_online_cpus); } else { if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) atomic_dec(&__num_online_cpus); } } /* * Activate the first processor. */ void __init boot_cpu_init(void) { int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ set_cpu_online(cpu, true); set_cpu_active(cpu, true); set_cpu_present(cpu, true); set_cpu_possible(cpu, true); #ifdef CONFIG_SMP __boot_cpu_id = cpu; #endif } /* * Must be called _AFTER_ setting up the per_cpu areas */ void __init boot_cpu_hotplug_init(void) { #ifdef CONFIG_SMP cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE); #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } #ifdef CONFIG_CPU_MITIGATIONS /* * These are used for a global "mitigations=" cmdline option for toggling * optional CPU mitigations. */ enum cpu_mitigations { CPU_MITIGATIONS_OFF, CPU_MITIGATIONS_AUTO, CPU_MITIGATIONS_AUTO_NOSMT, }; static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static int __init mitigations_parse_cmdline(char *arg) { if (!strcmp(arg, "off")) cpu_mitigations = CPU_MITIGATIONS_OFF; else if (!strcmp(arg, "auto")) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; else pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", arg); return 0; } /* mitigations=off */ bool cpu_mitigations_off(void) { return cpu_mitigations == CPU_MITIGATIONS_OFF; } EXPORT_SYMBOL_GPL(cpu_mitigations_off); /* mitigations=auto,nosmt */ bool cpu_mitigations_auto_nosmt(void) { return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); #else static int __init mitigations_parse_cmdline(char *arg) { pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); return 0; } #endif early_param("mitigations", mitigations_parse_cmdline);
1 1 1 1 18 18 18 2 18 1 1 2 2 2 28 27 1 4 1 1 21 3 2 1 1 16 27 111 19 137 1 2 61 111 84 2 84 84 84 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 // SPDX-License-Identifier: GPL-2.0 #include <linux/anon_inodes.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/cgroup.h> #include <linux/magic.h> #include <linux/mount.h> #include <linux/pid.h> #include <linux/pidfs.h> #include <linux/pid_namespace.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/pseudo_fs.h> #include <linux/ptrace.h> #include <linux/seq_file.h> #include <uapi/linux/pidfd.h> #include <linux/ipc_namespace.h> #include <linux/time_namespace.h> #include <linux/utsname.h> #include <net/net_namespace.h> #include "internal.h" #include "mount.h" #ifdef CONFIG_PROC_FS /** * pidfd_show_fdinfo - print information about a pidfd * @m: proc fdinfo file * @f: file referencing a pidfd * * Pid: * This function will print the pid that a given pidfd refers to in the * pid namespace of the procfs instance. * If the pid namespace of the process is not a descendant of the pid * namespace of the procfs instance 0 will be shown as its pid. This is * similar to calling getppid() on a process whose parent is outside of * its pid namespace. * * NSpid: * If pid namespaces are supported then this function will also print * the pid of a given pidfd refers to for all descendant pid namespaces * starting from the current pid namespace of the instance, i.e. the * Pid field and the first entry in the NSpid field will be identical. * If the pid namespace of the process is not a descendant of the pid * namespace of the procfs instance 0 will be shown as its first NSpid * entry and no others will be shown. * Note that this differs from the Pid and NSpid fields in * /proc/<pid>/status where Pid and NSpid are always shown relative to * the pid namespace of the procfs instance. The difference becomes * obvious when sending around a pidfd between pid namespaces from a * different branch of the tree, i.e. where no ancestral relation is * present between the pid namespaces: * - create two new pid namespaces ns1 and ns2 in the initial pid * namespace (also take care to create new mount namespaces in the * new pid namespace and mount procfs) * - create a process with a pidfd in ns1 * - send pidfd from ns1 to ns2 * - read /proc/self/fdinfo/<pidfd> and observe that both Pid and NSpid * have exactly one entry, which is 0 */ static void pidfd_show_fdinfo(struct seq_file *m, struct file *f) { struct pid *pid = pidfd_pid(f); struct pid_namespace *ns; pid_t nr = -1; if (likely(pid_has_task(pid, PIDTYPE_PID))) { ns = proc_pid_ns(file_inode(m->file)->i_sb); nr = pid_nr_ns(pid, ns); } seq_put_decimal_ll(m, "Pid:\t", nr); #ifdef CONFIG_PID_NS seq_put_decimal_ll(m, "\nNSpid:\t", nr); if (nr > 0) { int i; /* If nr is non-zero it means that 'pid' is valid and that * ns, i.e. the pid namespace associated with the procfs * instance, is in the pid namespace hierarchy of pid. * Start at one below the already printed level. */ for (i = ns->level + 1; i <= pid->level; i++) seq_put_decimal_ll(m, "\t", pid->numbers[i].nr); } #endif seq_putc(m, '\n'); } #endif /* * Poll support for process exit notification. */ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) { struct pid *pid = pidfd_pid(file); bool thread = file->f_flags & PIDFD_THREAD; struct task_struct *task; __poll_t poll_flags = 0; poll_wait(file, &pid->wait_pidfd, pts); /* * Depending on PIDFD_THREAD, inform pollers when the thread * or the whole thread-group exits. */ guard(rcu)(); task = pid_task(pid, PIDTYPE_PID); if (!task) poll_flags = EPOLLIN | EPOLLRDNORM | EPOLLHUP; else if (task->exit_state && (thread || thread_group_empty(task))) poll_flags = EPOLLIN | EPOLLRDNORM; return poll_flags; } static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; size_t usize = _IOC_SIZE(cmd); struct pidfd_info kinfo = {}; struct user_namespace *user_ns; const struct cred *c; __u64 mask; #ifdef CONFIG_CGROUPS struct cgroup *cgrp; #endif if (!uinfo) return -EINVAL; if (usize < PIDFD_INFO_SIZE_VER0) return -EINVAL; /* First version, no smaller struct possible */ if (copy_from_user(&mask, &uinfo->mask, sizeof(mask))) return -EFAULT; c = get_task_cred(task); if (!c) return -ESRCH; /* Unconditionally return identifiers and credentials, the rest only on request */ user_ns = current_user_ns(); kinfo.ruid = from_kuid_munged(user_ns, c->uid); kinfo.rgid = from_kgid_munged(user_ns, c->gid); kinfo.euid = from_kuid_munged(user_ns, c->euid); kinfo.egid = from_kgid_munged(user_ns, c->egid); kinfo.suid = from_kuid_munged(user_ns, c->suid); kinfo.sgid = from_kgid_munged(user_ns, c->sgid); kinfo.fsuid = from_kuid_munged(user_ns, c->fsuid); kinfo.fsgid = from_kgid_munged(user_ns, c->fsgid); kinfo.mask |= PIDFD_INFO_CREDS; put_cred(c); #ifdef CONFIG_CGROUPS rcu_read_lock(); cgrp = task_dfl_cgroup(task); kinfo.cgroupid = cgroup_id(cgrp); kinfo.mask |= PIDFD_INFO_CGROUPID; rcu_read_unlock(); #endif /* * Copy pid/tgid last, to reduce the chances the information might be * stale. Note that it is not possible to ensure it will be valid as the * task might return as soon as the copy_to_user finishes, but that's ok * and userspace expects that might happen and can act accordingly, so * this is just best-effort. What we can do however is checking that all * the fields are set correctly, or return ESRCH to avoid providing * incomplete information. */ kinfo.ppid = task_ppid_nr_ns(task, NULL); kinfo.tgid = task_tgid_vnr(task); kinfo.pid = task_pid_vnr(task); kinfo.mask |= PIDFD_INFO_PID; if (kinfo.pid == 0 || kinfo.tgid == 0 || (kinfo.ppid == 0 && kinfo.pid != 1)) return -ESRCH; /* * If userspace and the kernel have the same struct size it can just * be copied. If userspace provides an older struct, only the bits that * userspace knows about will be copied. If userspace provides a new * struct, only the bits that the kernel knows about will be copied. */ if (copy_to_user(uinfo, &kinfo, min(usize, sizeof(kinfo)))) return -EFAULT; return 0; } static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct task_struct *task __free(put_task) = NULL; struct nsproxy *nsp __free(put_nsproxy) = NULL; struct pid *pid = pidfd_pid(file); struct ns_common *ns_common = NULL; struct pid_namespace *pid_ns; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) return pidfd_info(task, cmd, arg); if (arg) return -EINVAL; scoped_guard(task_lock, task) { nsp = task->nsproxy; if (nsp) get_nsproxy(nsp); } if (!nsp) return -ESRCH; /* just pretend it didn't exist */ /* * We're trying to open a file descriptor to the namespace so perform a * filesystem cred ptrace check. Also, we mirror nsfs behavior. */ if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) return -EACCES; switch (cmd) { /* Namespaces that hang of nsproxy. */ case PIDFD_GET_CGROUP_NAMESPACE: if (IS_ENABLED(CONFIG_CGROUPS)) { get_cgroup_ns(nsp->cgroup_ns); ns_common = to_ns_common(nsp->cgroup_ns); } break; case PIDFD_GET_IPC_NAMESPACE: if (IS_ENABLED(CONFIG_IPC_NS)) { get_ipc_ns(nsp->ipc_ns); ns_common = to_ns_common(nsp->ipc_ns); } break; case PIDFD_GET_MNT_NAMESPACE: get_mnt_ns(nsp->mnt_ns); ns_common = to_ns_common(nsp->mnt_ns); break; case PIDFD_GET_NET_NAMESPACE: if (IS_ENABLED(CONFIG_NET_NS)) { ns_common = to_ns_common(nsp->net_ns); get_net_ns(ns_common); } break; case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { get_pid_ns(nsp->pid_ns_for_children); ns_common = to_ns_common(nsp->pid_ns_for_children); } break; case PIDFD_GET_TIME_NAMESPACE: if (IS_ENABLED(CONFIG_TIME_NS)) { get_time_ns(nsp->time_ns); ns_common = to_ns_common(nsp->time_ns); } break; case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: if (IS_ENABLED(CONFIG_TIME_NS)) { get_time_ns(nsp->time_ns_for_children); ns_common = to_ns_common(nsp->time_ns_for_children); } break; case PIDFD_GET_UTS_NAMESPACE: if (IS_ENABLED(CONFIG_UTS_NS)) { get_uts_ns(nsp->uts_ns); ns_common = to_ns_common(nsp->uts_ns); } break; /* Namespaces that don't hang of nsproxy. */ case PIDFD_GET_USER_NAMESPACE: if (IS_ENABLED(CONFIG_USER_NS)) { rcu_read_lock(); ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns))); rcu_read_unlock(); } break; case PIDFD_GET_PID_NAMESPACE: if (IS_ENABLED(CONFIG_PID_NS)) { rcu_read_lock(); pid_ns = task_active_pid_ns(task); if (pid_ns) ns_common = to_ns_common(get_pid_ns(pid_ns)); rcu_read_unlock(); } break; default: return -ENOIOCTLCMD; } if (!ns_common) return -EOPNOTSUPP; /* open_namespace() unconditionally consumes the reference */ return open_namespace(ns_common); } static const struct file_operations pidfs_file_operations = { .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif .unlocked_ioctl = pidfd_ioctl, .compat_ioctl = compat_ptr_ioctl, }; struct pid *pidfd_pid(const struct file *file) { if (file->f_op != &pidfs_file_operations) return ERR_PTR(-EBADF); return file_inode(file)->i_private; } static struct vfsmount *pidfs_mnt __ro_after_init; #if BITS_PER_LONG == 32 /* * Provide a fallback mechanism for 32-bit systems so processes remain * reliably comparable by inode number even on those systems. */ static DEFINE_IDA(pidfd_inum_ida); static int pidfs_inum(struct pid *pid, unsigned long *ino) { int ret; ret = ida_alloc_range(&pidfd_inum_ida, RESERVED_PIDS + 1, UINT_MAX, GFP_ATOMIC); if (ret < 0) return -ENOSPC; *ino = ret; return 0; } static inline void pidfs_free_inum(unsigned long ino) { if (ino > 0) ida_free(&pidfd_inum_ida, ino); } #else static inline int pidfs_inum(struct pid *pid, unsigned long *ino) { *ino = pid->ino; return 0; } #define pidfs_free_inum(ino) ((void)(ino)) #endif /* * The vfs falls back to simple_setattr() if i_op->setattr() isn't * implemented. Let's reject it completely until we have a clean * permission concept for pidfds. */ static int pidfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { return -EOPNOTSUPP; } /* * User space expects pidfs inodes to have no file type in st_mode. * * In particular, 'lsof' has this legacy logic: * * type = s->st_mode & S_IFMT; * switch (type) { * ... * case 0: * if (!strcmp(p, "anon_inode")) * Lf->ntype = Ntype = N_ANON_INODE; * * to detect our old anon_inode logic. * * Rather than mess with our internal sane inode data, just fix it * up here in getattr() by masking off the format bits. */ static int pidfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->mode &= ~S_IFMT; return 0; } static const struct inode_operations pidfs_inode_operations = { .getattr = pidfs_getattr, .setattr = pidfs_setattr, }; static void pidfs_evict_inode(struct inode *inode) { struct pid *pid = inode->i_private; clear_inode(inode); put_pid(pid); pidfs_free_inum(inode->i_ino); } static const struct super_operations pidfs_sops = { .drop_inode = generic_delete_inode, .evict_inode = pidfs_evict_inode, .statfs = simple_statfs, }; /* * 'lsof' has knowledge of out historical anon_inode use, and expects * the pidfs dentry name to start with 'anon_inode'. */ static char *pidfs_dname(struct dentry *dentry, char *buffer, int buflen) { return dynamic_dname(buffer, buflen, "anon_inode:[pidfd]"); } static const struct dentry_operations pidfs_dentry_operations = { .d_delete = always_delete_dentry, .d_dname = pidfs_dname, .d_prune = stashed_dentry_prune, }; static int pidfs_init_inode(struct inode *inode, void *data) { inode->i_private = data; inode->i_flags |= S_PRIVATE; inode->i_mode |= S_IRWXU; inode->i_op = &pidfs_inode_operations; inode->i_fop = &pidfs_file_operations; /* * Inode numbering for pidfs start at RESERVED_PIDS + 1. This * avoids collisions with the root inode which is 1 for pseudo * filesystems. */ return pidfs_inum(data, &inode->i_ino); } static void pidfs_put_data(void *data) { struct pid *pid = data; put_pid(pid); } static const struct stashed_operations pidfs_stashed_ops = { .init_inode = pidfs_init_inode, .put_data = pidfs_put_data, }; static int pidfs_init_fs_context(struct fs_context *fc) { struct pseudo_fs_context *ctx; ctx = init_pseudo(fc, PID_FS_MAGIC); if (!ctx) return -ENOMEM; ctx->ops = &pidfs_sops; ctx->dops = &pidfs_dentry_operations; fc->s_fs_info = (void *)&pidfs_stashed_ops; return 0; } static struct file_system_type pidfs_type = { .name = "pidfs", .init_fs_context = pidfs_init_fs_context, .kill_sb = kill_anon_super, }; struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) { struct file *pidfd_file; struct path path; int ret; ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); if (ret < 0) return ERR_PTR(ret); pidfd_file = dentry_open(&path, flags, current_cred()); path_put(&path); return pidfd_file; } void __init pidfs_init(void) { pidfs_mnt = kern_mount(&pidfs_type); if (IS_ERR(pidfs_mnt)) panic("Failed to mount pidfs pseudo filesystem"); }
31 32 6 16 15