Total coverage: 349508 (18%)of 2020654
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 IBM Corporation * Copyright (C) 2010 Politecnico di Torino, Italy * TORSEC group -- https://security.polito.it * * Authors: * Mimi Zohar <zohar@us.ibm.com> * Roberto Sassu <roberto.sassu@polito.it> * * See Documentation/security/keys/trusted-encrypted.rst */ #include <linux/uaccess.h> #include <linux/err.h> #include <keys/trusted-type.h> #include <keys/encrypted-type.h> #include "encrypted.h" /* * request_trusted_key - request the trusted key * * Trusted keys are sealed to PCRs and other metadata. Although userspace * manages both trusted/encrypted key-types, like the encrypted key type * data, trusted key type data is not visible decrypted from userspace. */ struct key *request_trusted_key(const char *trusted_desc, const u8 **master_key, size_t *master_keylen) { struct trusted_key_payload *tpayload; struct key *tkey; tkey = request_key(&key_type_trusted, trusted_desc, NULL); if (IS_ERR(tkey)) goto error; down_read(&tkey->sem); tpayload = tkey->payload.data[0]; *master_key = tpayload->key; *master_keylen = tpayload->key_len; error: return tkey; }
57 54 2 2 2 2 44 2 2 2 2 44 43 44 44 44 44 44 44 44 44 13 43 5 12 44 21 16 17 16 34 35 34 34 34 30 17 17 17 17 1 1 1 1 1 1 12 1 9 12 12 12 11 10 2 2 2 2 2 3 3 20 20 19 2 2 2 2 18 18 18 18 8 18 8 8 10 18 18 18 10 18 18 18 18 8 10 8 10 10 8 18 8 18 18 18 18 18 20 7 3 3 1 7 6 5 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 1 1 2 1 1 2 2 1 2 2 2 2 2 1 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 36 51 3 3 3 3 3 3 3 3 2 3 3 3 3 3 2 2 3 3 3 1 3 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1 1 1 6 5 5 1 11 9 9 2 43 24 24 11 1 11 11 37 36 37 17 14 14 11 11 27 20 7 8 14 11 12 14 10 10 10 9 9 13 10 8 1 1 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 3 3 3 9 9 9 9 9 9 10 1 1 9 9 9 9 9 9 9 10 10 9 10 9 9 3 9 3 9 2 9 9 8 9 9 9 8 9 9 9 9 10 1 14 14 14 1 14 44 43 44 15 4 14 15 15 14 4 4 15 14 15 15 14 4 14 14 14 14 14 13 15 15 15 15 4 15 14 15 4 4 15 4 15 14 14 14 14 14 14 1 1 14 15 15 11 15 31 18 8 8 8 8 8 8 8 4 4 8 4 4 4 4 9 9 14 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 8 5 5 7 4 4 3 3 5 4 4 2 2 8 8 4 4 8 8 8 7 8 8 7 8 8 8 8 8 8 7 8 8 8 8 8 3 3 5 8 8 25 5 5 5 5 5 5 5 5 18 18 18 18 18 18 10 10 10 8 8 8 12 12 8 11 4 4 4 4 3 4 4 7 7 7 7 7 7 7 7 7 7 5 13 13 13 13 13 13 13 13 13 12 13 14 14 14 14 14 14 15 1 15 15 15 15 15 14 8 14 13 10 10 14 14 14 14 10 14 14 14 14 14 4 13 13 13 13 13 13 1 1 1 1 13 12 13 8 8 8 8 13 12 10 3 13 13 12 13 2 15 15 8 3 3 3 3 1 3 3 3 3 3 3 3 1 1 3 7 7 10 14 10 7 7 14 10 14 7 14 10 1 10 10 10 10 10 10 10 10 2 10 10 10 1 9 8 1 1 1 1 1 1 1 1 1 1 1 1 23 23 23 23 10 23 23 10 10 10 9 9 10 25 26 7 25 26 10 10 10 10 26 25 26 25 26 26 26 25 26 23 30 30 30 30 28 30 30 24 14 25 26 30 30 30 29 29 5 5 5 5 5 5 5 5 5 5 5 2 5 3 3 3 3 3 2 3 2 1 1 2 2 2 2 2 5 3 3 3 3 3 20 18 18 20 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 // SPDX-License-Identifier: GPL-2.0 /* * USB hub driver. * * (C) Copyright 1999 Linus Torvalds * (C) Copyright 1999 Johannes Erdfelt * (C) Copyright 1999 Gregory P. Smith * (C) Copyright 2001 Brad Hards (bhards@bigpond.net.au) * * Released under the GPLv2 only. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/completion.h> #include <linux/sched/mm.h> #include <linux/list.h> #include <linux/slab.h> #include <linux/string_choices.h> #include <linux/kcov.h> #include <linux/ioctl.h> #include <linux/usb.h> #include <linux/usbdevice_fs.h> #include <linux/usb/hcd.h> #include <linux/usb/onboard_dev.h> #include <linux/usb/otg.h> #include <linux/usb/quirks.h> #include <linux/workqueue.h> #include <linux/mutex.h> #include <linux/random.h> #include <linux/pm_qos.h> #include <linux/kobject.h> #include <linux/bitfield.h> #include <linux/uaccess.h> #include <asm/byteorder.h> #include "hub.h" #include "phy.h" #include "otg_productlist.h" #define USB_VENDOR_GENESYS_LOGIC 0x05e3 #define USB_VENDOR_SMSC 0x0424 #define USB_PRODUCT_USB5534B 0x5534 #define USB_VENDOR_CYPRESS 0x04b4 #define USB_PRODUCT_CY7C65632 0x6570 #define USB_VENDOR_TEXAS_INSTRUMENTS 0x0451 #define USB_PRODUCT_TUSB8041_USB3 0x8140 #define USB_PRODUCT_TUSB8041_USB2 0x8142 #define USB_VENDOR_MICROCHIP 0x0424 #define USB_PRODUCT_USB4913 0x4913 #define USB_PRODUCT_USB4914 0x4914 #define USB_PRODUCT_USB4915 0x4915 #define HUB_QUIRK_CHECK_PORT_AUTOSUSPEND BIT(0) #define HUB_QUIRK_DISABLE_AUTOSUSPEND BIT(1) #define HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL BIT(2) #define USB_TP_TRANSMISSION_DELAY 40 /* ns */ #define USB_TP_TRANSMISSION_DELAY_MAX 65535 /* ns */ #define USB_PING_RESPONSE_TIME 400 /* ns */ #define USB_REDUCE_FRAME_INTR_BINTERVAL 9 /* * The SET_ADDRESS request timeout will be 500 ms when * USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT quirk flag is set. */ #define USB_SHORT_SET_ADDRESS_REQ_TIMEOUT 500 /* ms */ /* Protect struct usb_device->state and ->children members * Note: Both are also protected by ->dev.sem, except that ->state can * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */ static DEFINE_SPINLOCK(device_state_lock); /* workqueue to process hub events */ static struct workqueue_struct *hub_wq; static void hub_event(struct work_struct *work); /* synchronize hub-port add/remove and peering operations */ DEFINE_MUTEX(usb_port_peer_mutex); /* cycle leds on hubs that aren't blinking for attention */ static bool blinkenlights; module_param(blinkenlights, bool, S_IRUGO); MODULE_PARM_DESC(blinkenlights, "true to cycle leds on hubs"); /* * Device SATA8000 FW1.0 from DATAST0R Technology Corp requires about * 10 seconds to send reply for the initial 64-byte descriptor request. */ /* define initial 64-byte descriptor request timeout in milliseconds */ static int initial_descriptor_timeout = USB_CTRL_GET_TIMEOUT; module_param(initial_descriptor_timeout, int, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(initial_descriptor_timeout, "initial 64-byte descriptor request timeout in milliseconds " "(default 5000 - 5.0 seconds)"); /* * As of 2.6.10 we introduce a new USB device initialization scheme which * closely resembles the way Windows works. Hopefully it will be compatible * with a wider range of devices than the old scheme. However some previously * working devices may start giving rise to "device not accepting address" * errors; if that happens the user can try the old scheme by adjusting the * following module parameters. * * For maximum flexibility there are two boolean parameters to control the * hub driver's behavior. On the first initialization attempt, if the * "old_scheme_first" parameter is set then the old scheme will be used, * otherwise the new scheme is used. If that fails and "use_both_schemes" * is set, then the driver will make another attempt, using the other scheme. */ static bool old_scheme_first; module_param(old_scheme_first, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(old_scheme_first, "start with the old device initialization scheme"); static bool use_both_schemes = true; module_param(use_both_schemes, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(use_both_schemes, "try the other device initialization scheme if the " "first one fails"); /* Mutual exclusion for EHCI CF initialization. This interferes with * port reset on some companion controllers. */ DECLARE_RWSEM(ehci_cf_port_reset_rwsem); EXPORT_SYMBOL_GPL(ehci_cf_port_reset_rwsem); #define HUB_DEBOUNCE_TIMEOUT 2000 #define HUB_DEBOUNCE_STEP 25 #define HUB_DEBOUNCE_STABLE 100 static int usb_reset_and_verify_device(struct usb_device *udev); static int hub_port_disable(struct usb_hub *hub, int port1, int set_state); static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, u16 portstatus); static inline char *portspeed(struct usb_hub *hub, int portstatus) { if (hub_is_superspeedplus(hub->hdev)) return "10.0 Gb/s"; if (hub_is_superspeed(hub->hdev)) return "5.0 Gb/s"; if (portstatus & USB_PORT_STAT_HIGH_SPEED) return "480 Mb/s"; else if (portstatus & USB_PORT_STAT_LOW_SPEED) return "1.5 Mb/s"; else return "12 Mb/s"; } /* Note that hdev or one of its children must be locked! */ struct usb_hub *usb_hub_to_struct_hub(struct usb_device *hdev) { if (!hdev || !hdev->actconfig || !hdev->maxchild) return NULL; return usb_get_intfdata(hdev->actconfig->interface[0]); } int usb_device_supports_lpm(struct usb_device *udev) { /* Some devices have trouble with LPM */ if (udev->quirks & USB_QUIRK_NO_LPM) return 0; /* Skip if the device BOS descriptor couldn't be read */ if (!udev->bos) return 0; /* USB 2.1 (and greater) devices indicate LPM support through * their USB 2.0 Extended Capabilities BOS descriptor. */ if (udev->speed == USB_SPEED_HIGH || udev->speed == USB_SPEED_FULL) { if (udev->bos->ext_cap && (USB_LPM_SUPPORT & le32_to_cpu(udev->bos->ext_cap->bmAttributes))) return 1; return 0; } /* * According to the USB 3.0 spec, all USB 3.0 devices must support LPM. * However, there are some that don't, and they set the U1/U2 exit * latencies to zero. */ if (!udev->bos->ss_cap) { dev_info(&udev->dev, "No LPM exit latency info found, disabling LPM.\n"); return 0; } if (udev->bos->ss_cap->bU1devExitLat == 0 && udev->bos->ss_cap->bU2DevExitLat == 0) { if (udev->parent) dev_info(&udev->dev, "LPM exit latency is zeroed, disabling LPM.\n"); else dev_info(&udev->dev, "We don't know the algorithms for LPM for this host, disabling LPM.\n"); return 0; } if (!udev->parent || udev->parent->lpm_capable) return 1; return 0; } /* * Set the Maximum Exit Latency (MEL) for the host to wakup up the path from * U1/U2, send a PING to the device and receive a PING_RESPONSE. * See USB 3.1 section C.1.5.2 */ static void usb_set_lpm_mel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, unsigned int udev_exit_latency, struct usb_hub *hub, struct usb3_lpm_parameters *hub_lpm_params, unsigned int hub_exit_latency) { unsigned int total_mel; /* * tMEL1. time to transition path from host to device into U0. * MEL for parent already contains the delay up to parent, so only add * the exit latency for the last link (pick the slower exit latency), * and the hub header decode latency. See USB 3.1 section C 2.2.1 * Store MEL in nanoseconds */ total_mel = hub_lpm_params->mel + max(udev_exit_latency, hub_exit_latency) * 1000 + hub->descriptor->u.ss.bHubHdrDecLat * 100; /* * tMEL2. Time to submit PING packet. Sum of tTPTransmissionDelay for * each link + wHubDelay for each hub. Add only for last link. * tMEL4, the time for PING_RESPONSE to traverse upstream is similar. * Multiply by 2 to include it as well. */ total_mel += (__le16_to_cpu(hub->descriptor->u.ss.wHubDelay) + USB_TP_TRANSMISSION_DELAY) * 2; /* * tMEL3, tPingResponse. Time taken by device to generate PING_RESPONSE * after receiving PING. Also add 2100ns as stated in USB 3.1 C 1.5.2.4 * to cover the delay if the PING_RESPONSE is queued behind a Max Packet * Size DP. * Note these delays should be added only once for the entire path, so * add them to the MEL of the device connected to the roothub. */ if (!hub->hdev->parent) total_mel += USB_PING_RESPONSE_TIME + 2100; udev_lpm_params->mel = total_mel; } /* * Set the maximum Device to Host Exit Latency (PEL) for the device to initiate * a transition from either U1 or U2. */ static void usb_set_lpm_pel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params, unsigned int udev_exit_latency, struct usb_hub *hub, struct usb3_lpm_parameters *hub_lpm_params, unsigned int hub_exit_latency, unsigned int port_to_port_exit_latency) { unsigned int first_link_pel; unsigned int hub_pel; /* * First, the device sends an LFPS to transition the link between the * device and the parent hub into U0. The exit latency is the bigger of * the device exit latency or the hub exit latency. */ if (udev_exit_latency > hub_exit_latency) first_link_pel = udev_exit_latency * 1000; else first_link_pel = hub_exit_latency * 1000; /* * When the hub starts to receive the LFPS, there is a slight delay for * it to figure out that one of the ports is sending an LFPS. Then it * will forward the LFPS to its upstream link. The exit latency is the * delay, plus the PEL that we calculated for this hub. */ hub_pel = port_to_port_exit_latency * 1000 + hub_lpm_params->pel; /* * According to figure C-7 in the USB 3.0 spec, the PEL for this device * is the greater of the two exit latencies. */ if (first_link_pel > hub_pel) udev_lpm_params->pel = first_link_pel; else udev_lpm_params->pel = hub_pel; } /* * Set the System Exit Latency (SEL) to indicate the total worst-case time from * when a device initiates a transition to U0, until when it will receive the * first packet from the host controller. * * Section C.1.5.1 describes the four components to this: * - t1: device PEL * - t2: time for the ERDY to make it from the device to the host. * - t3: a host-specific delay to process the ERDY. * - t4: time for the packet to make it from the host to the device. * * t3 is specific to both the xHCI host and the platform the host is integrated * into. The Intel HW folks have said it's negligible, FIXME if a different * vendor says otherwise. */ static void usb_set_lpm_sel(struct usb_device *udev, struct usb3_lpm_parameters *udev_lpm_params) { struct usb_device *parent; unsigned int num_hubs; unsigned int total_sel; /* t1 = device PEL */ total_sel = udev_lpm_params->pel; /* How many external hubs are in between the device & the root port. */ for (parent = udev->parent, num_hubs = 0; parent->parent; parent = parent->parent) num_hubs++; /* t2 = 2.1us + 250ns * (num_hubs - 1) */ if (num_hubs > 0) total_sel += 2100 + 250 * (num_hubs - 1); /* t4 = 250ns * num_hubs */ total_sel += 250 * num_hubs; udev_lpm_params->sel = total_sel; } static void usb_set_lpm_parameters(struct usb_device *udev) { struct usb_hub *hub; unsigned int port_to_port_delay; unsigned int udev_u1_del; unsigned int udev_u2_del; unsigned int hub_u1_del; unsigned int hub_u2_del; if (!udev->lpm_capable || udev->speed < USB_SPEED_SUPER) return; /* Skip if the device BOS descriptor couldn't be read */ if (!udev->bos) return; hub = usb_hub_to_struct_hub(udev->parent); /* It doesn't take time to transition the roothub into U0, since it * doesn't have an upstream link. */ if (!hub) return; udev_u1_del = udev->bos->ss_cap->bU1devExitLat; udev_u2_del = le16_to_cpu(udev->bos->ss_cap->bU2DevExitLat); hub_u1_del = udev->parent->bos->ss_cap->bU1devExitLat; hub_u2_del = le16_to_cpu(udev->parent->bos->ss_cap->bU2DevExitLat); usb_set_lpm_mel(udev, &udev->u1_params, udev_u1_del, hub, &udev->parent->u1_params, hub_u1_del); usb_set_lpm_mel(udev, &udev->u2_params, udev_u2_del, hub, &udev->parent->u2_params, hub_u2_del); /* * Appendix C, section C.2.2.2, says that there is a slight delay from * when the parent hub notices the downstream port is trying to * transition to U0 to when the hub initiates a U0 transition on its * upstream port. The section says the delays are tPort2PortU1EL and * tPort2PortU2EL, but it doesn't define what they are. * * The hub chapter, sections 10.4.2.4 and 10.4.2.5 seem to be talking * about the same delays. Use the maximum delay calculations from those * sections. For U1, it's tHubPort2PortExitLat, which is 1us max. For * U2, it's tHubPort2PortExitLat + U2DevExitLat - U1DevExitLat. I * assume the device exit latencies they are talking about are the hub * exit latencies. * * What do we do if the U2 exit latency is less than the U1 exit * latency? It's possible, although not likely... */ port_to_port_delay = 1; usb_set_lpm_pel(udev, &udev->u1_params, udev_u1_del, hub, &udev->parent->u1_params, hub_u1_del, port_to_port_delay); if (hub_u2_del > hub_u1_del) port_to_port_delay = 1 + hub_u2_del - hub_u1_del; else port_to_port_delay = 1 + hub_u1_del; usb_set_lpm_pel(udev, &udev->u2_params, udev_u2_del, hub, &udev->parent->u2_params, hub_u2_del, port_to_port_delay); /* Now that we've got PEL, calculate SEL. */ usb_set_lpm_sel(udev, &udev->u1_params); usb_set_lpm_sel(udev, &udev->u2_params); } /* USB 2.0 spec Section 11.24.4.5 */ static int get_hub_descriptor(struct usb_device *hdev, struct usb_hub_descriptor *desc) { int i, ret, size; unsigned dtype; if (hub_is_superspeed(hdev)) { dtype = USB_DT_SS_HUB; size = USB_DT_SS_HUB_SIZE; } else { dtype = USB_DT_HUB; size = sizeof(struct usb_hub_descriptor); } for (i = 0; i < 3; i++) { ret = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN | USB_RT_HUB, dtype << 8, 0, desc, size, USB_CTRL_GET_TIMEOUT); if (hub_is_superspeed(hdev)) { if (ret == size) return ret; } else if (ret >= USB_DT_HUB_NONVAR_SIZE + 2) { /* Make sure we have the DeviceRemovable field. */ size = USB_DT_HUB_NONVAR_SIZE + desc->bNbrPorts / 8 + 1; if (ret < size) return -EMSGSIZE; return ret; } } return -EINVAL; } /* * USB 2.0 spec Section 11.24.2.1 */ static int clear_hub_feature(struct usb_device *hdev, int feature) { return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), USB_REQ_CLEAR_FEATURE, USB_RT_HUB, feature, 0, NULL, 0, 1000); } /* * USB 2.0 spec Section 11.24.2.2 */ int usb_clear_port_feature(struct usb_device *hdev, int port1, int feature) { return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), USB_REQ_CLEAR_FEATURE, USB_RT_PORT, feature, port1, NULL, 0, 1000); } /* * USB 2.0 spec Section 11.24.2.13 */ static int set_port_feature(struct usb_device *hdev, int port1, int feature) { return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), USB_REQ_SET_FEATURE, USB_RT_PORT, feature, port1, NULL, 0, 1000); } static char *to_led_name(int selector) { switch (selector) { case HUB_LED_AMBER: return "amber"; case HUB_LED_GREEN: return "green"; case HUB_LED_OFF: return "off"; case HUB_LED_AUTO: return "auto"; default: return "??"; } } /* * USB 2.0 spec Section 11.24.2.7.1.10 and table 11-7 * for info about using port indicators */ static void set_port_led(struct usb_hub *hub, int port1, int selector) { struct usb_port *port_dev = hub->ports[port1 - 1]; int status; status = set_port_feature(hub->hdev, (selector << 8) | port1, USB_PORT_FEAT_INDICATOR); dev_dbg(&port_dev->dev, "indicator %s status %d\n", to_led_name(selector), status); } #define LED_CYCLE_PERIOD ((2*HZ)/3) static void led_work(struct work_struct *work) { struct usb_hub *hub = container_of(work, struct usb_hub, leds.work); struct usb_device *hdev = hub->hdev; unsigned i; unsigned changed = 0; int cursor = -1; if (hdev->state != USB_STATE_CONFIGURED || hub->quiescing) return; for (i = 0; i < hdev->maxchild; i++) { unsigned selector, mode; /* 30%-50% duty cycle */ switch (hub->indicator[i]) { /* cycle marker */ case INDICATOR_CYCLE: cursor = i; selector = HUB_LED_AUTO; mode = INDICATOR_AUTO; break; /* blinking green = sw attention */ case INDICATOR_GREEN_BLINK: selector = HUB_LED_GREEN; mode = INDICATOR_GREEN_BLINK_OFF; break; case INDICATOR_GREEN_BLINK_OFF: selector = HUB_LED_OFF; mode = INDICATOR_GREEN_BLINK; break; /* blinking amber = hw attention */ case INDICATOR_AMBER_BLINK: selector = HUB_LED_AMBER; mode = INDICATOR_AMBER_BLINK_OFF; break; case INDICATOR_AMBER_BLINK_OFF: selector = HUB_LED_OFF; mode = INDICATOR_AMBER_BLINK; break; /* blink green/amber = reserved */ case INDICATOR_ALT_BLINK: selector = HUB_LED_GREEN; mode = INDICATOR_ALT_BLINK_OFF; break; case INDICATOR_ALT_BLINK_OFF: selector = HUB_LED_AMBER; mode = INDICATOR_ALT_BLINK; break; default: continue; } if (selector != HUB_LED_AUTO) changed = 1; set_port_led(hub, i + 1, selector); hub->indicator[i] = mode; } if (!changed && blinkenlights) { cursor++; cursor %= hdev->maxchild; set_port_led(hub, cursor + 1, HUB_LED_GREEN); hub->indicator[cursor] = INDICATOR_CYCLE; changed++; } if (changed) queue_delayed_work(system_power_efficient_wq, &hub->leds, LED_CYCLE_PERIOD); } /* use a short timeout for hub/port status fetches */ #define USB_STS_TIMEOUT 1000 #define USB_STS_RETRIES 5 /* * USB 2.0 spec Section 11.24.2.6 */ static int get_hub_status(struct usb_device *hdev, struct usb_hub_status *data) { int i, status = -ETIMEDOUT; for (i = 0; i < USB_STS_RETRIES && (status == -ETIMEDOUT || status == -EPIPE); i++) { status = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_RT_HUB, 0, 0, data, sizeof(*data), USB_STS_TIMEOUT); } return status; } /* * USB 2.0 spec Section 11.24.2.7 * USB 3.1 takes into use the wValue and wLength fields, spec Section 10.16.2.6 */ static int get_port_status(struct usb_device *hdev, int port1, void *data, u16 value, u16 length) { int i, status = -ETIMEDOUT; for (i = 0; i < USB_STS_RETRIES && (status == -ETIMEDOUT || status == -EPIPE); i++) { status = usb_control_msg(hdev, usb_rcvctrlpipe(hdev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_RT_PORT, value, port1, data, length, USB_STS_TIMEOUT); } return status; } static int hub_ext_port_status(struct usb_hub *hub, int port1, int type, u16 *status, u16 *change, u32 *ext_status) { int ret; int len = 4; if (type != HUB_PORT_STATUS) len = 8; mutex_lock(&hub->status_mutex); ret = get_port_status(hub->hdev, port1, &hub->status->port, type, len); if (ret < len) { if (ret != -ENODEV) dev_err(hub->intfdev, "%s failed (err = %d)\n", __func__, ret); if (ret >= 0) ret = -EIO; } else { *status = le16_to_cpu(hub->status->port.wPortStatus); *change = le16_to_cpu(hub->status->port.wPortChange); if (type != HUB_PORT_STATUS && ext_status) *ext_status = le32_to_cpu( hub->status->port.dwExtPortStatus); ret = 0; } mutex_unlock(&hub->status_mutex); /* * There is no need to lock status_mutex here, because status_mutex * protects hub->status, and the phy driver only checks the port * status without changing the status. */ if (!ret) { struct usb_device *hdev = hub->hdev; /* * Only roothub will be notified of connection changes, * since the USB PHY only cares about changes at the next * level. */ if (is_root_hub(hdev)) { struct usb_hcd *hcd = bus_to_hcd(hdev->bus); bool connect; bool connect_change; connect_change = *change & USB_PORT_STAT_C_CONNECTION; connect = *status & USB_PORT_STAT_CONNECTION; if (connect_change && connect) usb_phy_roothub_notify_connect(hcd->phy_roothub, port1 - 1); else if (connect_change) usb_phy_roothub_notify_disconnect(hcd->phy_roothub, port1 - 1); } } return ret; } int usb_hub_port_status(struct usb_hub *hub, int port1, u16 *status, u16 *change) { return hub_ext_port_status(hub, port1, HUB_PORT_STATUS, status, change, NULL); } static void hub_resubmit_irq_urb(struct usb_hub *hub) { unsigned long flags; int status; spin_lock_irqsave(&hub->irq_urb_lock, flags); if (hub->quiescing) { spin_unlock_irqrestore(&hub->irq_urb_lock, flags); return; } status = usb_submit_urb(hub->urb, GFP_ATOMIC); if (status && status != -ENODEV && status != -EPERM && status != -ESHUTDOWN) { dev_err(hub->intfdev, "resubmit --> %d\n", status); mod_timer(&hub->irq_urb_retry, jiffies + HZ); } spin_unlock_irqrestore(&hub->irq_urb_lock, flags); } static void hub_retry_irq_urb(struct timer_list *t) { struct usb_hub *hub = from_timer(hub, t, irq_urb_retry); hub_resubmit_irq_urb(hub); } static void kick_hub_wq(struct usb_hub *hub) { struct usb_interface *intf; if (hub->disconnected || work_pending(&hub->events)) return; /* * Suppress autosuspend until the event is proceed. * * Be careful and make sure that the symmetric operation is * always called. We are here only when there is no pending * work for this hub. Therefore put the interface either when * the new work is called or when it is canceled. */ intf = to_usb_interface(hub->intfdev); usb_autopm_get_interface_no_resume(intf); hub_get(hub); if (queue_work(hub_wq, &hub->events)) return; /* the work has already been scheduled */ usb_autopm_put_interface_async(intf); hub_put(hub); } void usb_kick_hub_wq(struct usb_device *hdev) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (hub) kick_hub_wq(hub); } /* * Let the USB core know that a USB 3.0 device has sent a Function Wake Device * Notification, which indicates it had initiated remote wakeup. * * USB 3.0 hubs do not report the port link state change from U3 to U0 when the * device initiates resume, so the USB core will not receive notice of the * resume through the normal hub interrupt URB. */ void usb_wakeup_notification(struct usb_device *hdev, unsigned int portnum) { struct usb_hub *hub; struct usb_port *port_dev; if (!hdev) return; hub = usb_hub_to_struct_hub(hdev); if (hub) { port_dev = hub->ports[portnum - 1]; if (port_dev && port_dev->child) pm_wakeup_event(&port_dev->child->dev, 0); set_bit(portnum, hub->wakeup_bits); kick_hub_wq(hub); } } EXPORT_SYMBOL_GPL(usb_wakeup_notification); /* completion function, fires on port status changes and various faults */ static void hub_irq(struct urb *urb) { struct usb_hub *hub = urb->context; int status = urb->status; unsigned i; unsigned long bits; switch (status) { case -ENOENT: /* synchronous unlink */ case -ECONNRESET: /* async unlink */ case -ESHUTDOWN: /* hardware going away */ return; default: /* presumably an error */ /* Cause a hub reset after 10 consecutive errors */ dev_dbg(hub->intfdev, "transfer --> %d\n", status); if ((++hub->nerrors < 10) || hub->error) goto resubmit; hub->error = status; fallthrough; /* let hub_wq handle things */ case 0: /* we got data: port status changed */ bits = 0; for (i = 0; i < urb->actual_length; ++i) bits |= ((unsigned long) ((*hub->buffer)[i])) << (i*8); hub->event_bits[0] = bits; break; } hub->nerrors = 0; /* Something happened, let hub_wq figure it out */ kick_hub_wq(hub); resubmit: hub_resubmit_irq_urb(hub); } /* USB 2.0 spec Section 11.24.2.3 */ static inline int hub_clear_tt_buffer(struct usb_device *hdev, u16 devinfo, u16 tt) { /* Need to clear both directions for control ep */ if (((devinfo >> 11) & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_CONTROL) { int status = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo ^ 0x8000, tt, NULL, 0, 1000); if (status) return status; } return usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_CLEAR_TT_BUFFER, USB_RT_PORT, devinfo, tt, NULL, 0, 1000); } /* * enumeration blocks hub_wq for a long time. we use keventd instead, since * long blocking there is the exception, not the rule. accordingly, HCDs * talking to TTs must queue control transfers (not just bulk and iso), so * both can talk to the same hub concurrently. */ static void hub_tt_work(struct work_struct *work) { struct usb_hub *hub = container_of(work, struct usb_hub, tt.clear_work); unsigned long flags; spin_lock_irqsave(&hub->tt.lock, flags); while (!list_empty(&hub->tt.clear_list)) { struct list_head *next; struct usb_tt_clear *clear; struct usb_device *hdev = hub->hdev; const struct hc_driver *drv; int status; next = hub->tt.clear_list.next; clear = list_entry(next, struct usb_tt_clear, clear_list); list_del(&clear->clear_list); /* drop lock so HCD can concurrently report other TT errors */ spin_unlock_irqrestore(&hub->tt.lock, flags); status = hub_clear_tt_buffer(hdev, clear->devinfo, clear->tt); if (status && status != -ENODEV) dev_err(&hdev->dev, "clear tt %d (%04x) error %d\n", clear->tt, clear->devinfo, status); /* Tell the HCD, even if the operation failed */ drv = clear->hcd->driver; if (drv->clear_tt_buffer_complete) (drv->clear_tt_buffer_complete)(clear->hcd, clear->ep); kfree(clear); spin_lock_irqsave(&hub->tt.lock, flags); } spin_unlock_irqrestore(&hub->tt.lock, flags); } /** * usb_hub_set_port_power - control hub port's power state * @hdev: USB device belonging to the usb hub * @hub: target hub * @port1: port index * @set: expected status * * call this function to control port's power via setting or * clearing the port's PORT_POWER feature. * * Return: 0 if successful. A negative error code otherwise. */ int usb_hub_set_port_power(struct usb_device *hdev, struct usb_hub *hub, int port1, bool set) { int ret; if (set) ret = set_port_feature(hdev, port1, USB_PORT_FEAT_POWER); else ret = usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_POWER); if (ret) return ret; if (set) set_bit(port1, hub->power_bits); else clear_bit(port1, hub->power_bits); return 0; } /** * usb_hub_clear_tt_buffer - clear control/bulk TT state in high speed hub * @urb: an URB associated with the failed or incomplete split transaction * * High speed HCDs use this to tell the hub driver that some split control or * bulk transaction failed in a way that requires clearing internal state of * a transaction translator. This is normally detected (and reported) from * interrupt context. * * It may not be possible for that hub to handle additional full (or low) * speed transactions until that state is fully cleared out. * * Return: 0 if successful. A negative error code otherwise. */ int usb_hub_clear_tt_buffer(struct urb *urb) { struct usb_device *udev = urb->dev; int pipe = urb->pipe; struct usb_tt *tt = udev->tt; unsigned long flags; struct usb_tt_clear *clear; /* we've got to cope with an arbitrary number of pending TT clears, * since each TT has "at least two" buffers that can need it (and * there can be many TTs per hub). even if they're uncommon. */ clear = kmalloc(sizeof *clear, GFP_ATOMIC); if (clear == NULL) { dev_err(&udev->dev, "can't save CLEAR_TT_BUFFER state\n"); /* FIXME recover somehow ... RESET_TT? */ return -ENOMEM; } /* info that CLEAR_TT_BUFFER needs */ clear->tt = tt->multi ? udev->ttport : 1; clear->devinfo = usb_pipeendpoint (pipe); clear->devinfo |= ((u16)udev->devaddr) << 4; clear->devinfo |= usb_pipecontrol(pipe) ? (USB_ENDPOINT_XFER_CONTROL << 11) : (USB_ENDPOINT_XFER_BULK << 11); if (usb_pipein(pipe)) clear->devinfo |= 1 << 15; /* info for completion callback */ clear->hcd = bus_to_hcd(udev->bus); clear->ep = urb->ep; /* tell keventd to clear state for this TT */ spin_lock_irqsave(&tt->lock, flags); list_add_tail(&clear->clear_list, &tt->clear_list); schedule_work(&tt->clear_work); spin_unlock_irqrestore(&tt->lock, flags); return 0; } EXPORT_SYMBOL_GPL(usb_hub_clear_tt_buffer); static void hub_power_on(struct usb_hub *hub, bool do_delay) { int port1; /* Enable power on each port. Some hubs have reserved values * of LPSM (> 2) in their descriptors, even though they are * USB 2.0 hubs. Some hubs do not implement port-power switching * but only emulate it. In all cases, the ports won't work * unless we send these messages to the hub. */ if (hub_is_port_power_switchable(hub)) dev_dbg(hub->intfdev, "enabling power on all ports\n"); else dev_dbg(hub->intfdev, "trying to enable port power on " "non-switchable hub\n"); for (port1 = 1; port1 <= hub->hdev->maxchild; port1++) if (test_bit(port1, hub->power_bits)) set_port_feature(hub->hdev, port1, USB_PORT_FEAT_POWER); else usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_POWER); if (do_delay) msleep(hub_power_on_good_delay(hub)); } static int hub_hub_status(struct usb_hub *hub, u16 *status, u16 *change) { int ret; mutex_lock(&hub->status_mutex); ret = get_hub_status(hub->hdev, &hub->status->hub); if (ret < 0) { if (ret != -ENODEV) dev_err(hub->intfdev, "%s failed (err = %d)\n", __func__, ret); } else { *status = le16_to_cpu(hub->status->hub.wHubStatus); *change = le16_to_cpu(hub->status->hub.wHubChange); ret = 0; } mutex_unlock(&hub->status_mutex); return ret; } static int hub_set_port_link_state(struct usb_hub *hub, int port1, unsigned int link_status) { return set_port_feature(hub->hdev, port1 | (link_status << 3), USB_PORT_FEAT_LINK_STATE); } /* * Disable a port and mark a logical connect-change event, so that some * time later hub_wq will disconnect() any existing usb_device on the port * and will re-enumerate if there actually is a device attached. */ static void hub_port_logical_disconnect(struct usb_hub *hub, int port1) { dev_dbg(&hub->ports[port1 - 1]->dev, "logical disconnect\n"); hub_port_disable(hub, port1, 1); /* FIXME let caller ask to power down the port: * - some devices won't enumerate without a VBUS power cycle * - SRP saves power that way * - ... new call, TBD ... * That's easy if this hub can switch power per-port, and * hub_wq reactivates the port later (timer, SRP, etc). * Powerdown must be optional, because of reset/DFU. */ set_bit(port1, hub->change_bits); kick_hub_wq(hub); } /** * usb_remove_device - disable a device's port on its parent hub * @udev: device to be disabled and removed * Context: @udev locked, must be able to sleep. * * After @udev's port has been disabled, hub_wq is notified and it will * see that the device has been disconnected. When the device is * physically unplugged and something is plugged in, the events will * be received and processed normally. * * Return: 0 if successful. A negative error code otherwise. */ int usb_remove_device(struct usb_device *udev) { struct usb_hub *hub; struct usb_interface *intf; int ret; if (!udev->parent) /* Can't remove a root hub */ return -EINVAL; hub = usb_hub_to_struct_hub(udev->parent); intf = to_usb_interface(hub->intfdev); ret = usb_autopm_get_interface(intf); if (ret < 0) return ret; set_bit(udev->portnum, hub->removed_bits); hub_port_logical_disconnect(hub, udev->portnum); usb_autopm_put_interface(intf); return 0; } enum hub_activation_type { HUB_INIT, HUB_INIT2, HUB_INIT3, /* INITs must come first */ HUB_POST_RESET, HUB_RESUME, HUB_RESET_RESUME, }; static void hub_init_func2(struct work_struct *ws); static void hub_init_func3(struct work_struct *ws); static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) { struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd; int ret; int port1; int status; bool need_debounce_delay = false; unsigned delay; /* Continue a partial initialization */ if (type == HUB_INIT2 || type == HUB_INIT3) { device_lock(&hdev->dev); /* Was the hub disconnected while we were waiting? */ if (hub->disconnected) goto disconnected; if (type == HUB_INIT2) goto init2; goto init3; } hub_get(hub); /* The superspeed hub except for root hub has to use Hub Depth * value as an offset into the route string to locate the bits * it uses to determine the downstream port number. So hub driver * should send a set hub depth request to superspeed hub after * the superspeed hub is set configuration in initialization or * reset procedure. * * After a resume, port power should still be on. * For any other type of activation, turn it on. */ if (type != HUB_RESUME) { if (hdev->parent && hub_is_superspeed(hdev)) { ret = usb_control_msg(hdev, usb_sndctrlpipe(hdev, 0), HUB_SET_DEPTH, USB_RT_HUB, hdev->level - 1, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret < 0) dev_err(hub->intfdev, "set hub depth failed\n"); } /* Speed up system boot by using a delayed_work for the * hub's initial power-up delays. This is pretty awkward * and the implementation looks like a home-brewed sort of * setjmp/longjmp, but it saves at least 100 ms for each * root hub (assuming usbcore is compiled into the kernel * rather than as a module). It adds up. * * This can't be done for HUB_RESUME or HUB_RESET_RESUME * because for those activation types the ports have to be * operational when we return. In theory this could be done * for HUB_POST_RESET, but it's easier not to. */ if (type == HUB_INIT) { delay = hub_power_on_good_delay(hub); hub_power_on(hub, false); INIT_DELAYED_WORK(&hub->init_work, hub_init_func2); queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); /* Suppress autosuspend until init is done */ usb_autopm_get_interface_no_resume( to_usb_interface(hub->intfdev)); return; /* Continues at init2: below */ } else if (type == HUB_RESET_RESUME) { /* The internal host controller state for the hub device * may be gone after a host power loss on system resume. * Update the device's info so the HW knows it's a hub. */ hcd = bus_to_hcd(hdev->bus); if (hcd->driver->update_hub_device) { ret = hcd->driver->update_hub_device(hcd, hdev, &hub->tt, GFP_NOIO); if (ret < 0) { dev_err(hub->intfdev, "Host not accepting hub info update\n"); dev_err(hub->intfdev, "LS/FS devices and hubs may not work under this hub\n"); } } hub_power_on(hub, true); } else { hub_power_on(hub, true); } /* Give some time on remote wakeup to let links to transit to U0 */ } else if (hub_is_superspeed(hub->hdev)) msleep(20); init2: /* * Check each port and set hub->change_bits to let hub_wq know * which ports need attention. */ for (port1 = 1; port1 <= hdev->maxchild; ++port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; u16 portstatus, portchange; portstatus = portchange = 0; status = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (status) goto abort; if (udev || (portstatus & USB_PORT_STAT_CONNECTION)) dev_dbg(&port_dev->dev, "status %04x change %04x\n", portstatus, portchange); /* * After anything other than HUB_RESUME (i.e., initialization * or any sort of reset), every port should be disabled. * Unconnected ports should likewise be disabled (paranoia), * and so should ports for which we have no usb_device. */ if ((portstatus & USB_PORT_STAT_ENABLE) && ( type != HUB_RESUME || !(portstatus & USB_PORT_STAT_CONNECTION) || !udev || udev->state == USB_STATE_NOTATTACHED)) { /* * USB3 protocol ports will automatically transition * to Enabled state when detect an USB3.0 device attach. * Do not disable USB3 protocol ports, just pretend * power was lost */ portstatus &= ~USB_PORT_STAT_ENABLE; if (!hub_is_superspeed(hdev)) usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_ENABLE); } /* Make sure a warm-reset request is handled by port_event */ if (type == HUB_RESUME && hub_port_warm_reset_required(hub, port1, portstatus)) set_bit(port1, hub->event_bits); /* * Add debounce if USB3 link is in polling/link training state. * Link will automatically transition to Enabled state after * link training completes. */ if (hub_is_superspeed(hdev) && ((portstatus & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_POLLING)) need_debounce_delay = true; /* Clear status-change flags; we'll debounce later */ if (portchange & USB_PORT_STAT_C_CONNECTION) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); } if (portchange & USB_PORT_STAT_C_ENABLE) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_ENABLE); } if (portchange & USB_PORT_STAT_C_RESET) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_RESET); } if ((portchange & USB_PORT_STAT_C_BH_RESET) && hub_is_superspeed(hub->hdev)) { need_debounce_delay = true; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_BH_PORT_RESET); } /* We can forget about a "removed" device when there's a * physical disconnect or the connect status changes. */ if (!(portstatus & USB_PORT_STAT_CONNECTION) || (portchange & USB_PORT_STAT_C_CONNECTION)) clear_bit(port1, hub->removed_bits); if (!udev || udev->state == USB_STATE_NOTATTACHED) { /* Tell hub_wq to disconnect the device or * check for a new connection or over current condition. * Based on USB2.0 Spec Section 11.12.5, * C_PORT_OVER_CURRENT could be set while * PORT_OVER_CURRENT is not. So check for any of them. */ if (udev || (portstatus & USB_PORT_STAT_CONNECTION) || (portchange & USB_PORT_STAT_C_CONNECTION) || (portstatus & USB_PORT_STAT_OVERCURRENT) || (portchange & USB_PORT_STAT_C_OVERCURRENT)) set_bit(port1, hub->change_bits); } else if (portstatus & USB_PORT_STAT_ENABLE) { bool port_resumed = (portstatus & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U0; /* The power session apparently survived the resume. * If there was an overcurrent or suspend change * (i.e., remote wakeup request), have hub_wq * take care of it. Look at the port link state * for USB 3.0 hubs, since they don't have a suspend * change bit, and they don't set the port link change * bit on device-initiated resume. */ if (portchange || (hub_is_superspeed(hub->hdev) && port_resumed)) set_bit(port1, hub->event_bits); } else if (udev->persist_enabled) { #ifdef CONFIG_PM udev->reset_resume = 1; #endif /* Don't set the change_bits when the device * was powered off. */ if (test_bit(port1, hub->power_bits)) set_bit(port1, hub->change_bits); } else { /* The power session is gone; tell hub_wq */ usb_set_device_state(udev, USB_STATE_NOTATTACHED); set_bit(port1, hub->change_bits); } } /* If no port-status-change flags were set, we don't need any * debouncing. If flags were set we can try to debounce the * ports all at once right now, instead of letting hub_wq do them * one at a time later on. * * If any port-status changes do occur during this delay, hub_wq * will see them later and handle them normally. */ if (need_debounce_delay) { delay = HUB_DEBOUNCE_STABLE; /* Don't do a long sleep inside a workqueue routine */ if (type == HUB_INIT2) { INIT_DELAYED_WORK(&hub->init_work, hub_init_func3); queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); device_unlock(&hdev->dev); return; /* Continues at init3: below */ } else { msleep(delay); } } init3: hub->quiescing = 0; status = usb_submit_urb(hub->urb, GFP_NOIO); if (status < 0) dev_err(hub->intfdev, "activate --> %d\n", status); if (hub->has_indicators && blinkenlights) queue_delayed_work(system_power_efficient_wq, &hub->leds, LED_CYCLE_PERIOD); /* Scan all ports that need attention */ kick_hub_wq(hub); abort: if (type == HUB_INIT2 || type == HUB_INIT3) { /* Allow autosuspend if it was suppressed */ disconnected: usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); device_unlock(&hdev->dev); } hub_put(hub); } /* Implement the continuations for the delays above */ static void hub_init_func2(struct work_struct *ws) { struct usb_hub *hub = container_of(ws, struct usb_hub, init_work.work); hub_activate(hub, HUB_INIT2); } static void hub_init_func3(struct work_struct *ws) { struct usb_hub *hub = container_of(ws, struct usb_hub, init_work.work); hub_activate(hub, HUB_INIT3); } enum hub_quiescing_type { HUB_DISCONNECT, HUB_PRE_RESET, HUB_SUSPEND }; static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) { struct usb_device *hdev = hub->hdev; unsigned long flags; int i; /* hub_wq and related activity won't re-trigger */ spin_lock_irqsave(&hub->irq_urb_lock, flags); hub->quiescing = 1; spin_unlock_irqrestore(&hub->irq_urb_lock, flags); if (type != HUB_SUSPEND) { /* Disconnect all the children */ for (i = 0; i < hdev->maxchild; ++i) { if (hub->ports[i]->child) usb_disconnect(&hub->ports[i]->child); } } /* Stop hub_wq and related activity */ timer_delete_sync(&hub->irq_urb_retry); usb_kill_urb(hub->urb); if (hub->has_indicators) cancel_delayed_work_sync(&hub->leds); if (hub->tt.hub) flush_work(&hub->tt.clear_work); } static void hub_pm_barrier_for_all_ports(struct usb_hub *hub) { int i; for (i = 0; i < hub->hdev->maxchild; ++i) pm_runtime_barrier(&hub->ports[i]->dev); } /* caller has locked the hub device */ static int hub_pre_reset(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); hub_quiesce(hub, HUB_PRE_RESET); hub->in_reset = 1; hub_pm_barrier_for_all_ports(hub); return 0; } /* caller has locked the hub device */ static int hub_post_reset(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); hub->in_reset = 0; hub_pm_barrier_for_all_ports(hub); hub_activate(hub, HUB_POST_RESET); return 0; } static int hub_configure(struct usb_hub *hub, struct usb_endpoint_descriptor *endpoint) { struct usb_hcd *hcd; struct usb_device *hdev = hub->hdev; struct device *hub_dev = hub->intfdev; u16 hubstatus, hubchange; u16 wHubCharacteristics; unsigned int pipe; int maxp, ret, i; char *message = "out of memory"; unsigned unit_load; unsigned full_load; unsigned maxchild; hub->buffer = kmalloc(sizeof(*hub->buffer), GFP_KERNEL); if (!hub->buffer) { ret = -ENOMEM; goto fail; } hub->status = kmalloc(sizeof(*hub->status), GFP_KERNEL); if (!hub->status) { ret = -ENOMEM; goto fail; } mutex_init(&hub->status_mutex); hub->descriptor = kzalloc(sizeof(*hub->descriptor), GFP_KERNEL); if (!hub->descriptor) { ret = -ENOMEM; goto fail; } /* Request the entire hub descriptor. * hub->descriptor can handle USB_MAXCHILDREN ports, * but a (non-SS) hub can/will return fewer bytes here. */ ret = get_hub_descriptor(hdev, hub->descriptor); if (ret < 0) { message = "can't read hub descriptor"; goto fail; } maxchild = USB_MAXCHILDREN; if (hub_is_superspeed(hdev)) maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS); if (hub->descriptor->bNbrPorts > maxchild) { message = "hub has too many ports!"; ret = -ENODEV; goto fail; } else if (hub->descriptor->bNbrPorts == 0) { message = "hub doesn't have any ports!"; ret = -ENODEV; goto fail; } /* * Accumulate wHubDelay + 40ns for every hub in the tree of devices. * The resulting value will be used for SetIsochDelay() request. */ if (hub_is_superspeed(hdev) || hub_is_superspeedplus(hdev)) { u32 delay = __le16_to_cpu(hub->descriptor->u.ss.wHubDelay); if (hdev->parent) delay += hdev->parent->hub_delay; delay += USB_TP_TRANSMISSION_DELAY; hdev->hub_delay = min_t(u32, delay, USB_TP_TRANSMISSION_DELAY_MAX); } maxchild = hub->descriptor->bNbrPorts; dev_info(hub_dev, "%d port%s detected\n", maxchild, str_plural(maxchild)); hub->ports = kcalloc(maxchild, sizeof(struct usb_port *), GFP_KERNEL); if (!hub->ports) { ret = -ENOMEM; goto fail; } wHubCharacteristics = le16_to_cpu(hub->descriptor->wHubCharacteristics); if (hub_is_superspeed(hdev)) { unit_load = 150; full_load = 900; } else { unit_load = 100; full_load = 500; } /* FIXME for USB 3.0, skip for now */ if ((wHubCharacteristics & HUB_CHAR_COMPOUND) && !(hub_is_superspeed(hdev))) { char portstr[USB_MAXCHILDREN + 1]; for (i = 0; i < maxchild; i++) portstr[i] = hub->descriptor->u.hs.DeviceRemovable [((i + 1) / 8)] & (1 << ((i + 1) % 8)) ? 'F' : 'R'; portstr[maxchild] = 0; dev_dbg(hub_dev, "compound device; port removable status: %s\n", portstr); } else dev_dbg(hub_dev, "standalone hub\n"); switch (wHubCharacteristics & HUB_CHAR_LPSM) { case HUB_CHAR_COMMON_LPSM: dev_dbg(hub_dev, "ganged power switching\n"); break; case HUB_CHAR_INDV_PORT_LPSM: dev_dbg(hub_dev, "individual port power switching\n"); break; case HUB_CHAR_NO_LPSM: case HUB_CHAR_LPSM: dev_dbg(hub_dev, "no power switching (usb 1.0)\n"); break; } switch (wHubCharacteristics & HUB_CHAR_OCPM) { case HUB_CHAR_COMMON_OCPM: dev_dbg(hub_dev, "global over-current protection\n"); break; case HUB_CHAR_INDV_PORT_OCPM: dev_dbg(hub_dev, "individual port over-current protection\n"); break; case HUB_CHAR_NO_OCPM: case HUB_CHAR_OCPM: dev_dbg(hub_dev, "no over-current protection\n"); break; } spin_lock_init(&hub->tt.lock); INIT_LIST_HEAD(&hub->tt.clear_list); INIT_WORK(&hub->tt.clear_work, hub_tt_work); switch (hdev->descriptor.bDeviceProtocol) { case USB_HUB_PR_FS: break; case USB_HUB_PR_HS_SINGLE_TT: dev_dbg(hub_dev, "Single TT\n"); hub->tt.hub = hdev; break; case USB_HUB_PR_HS_MULTI_TT: ret = usb_set_interface(hdev, 0, 1); if (ret == 0) { dev_dbg(hub_dev, "TT per port\n"); hub->tt.multi = 1; } else dev_err(hub_dev, "Using single TT (err %d)\n", ret); hub->tt.hub = hdev; break; case USB_HUB_PR_SS: /* USB 3.0 hubs don't have a TT */ break; default: dev_dbg(hub_dev, "Unrecognized hub protocol %d\n", hdev->descriptor.bDeviceProtocol); break; } /* Note 8 FS bit times == (8 bits / 12000000 bps) ~= 666ns */ switch (wHubCharacteristics & HUB_CHAR_TTTT) { case HUB_TTTT_8_BITS: if (hdev->descriptor.bDeviceProtocol != 0) { hub->tt.think_time = 666; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 8, hub->tt.think_time); } break; case HUB_TTTT_16_BITS: hub->tt.think_time = 666 * 2; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 16, hub->tt.think_time); break; case HUB_TTTT_24_BITS: hub->tt.think_time = 666 * 3; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 24, hub->tt.think_time); break; case HUB_TTTT_32_BITS: hub->tt.think_time = 666 * 4; dev_dbg(hub_dev, "TT requires at most %d " "FS bit times (%d ns)\n", 32, hub->tt.think_time); break; } /* probe() zeroes hub->indicator[] */ if (wHubCharacteristics & HUB_CHAR_PORTIND) { hub->has_indicators = 1; dev_dbg(hub_dev, "Port indicators are supported\n"); } dev_dbg(hub_dev, "power on to power good time: %dms\n", hub->descriptor->bPwrOn2PwrGood * 2); /* power budgeting mostly matters with bus-powered hubs, * and battery-powered root hubs (may provide just 8 mA). */ ret = usb_get_std_status(hdev, USB_RECIP_DEVICE, 0, &hubstatus); if (ret) { message = "can't get hub status"; goto fail; } hcd = bus_to_hcd(hdev->bus); if (hdev == hdev->bus->root_hub) { if (hcd->power_budget > 0) hdev->bus_mA = hcd->power_budget; else hdev->bus_mA = full_load * maxchild; if (hdev->bus_mA >= full_load) hub->mA_per_port = full_load; else { hub->mA_per_port = hdev->bus_mA; hub->limited_power = 1; } } else if ((hubstatus & (1 << USB_DEVICE_SELF_POWERED)) == 0) { int remaining = hdev->bus_mA - hub->descriptor->bHubContrCurrent; dev_dbg(hub_dev, "hub controller current requirement: %dmA\n", hub->descriptor->bHubContrCurrent); hub->limited_power = 1; if (remaining < maxchild * unit_load) dev_warn(hub_dev, "insufficient power available " "to use all downstream ports\n"); hub->mA_per_port = unit_load; /* 7.2.1 */ } else { /* Self-powered external hub */ /* FIXME: What about battery-powered external hubs that * provide less current per port? */ hub->mA_per_port = full_load; } if (hub->mA_per_port < full_load) dev_dbg(hub_dev, "%umA bus power budget for each child\n", hub->mA_per_port); ret = hub_hub_status(hub, &hubstatus, &hubchange); if (ret < 0) { message = "can't get hub status"; goto fail; } /* local power status reports aren't always correct */ if (hdev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_SELFPOWER) dev_dbg(hub_dev, "local power source is %s\n", (hubstatus & HUB_STATUS_LOCAL_POWER) ? "lost (inactive)" : "good"); if ((wHubCharacteristics & HUB_CHAR_OCPM) == 0) dev_dbg(hub_dev, "%sover-current condition exists\n", (hubstatus & HUB_STATUS_OVERCURRENT) ? "" : "no "); /* set up the interrupt endpoint * We use the EP's maxpacket size instead of (PORTS+1+7)/8 * bytes as USB2.0[11.12.3] says because some hubs are known * to send more data (and thus cause overflow). For root hubs, * maxpktsize is defined in hcd.c's fake endpoint descriptors * to be big enough for at least USB_MAXCHILDREN ports. */ pipe = usb_rcvintpipe(hdev, endpoint->bEndpointAddress); maxp = usb_maxpacket(hdev, pipe); if (maxp > sizeof(*hub->buffer)) maxp = sizeof(*hub->buffer); hub->urb = usb_alloc_urb(0, GFP_KERNEL); if (!hub->urb) { ret = -ENOMEM; goto fail; } usb_fill_int_urb(hub->urb, hdev, pipe, *hub->buffer, maxp, hub_irq, hub, endpoint->bInterval); /* maybe cycle the hub leds */ if (hub->has_indicators && blinkenlights) hub->indicator[0] = INDICATOR_CYCLE; mutex_lock(&usb_port_peer_mutex); for (i = 0; i < maxchild; i++) { ret = usb_hub_create_port_device(hub, i + 1); if (ret < 0) { dev_err(hub->intfdev, "couldn't create port%d device.\n", i + 1); break; } } hdev->maxchild = i; for (i = 0; i < hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i]; pm_runtime_put(&port_dev->dev); } mutex_unlock(&usb_port_peer_mutex); if (ret < 0) goto fail; /* Update the HCD's internal representation of this hub before hub_wq * starts getting port status changes for devices under the hub. */ if (hcd->driver->update_hub_device) { ret = hcd->driver->update_hub_device(hcd, hdev, &hub->tt, GFP_KERNEL); if (ret < 0) { message = "can't update HCD hub info"; goto fail; } } usb_hub_adjust_deviceremovable(hdev, hub->descriptor); hub_activate(hub, HUB_INIT); return 0; fail: dev_err(hub_dev, "config failed, %s (err %d)\n", message, ret); /* hub_disconnect() frees urb and descriptor */ return ret; } static void hub_release(struct kref *kref) { struct usb_hub *hub = container_of(kref, struct usb_hub, kref); usb_put_dev(hub->hdev); usb_put_intf(to_usb_interface(hub->intfdev)); kfree(hub); } void hub_get(struct usb_hub *hub) { kref_get(&hub->kref); } void hub_put(struct usb_hub *hub) { kref_put(&hub->kref, hub_release); } static unsigned highspeed_hubs; static void hub_disconnect(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); struct usb_device *hdev = interface_to_usbdev(intf); int port1; /* * Stop adding new hub events. We do not want to block here and thus * will not try to remove any pending work item. */ hub->disconnected = 1; /* Disconnect all children and quiesce the hub */ hub->error = 0; hub_quiesce(hub, HUB_DISCONNECT); mutex_lock(&usb_port_peer_mutex); /* Avoid races with recursively_mark_NOTATTACHED() */ spin_lock_irq(&device_state_lock); port1 = hdev->maxchild; hdev->maxchild = 0; usb_set_intfdata(intf, NULL); spin_unlock_irq(&device_state_lock); for (; port1 > 0; --port1) usb_hub_remove_port_device(hub, port1); mutex_unlock(&usb_port_peer_mutex); if (hub->hdev->speed == USB_SPEED_HIGH) highspeed_hubs--; usb_free_urb(hub->urb); kfree(hub->ports); kfree(hub->descriptor); kfree(hub->status); kfree(hub->buffer); pm_suspend_ignore_children(&intf->dev, false); if (hub->quirk_disable_autosuspend) usb_autopm_put_interface(intf); onboard_dev_destroy_pdevs(&hub->onboard_devs); hub_put(hub); } static bool hub_descriptor_is_sane(struct usb_host_interface *desc) { /* Some hubs have a subclass of 1, which AFAICT according to the */ /* specs is not defined, but it works */ if (desc->desc.bInterfaceSubClass != 0 && desc->desc.bInterfaceSubClass != 1) return false; /* Multiple endpoints? What kind of mutant ninja-hub is this? */ if (desc->desc.bNumEndpoints != 1) return false; /* If the first endpoint is not interrupt IN, we'd better punt! */ if (!usb_endpoint_is_int_in(&desc->endpoint[0].desc)) return false; return true; } static int hub_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_host_interface *desc; struct usb_device *hdev; struct usb_hub *hub; desc = intf->cur_altsetting; hdev = interface_to_usbdev(intf); /* * The USB 2.0 spec prohibits hubs from having more than one * configuration or interface, and we rely on this prohibition. * Refuse to accept a device that violates it. */ if (hdev->descriptor.bNumConfigurations > 1 || hdev->actconfig->desc.bNumInterfaces > 1) { dev_err(&intf->dev, "Invalid hub with more than one config or interface\n"); return -EINVAL; } /* * Set default autosuspend delay as 0 to speedup bus suspend, * based on the below considerations: * * - Unlike other drivers, the hub driver does not rely on the * autosuspend delay to provide enough time to handle a wakeup * event, and the submitted status URB is just to check future * change on hub downstream ports, so it is safe to do it. * * - The patch might cause one or more auto supend/resume for * below very rare devices when they are plugged into hub * first time: * * devices having trouble initializing, and disconnect * themselves from the bus and then reconnect a second * or so later * * devices just for downloading firmware, and disconnects * themselves after completing it * * For these quite rare devices, their drivers may change the * autosuspend delay of their parent hub in the probe() to one * appropriate value to avoid the subtle problem if someone * does care it. * * - The patch may cause one or more auto suspend/resume on * hub during running 'lsusb', but it is probably too * infrequent to worry about. * * - Change autosuspend delay of hub can avoid unnecessary auto * suspend timer for hub, also may decrease power consumption * of USB bus. * * - If user has indicated to prevent autosuspend by passing * usbcore.autosuspend = -1 then keep autosuspend disabled. */ #ifdef CONFIG_PM if (hdev->dev.power.autosuspend_delay >= 0) pm_runtime_set_autosuspend_delay(&hdev->dev, 0); #endif /* * Hubs have proper suspend/resume support, except for root hubs * where the controller driver doesn't have bus_suspend and * bus_resume methods. */ if (hdev->parent) { /* normal device */ usb_enable_autosuspend(hdev); } else { /* root hub */ const struct hc_driver *drv = bus_to_hcd(hdev->bus)->driver; if (drv->bus_suspend && drv->bus_resume) usb_enable_autosuspend(hdev); } if (hdev->level == MAX_TOPO_LEVEL) { dev_err(&intf->dev, "Unsupported bus topology: hub nested too deep\n"); return -E2BIG; } #ifdef CONFIG_USB_OTG_DISABLE_EXTERNAL_HUB if (hdev->parent) { dev_warn(&intf->dev, "ignoring external hub\n"); return -ENODEV; } #endif if (!hub_descriptor_is_sane(desc)) { dev_err(&intf->dev, "bad descriptor, ignoring hub\n"); return -EIO; } /* We found a hub */ dev_info(&intf->dev, "USB hub found\n"); hub = kzalloc(sizeof(*hub), GFP_KERNEL); if (!hub) return -ENOMEM; kref_init(&hub->kref); hub->intfdev = &intf->dev; hub->hdev = hdev; INIT_DELAYED_WORK(&hub->leds, led_work); INIT_DELAYED_WORK(&hub->init_work, NULL); INIT_WORK(&hub->events, hub_event); INIT_LIST_HEAD(&hub->onboard_devs); spin_lock_init(&hub->irq_urb_lock); timer_setup(&hub->irq_urb_retry, hub_retry_irq_urb, 0); usb_get_intf(intf); usb_get_dev(hdev); usb_set_intfdata(intf, hub); intf->needs_remote_wakeup = 1; pm_suspend_ignore_children(&intf->dev, true); if (hdev->speed == USB_SPEED_HIGH) highspeed_hubs++; if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND) hub->quirk_check_port_auto_suspend = 1; if (id->driver_info & HUB_QUIRK_DISABLE_AUTOSUSPEND) { hub->quirk_disable_autosuspend = 1; usb_autopm_get_interface_no_resume(intf); } if ((id->driver_info & HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL) && desc->endpoint[0].desc.bInterval > USB_REDUCE_FRAME_INTR_BINTERVAL) { desc->endpoint[0].desc.bInterval = USB_REDUCE_FRAME_INTR_BINTERVAL; /* Tell the HCD about the interrupt ep's new bInterval */ usb_set_interface(hdev, 0, 0); } if (hub_configure(hub, &desc->endpoint[0].desc) >= 0) { onboard_dev_create_pdevs(hdev, &hub->onboard_devs); return 0; } hub_disconnect(intf); return -ENODEV; } static int hub_ioctl(struct usb_interface *intf, unsigned int code, void *user_data) { struct usb_device *hdev = interface_to_usbdev(intf); struct usb_hub *hub = usb_hub_to_struct_hub(hdev); /* assert ifno == 0 (part of hub spec) */ switch (code) { case USBDEVFS_HUB_PORTINFO: { struct usbdevfs_hub_portinfo *info = user_data; int i; spin_lock_irq(&device_state_lock); if (hdev->devnum <= 0) info->nports = 0; else { info->nports = hdev->maxchild; for (i = 0; i < info->nports; i++) { if (hub->ports[i]->child == NULL) info->port[i] = 0; else info->port[i] = hub->ports[i]->child->devnum; } } spin_unlock_irq(&device_state_lock); return info->nports + 1; } default: return -ENOSYS; } } /* * Allow user programs to claim ports on a hub. When a device is attached * to one of these "claimed" ports, the program will "own" the device. */ static int find_port_owner(struct usb_device *hdev, unsigned port1, struct usb_dev_state ***ppowner) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (hdev->state == USB_STATE_NOTATTACHED) return -ENODEV; if (port1 == 0 || port1 > hdev->maxchild) return -EINVAL; /* Devices not managed by the hub driver * will always have maxchild equal to 0. */ *ppowner = &(hub->ports[port1 - 1]->port_owner); return 0; } /* In the following three functions, the caller must hold hdev's lock */ int usb_hub_claim_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner) { int rc; struct usb_dev_state **powner; rc = find_port_owner(hdev, port1, &powner); if (rc) return rc; if (*powner) return -EBUSY; *powner = owner; return rc; } EXPORT_SYMBOL_GPL(usb_hub_claim_port); int usb_hub_release_port(struct usb_device *hdev, unsigned port1, struct usb_dev_state *owner) { int rc; struct usb_dev_state **powner; rc = find_port_owner(hdev, port1, &powner); if (rc) return rc; if (*powner != owner) return -ENOENT; *powner = NULL; return rc; } EXPORT_SYMBOL_GPL(usb_hub_release_port); void usb_hub_release_all_ports(struct usb_device *hdev, struct usb_dev_state *owner) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); int n; for (n = 0; n < hdev->maxchild; n++) { if (hub->ports[n]->port_owner == owner) hub->ports[n]->port_owner = NULL; } } /* The caller must hold udev's lock */ bool usb_device_is_owned(struct usb_device *udev) { struct usb_hub *hub; if (udev->state == USB_STATE_NOTATTACHED || !udev->parent) return false; hub = usb_hub_to_struct_hub(udev->parent); return !!hub->ports[udev->portnum - 1]->port_owner; } static void update_port_device_state(struct usb_device *udev) { struct usb_hub *hub; struct usb_port *port_dev; if (udev->parent) { hub = usb_hub_to_struct_hub(udev->parent); /* * The Link Layer Validation System Driver (lvstest) * has a test step to unbind the hub before running the * rest of the procedure. This triggers hub_disconnect * which will set the hub's maxchild to 0, further * resulting in usb_hub_to_struct_hub returning NULL. */ if (hub) { port_dev = hub->ports[udev->portnum - 1]; WRITE_ONCE(port_dev->state, udev->state); sysfs_notify_dirent(port_dev->state_kn); } } } static void recursively_mark_NOTATTACHED(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); int i; for (i = 0; i < udev->maxchild; ++i) { if (hub->ports[i]->child) recursively_mark_NOTATTACHED(hub->ports[i]->child); } if (udev->state == USB_STATE_SUSPENDED) udev->active_duration -= jiffies; udev->state = USB_STATE_NOTATTACHED; update_port_device_state(udev); } /** * usb_set_device_state - change a device's current state (usbcore, hcds) * @udev: pointer to device whose state should be changed * @new_state: new state value to be stored * * udev->state is _not_ fully protected by the device lock. Although * most transitions are made only while holding the lock, the state can * can change to USB_STATE_NOTATTACHED at almost any time. This * is so that devices can be marked as disconnected as soon as possible, * without having to wait for any semaphores to be released. As a result, * all changes to any device's state must be protected by the * device_state_lock spinlock. * * Once a device has been added to the device tree, all changes to its state * should be made using this routine. The state should _not_ be set directly. * * If udev->state is already USB_STATE_NOTATTACHED then no change is made. * Otherwise udev->state is set to new_state, and if new_state is * USB_STATE_NOTATTACHED then all of udev's descendants' states are also set * to USB_STATE_NOTATTACHED. */ void usb_set_device_state(struct usb_device *udev, enum usb_device_state new_state) { unsigned long flags; int wakeup = -1; spin_lock_irqsave(&device_state_lock, flags); if (udev->state == USB_STATE_NOTATTACHED) ; /* do nothing */ else if (new_state != USB_STATE_NOTATTACHED) { /* root hub wakeup capabilities are managed out-of-band * and may involve silicon errata ... ignore them here. */ if (udev->parent) { if (udev->state == USB_STATE_SUSPENDED || new_state == USB_STATE_SUSPENDED) ; /* No change to wakeup settings */ else if (new_state == USB_STATE_CONFIGURED) wakeup = (udev->quirks & USB_QUIRK_IGNORE_REMOTE_WAKEUP) ? 0 : udev->actconfig->desc.bmAttributes & USB_CONFIG_ATT_WAKEUP; else wakeup = 0; } if (udev->state == USB_STATE_SUSPENDED && new_state != USB_STATE_SUSPENDED) udev->active_duration -= jiffies; else if (new_state == USB_STATE_SUSPENDED && udev->state != USB_STATE_SUSPENDED) udev->active_duration += jiffies; udev->state = new_state; update_port_device_state(udev); } else recursively_mark_NOTATTACHED(udev); spin_unlock_irqrestore(&device_state_lock, flags); if (wakeup >= 0) device_set_wakeup_capable(&udev->dev, wakeup); } EXPORT_SYMBOL_GPL(usb_set_device_state); /* * Choose a device number. * * Device numbers are used as filenames in usbfs. On USB-1.1 and * USB-2.0 buses they are also used as device addresses, however on * USB-3.0 buses the address is assigned by the controller hardware * and it usually is not the same as the device number. * * Devices connected under xHCI are not as simple. The host controller * supports virtualization, so the hardware assigns device addresses and * the HCD must setup data structures before issuing a set address * command to the hardware. */ static void choose_devnum(struct usb_device *udev) { int devnum; struct usb_bus *bus = udev->bus; /* be safe when more hub events are proceed in parallel */ mutex_lock(&bus->devnum_next_mutex); /* Try to allocate the next devnum beginning at bus->devnum_next. */ devnum = find_next_zero_bit(bus->devmap, 128, bus->devnum_next); if (devnum >= 128) devnum = find_next_zero_bit(bus->devmap, 128, 1); bus->devnum_next = (devnum >= 127 ? 1 : devnum + 1); if (devnum < 128) { set_bit(devnum, bus->devmap); udev->devnum = devnum; } mutex_unlock(&bus->devnum_next_mutex); } static void release_devnum(struct usb_device *udev) { if (udev->devnum > 0) { clear_bit(udev->devnum, udev->bus->devmap); udev->devnum = -1; } } static void update_devnum(struct usb_device *udev, int devnum) { udev->devnum = devnum; if (!udev->devaddr) udev->devaddr = (u8)devnum; } static void hub_free_dev(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); /* Root hubs aren't real devices, so don't free HCD resources */ if (hcd->driver->free_dev && udev->parent) hcd->driver->free_dev(hcd, udev); } static void hub_disconnect_children(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); int i; /* Free up all the children before we remove this device */ for (i = 0; i < udev->maxchild; i++) { if (hub->ports[i]->child) usb_disconnect(&hub->ports[i]->child); } } /** * usb_disconnect - disconnect a device (usbcore-internal) * @pdev: pointer to device being disconnected * * Context: task context, might sleep * * Something got disconnected. Get rid of it and all of its children. * * If *pdev is a normal device then the parent hub must already be locked. * If *pdev is a root hub then the caller must hold the usb_bus_idr_lock, * which protects the set of root hubs as well as the list of buses. * * Only hub drivers (including virtual root hub drivers for host * controllers) should ever call this. * * This call is synchronous, and may not be used in an interrupt context. */ void usb_disconnect(struct usb_device **pdev) { struct usb_port *port_dev = NULL; struct usb_device *udev = *pdev; struct usb_hub *hub = NULL; int port1 = 1; /* mark the device as inactive, so any further urb submissions for * this device (and any of its children) will fail immediately. * this quiesces everything except pending urbs. */ usb_set_device_state(udev, USB_STATE_NOTATTACHED); dev_info(&udev->dev, "USB disconnect, device number %d\n", udev->devnum); /* * Ensure that the pm runtime code knows that the USB device * is in the process of being disconnected. */ pm_runtime_barrier(&udev->dev); usb_lock_device(udev); hub_disconnect_children(udev); /* deallocate hcd/hardware state ... nuking all pending urbs and * cleaning up all state associated with the current configuration * so that the hardware is now fully quiesced. */ dev_dbg(&udev->dev, "unregistering device\n"); usb_disable_device(udev, 0); usb_hcd_synchronize_unlinks(udev); if (udev->parent) { port1 = udev->portnum; hub = usb_hub_to_struct_hub(udev->parent); port_dev = hub->ports[port1 - 1]; sysfs_remove_link(&udev->dev.kobj, "port"); sysfs_remove_link(&port_dev->dev.kobj, "device"); /* * As usb_port_runtime_resume() de-references udev, make * sure no resumes occur during removal */ if (!test_and_set_bit(port1, hub->child_usage_bits)) pm_runtime_get_sync(&port_dev->dev); typec_deattach(port_dev->connector, &udev->dev); } usb_remove_ep_devs(&udev->ep0); usb_unlock_device(udev); /* Unregister the device. The device driver is responsible * for de-configuring the device and invoking the remove-device * notifier chain (used by usbfs and possibly others). */ device_del(&udev->dev); /* Free the device number and delete the parent's children[] * (or root_hub) pointer. */ release_devnum(udev); /* Avoid races with recursively_mark_NOTATTACHED() */ spin_lock_irq(&device_state_lock); *pdev = NULL; spin_unlock_irq(&device_state_lock); if (port_dev && test_and_clear_bit(port1, hub->child_usage_bits)) pm_runtime_put(&port_dev->dev); hub_free_dev(udev); put_device(&udev->dev); } #ifdef CONFIG_USB_ANNOUNCE_NEW_DEVICES static void show_string(struct usb_device *udev, char *id, char *string) { if (!string) return; dev_info(&udev->dev, "%s: %s\n", id, string); } static void announce_device(struct usb_device *udev) { u16 bcdDevice = le16_to_cpu(udev->descriptor.bcdDevice); dev_info(&udev->dev, "New USB device found, idVendor=%04x, idProduct=%04x, bcdDevice=%2x.%02x\n", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct), bcdDevice >> 8, bcdDevice & 0xff); dev_info(&udev->dev, "New USB device strings: Mfr=%d, Product=%d, SerialNumber=%d\n", udev->descriptor.iManufacturer, udev->descriptor.iProduct, udev->descriptor.iSerialNumber); show_string(udev, "Product", udev->product); show_string(udev, "Manufacturer", udev->manufacturer); show_string(udev, "SerialNumber", udev->serial); } #else static inline void announce_device(struct usb_device *udev) { } #endif /** * usb_enumerate_device_otg - FIXME (usbcore-internal) * @udev: newly addressed device (in ADDRESS state) * * Finish enumeration for On-The-Go devices * * Return: 0 if successful. A negative error code otherwise. */ static int usb_enumerate_device_otg(struct usb_device *udev) { int err = 0; #ifdef CONFIG_USB_OTG /* * OTG-aware devices on OTG-capable root hubs may be able to use SRP, * to wake us after we've powered off VBUS; and HNP, switching roles * "host" to "peripheral". The OTG descriptor helps figure this out. */ if (!udev->bus->is_b_host && udev->config && udev->parent == udev->bus->root_hub) { struct usb_otg_descriptor *desc = NULL; struct usb_bus *bus = udev->bus; unsigned port1 = udev->portnum; /* descriptor may appear anywhere in config */ err = __usb_get_extra_descriptor(udev->rawdescriptors[0], le16_to_cpu(udev->config[0].desc.wTotalLength), USB_DT_OTG, (void **) &desc, sizeof(*desc)); if (err || !(desc->bmAttributes & USB_OTG_HNP)) return 0; dev_info(&udev->dev, "Dual-Role OTG device on %sHNP port\n", (port1 == bus->otg_port) ? "" : "non-"); /* enable HNP before suspend, it's simpler */ if (port1 == bus->otg_port) { bus->b_hnp_enable = 1; err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, 0, USB_DEVICE_B_HNP_ENABLE, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (err < 0) { /* * OTG MESSAGE: report errors here, * customize to match your product. */ dev_err(&udev->dev, "can't set HNP mode: %d\n", err); bus->b_hnp_enable = 0; } } else if (desc->bLength == sizeof (struct usb_otg_descriptor)) { /* * We are operating on a legacy OTP device * These should be told that they are operating * on the wrong port if we have another port that does * support HNP */ if (bus->otg_port != 0) { /* Set a_alt_hnp_support for legacy otg device */ err = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, 0, USB_DEVICE_A_ALT_HNP_SUPPORT, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (err < 0) dev_err(&udev->dev, "set a_alt_hnp_support failed: %d\n", err); } } } #endif return err; } /** * usb_enumerate_device - Read device configs/intfs/otg (usbcore-internal) * @udev: newly addressed device (in ADDRESS state) * * This is only called by usb_new_device() -- all comments that apply there * apply here wrt to environment. * * If the device is WUSB and not authorized, we don't attempt to read * the string descriptors, as they will be errored out by the device * until it has been authorized. * * Return: 0 if successful. A negative error code otherwise. */ static int usb_enumerate_device(struct usb_device *udev) { int err; struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (udev->config == NULL) { err = usb_get_configuration(udev); if (err < 0) { if (err != -ENODEV) dev_err(&udev->dev, "can't read configurations, error %d\n", err); return err; } } /* read the standard strings and cache them if present */ udev->product = usb_cache_string(udev, udev->descriptor.iProduct); udev->manufacturer = usb_cache_string(udev, udev->descriptor.iManufacturer); udev->serial = usb_cache_string(udev, udev->descriptor.iSerialNumber); err = usb_enumerate_device_otg(udev); if (err < 0) return err; if (IS_ENABLED(CONFIG_USB_OTG_PRODUCTLIST) && hcd->tpl_support && !is_targeted(udev)) { /* Maybe it can talk to us, though we can't talk to it. * (Includes HNP test device.) */ if (IS_ENABLED(CONFIG_USB_OTG) && (udev->bus->b_hnp_enable || udev->bus->is_b_host)) { err = usb_port_suspend(udev, PMSG_AUTO_SUSPEND); if (err < 0) dev_dbg(&udev->dev, "HNP fail, %d\n", err); } return -ENOTSUPP; } usb_detect_interface_quirks(udev); return 0; } static void set_usb_port_removable(struct usb_device *udev) { struct usb_device *hdev = udev->parent; struct usb_hub *hub; u8 port = udev->portnum; u16 wHubCharacteristics; bool removable = true; dev_set_removable(&udev->dev, DEVICE_REMOVABLE_UNKNOWN); if (!hdev) return; hub = usb_hub_to_struct_hub(udev->parent); /* * If the platform firmware has provided information about a port, * use that to determine whether it's removable. */ switch (hub->ports[udev->portnum - 1]->connect_type) { case USB_PORT_CONNECT_TYPE_HOT_PLUG: dev_set_removable(&udev->dev, DEVICE_REMOVABLE); return; case USB_PORT_CONNECT_TYPE_HARD_WIRED: case USB_PORT_NOT_USED: dev_set_removable(&udev->dev, DEVICE_FIXED); return; default: break; } /* * Otherwise, check whether the hub knows whether a port is removable * or not */ wHubCharacteristics = le16_to_cpu(hub->descriptor->wHubCharacteristics); if (!(wHubCharacteristics & HUB_CHAR_COMPOUND)) return; if (hub_is_superspeed(hdev)) { if (le16_to_cpu(hub->descriptor->u.ss.DeviceRemovable) & (1 << port)) removable = false; } else { if (hub->descriptor->u.hs.DeviceRemovable[port / 8] & (1 << (port % 8))) removable = false; } if (removable) dev_set_removable(&udev->dev, DEVICE_REMOVABLE); else dev_set_removable(&udev->dev, DEVICE_FIXED); } /** * usb_new_device - perform initial device setup (usbcore-internal) * @udev: newly addressed device (in ADDRESS state) * * This is called with devices which have been detected but not fully * enumerated. The device descriptor is available, but not descriptors * for any device configuration. The caller must have locked either * the parent hub (if udev is a normal device) or else the * usb_bus_idr_lock (if udev is a root hub). The parent's pointer to * udev has already been installed, but udev is not yet visible through * sysfs or other filesystem code. * * This call is synchronous, and may not be used in an interrupt context. * * Only the hub driver or root-hub registrar should ever call this. * * Return: Whether the device is configured properly or not. Zero if the * interface was registered with the driver core; else a negative errno * value. * */ int usb_new_device(struct usb_device *udev) { int err; if (udev->parent) { /* Initialize non-root-hub device wakeup to disabled; * device (un)configuration controls wakeup capable * sysfs power/wakeup controls wakeup enabled/disabled */ device_init_wakeup(&udev->dev, 0); } /* Tell the runtime-PM framework the device is active */ pm_runtime_set_active(&udev->dev); pm_runtime_get_noresume(&udev->dev); pm_runtime_use_autosuspend(&udev->dev); pm_runtime_enable(&udev->dev); /* By default, forbid autosuspend for all devices. It will be * allowed for hubs during binding. */ usb_disable_autosuspend(udev); err = usb_enumerate_device(udev); /* Read descriptors */ if (err < 0) goto fail; dev_dbg(&udev->dev, "udev %d, busnum %d, minor = %d\n", udev->devnum, udev->bus->busnum, (((udev->bus->busnum-1) * 128) + (udev->devnum-1))); /* export the usbdev device-node for libusb */ udev->dev.devt = MKDEV(USB_DEVICE_MAJOR, (((udev->bus->busnum-1) * 128) + (udev->devnum-1))); /* Tell the world! */ announce_device(udev); if (udev->serial) add_device_randomness(udev->serial, strlen(udev->serial)); if (udev->product) add_device_randomness(udev->product, strlen(udev->product)); if (udev->manufacturer) add_device_randomness(udev->manufacturer, strlen(udev->manufacturer)); device_enable_async_suspend(&udev->dev); /* check whether the hub or firmware marks this port as non-removable */ set_usb_port_removable(udev); /* Register the device. The device driver is responsible * for configuring the device and invoking the add-device * notifier chain (used by usbfs and possibly others). */ err = device_add(&udev->dev); if (err) { dev_err(&udev->dev, "can't device_add, error %d\n", err); goto fail; } /* Create link files between child device and usb port device. */ if (udev->parent) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int port1 = udev->portnum; struct usb_port *port_dev = hub->ports[port1 - 1]; err = sysfs_create_link(&udev->dev.kobj, &port_dev->dev.kobj, "port"); if (err) goto out_del_dev; err = sysfs_create_link(&port_dev->dev.kobj, &udev->dev.kobj, "device"); if (err) { sysfs_remove_link(&udev->dev.kobj, "port"); goto out_del_dev; } if (!test_and_set_bit(port1, hub->child_usage_bits)) pm_runtime_get_sync(&port_dev->dev); typec_attach(port_dev->connector, &udev->dev); } (void) usb_create_ep_devs(&udev->dev, &udev->ep0, udev); usb_mark_last_busy(udev); pm_runtime_put_sync_autosuspend(&udev->dev); return err; out_del_dev: device_del(&udev->dev); fail: usb_set_device_state(udev, USB_STATE_NOTATTACHED); pm_runtime_disable(&udev->dev); pm_runtime_set_suspended(&udev->dev); return err; } /** * usb_deauthorize_device - deauthorize a device (usbcore-internal) * @usb_dev: USB device * * Move the USB device to a very basic state where interfaces are disabled * and the device is in fact unconfigured and unusable. * * We share a lock (that we have) with device_del(), so we need to * defer its call. * * Return: 0. */ int usb_deauthorize_device(struct usb_device *usb_dev) { usb_lock_device(usb_dev); if (usb_dev->authorized == 0) goto out_unauthorized; usb_dev->authorized = 0; usb_set_configuration(usb_dev, -1); out_unauthorized: usb_unlock_device(usb_dev); return 0; } int usb_authorize_device(struct usb_device *usb_dev) { int result = 0, c; usb_lock_device(usb_dev); if (usb_dev->authorized == 1) goto out_authorized; result = usb_autoresume_device(usb_dev); if (result < 0) { dev_err(&usb_dev->dev, "can't autoresume for authorization: %d\n", result); goto error_autoresume; } usb_dev->authorized = 1; /* Choose and set the configuration. This registers the interfaces * with the driver core and lets interface drivers bind to them. */ c = usb_choose_configuration(usb_dev); if (c >= 0) { result = usb_set_configuration(usb_dev, c); if (result) { dev_err(&usb_dev->dev, "can't set config #%d, error %d\n", c, result); /* This need not be fatal. The user can try to * set other configurations. */ } } dev_info(&usb_dev->dev, "authorized to connect\n"); usb_autosuspend_device(usb_dev); error_autoresume: out_authorized: usb_unlock_device(usb_dev); /* complements locktree */ return result; } /** * get_port_ssp_rate - Match the extended port status to SSP rate * @hdev: The hub device * @ext_portstatus: extended port status * * Match the extended port status speed id to the SuperSpeed Plus sublink speed * capability attributes. Base on the number of connected lanes and speed, * return the corresponding enum usb_ssp_rate. */ static enum usb_ssp_rate get_port_ssp_rate(struct usb_device *hdev, u32 ext_portstatus) { struct usb_ssp_cap_descriptor *ssp_cap; u32 attr; u8 speed_id; u8 ssac; u8 lanes; int i; if (!hdev->bos) goto out; ssp_cap = hdev->bos->ssp_cap; if (!ssp_cap) goto out; speed_id = ext_portstatus & USB_EXT_PORT_STAT_RX_SPEED_ID; lanes = USB_EXT_PORT_RX_LANES(ext_portstatus) + 1; ssac = le32_to_cpu(ssp_cap->bmAttributes) & USB_SSP_SUBLINK_SPEED_ATTRIBS; for (i = 0; i <= ssac; i++) { u8 ssid; attr = le32_to_cpu(ssp_cap->bmSublinkSpeedAttr[i]); ssid = FIELD_GET(USB_SSP_SUBLINK_SPEED_SSID, attr); if (speed_id == ssid) { u16 mantissa; u8 lse; u8 type; /* * Note: currently asymmetric lane types are only * applicable for SSIC operate in SuperSpeed protocol */ type = FIELD_GET(USB_SSP_SUBLINK_SPEED_ST, attr); if (type == USB_SSP_SUBLINK_SPEED_ST_ASYM_RX || type == USB_SSP_SUBLINK_SPEED_ST_ASYM_TX) goto out; if (FIELD_GET(USB_SSP_SUBLINK_SPEED_LP, attr) != USB_SSP_SUBLINK_SPEED_LP_SSP) goto out; lse = FIELD_GET(USB_SSP_SUBLINK_SPEED_LSE, attr); mantissa = FIELD_GET(USB_SSP_SUBLINK_SPEED_LSM, attr); /* Convert to Gbps */ for (; lse < USB_SSP_SUBLINK_SPEED_LSE_GBPS; lse++) mantissa /= 1000; if (mantissa >= 10 && lanes == 1) return USB_SSP_GEN_2x1; if (mantissa >= 10 && lanes == 2) return USB_SSP_GEN_2x2; if (mantissa >= 5 && lanes == 2) return USB_SSP_GEN_1x2; goto out; } } out: return USB_SSP_GEN_UNKNOWN; } #ifdef CONFIG_USB_FEW_INIT_RETRIES #define PORT_RESET_TRIES 2 #define SET_ADDRESS_TRIES 1 #define GET_DESCRIPTOR_TRIES 1 #define GET_MAXPACKET0_TRIES 1 #define PORT_INIT_TRIES 4 #else #define PORT_RESET_TRIES 5 #define SET_ADDRESS_TRIES 2 #define GET_DESCRIPTOR_TRIES 2 #define GET_MAXPACKET0_TRIES 3 #define PORT_INIT_TRIES 4 #endif /* CONFIG_USB_FEW_INIT_RETRIES */ #define DETECT_DISCONNECT_TRIES 5 #define HUB_ROOT_RESET_TIME 60 /* times are in msec */ #define HUB_SHORT_RESET_TIME 10 #define HUB_BH_RESET_TIME 50 #define HUB_LONG_RESET_TIME 200 #define HUB_RESET_TIMEOUT 800 static bool use_new_scheme(struct usb_device *udev, int retry, struct usb_port *port_dev) { int old_scheme_first_port = (port_dev->quirks & USB_PORT_QUIRK_OLD_SCHEME) || old_scheme_first; /* * "New scheme" enumeration causes an extra state transition to be * exposed to an xhci host and causes USB3 devices to receive control * commands in the default state. This has been seen to cause * enumeration failures, so disable this enumeration scheme for USB3 * devices. */ if (udev->speed >= USB_SPEED_SUPER) return false; /* * If use_both_schemes is set, use the first scheme (whichever * it is) for the larger half of the retries, then use the other * scheme. Otherwise, use the first scheme for all the retries. */ if (use_both_schemes && retry >= (PORT_INIT_TRIES + 1) / 2) return old_scheme_first_port; /* Second half */ return !old_scheme_first_port; /* First half or all */ } /* Is a USB 3.0 port in the Inactive or Compliance Mode state? * Port warm reset is required to recover */ static bool hub_port_warm_reset_required(struct usb_hub *hub, int port1, u16 portstatus) { u16 link_state; if (!hub_is_superspeed(hub->hdev)) return false; if (test_bit(port1, hub->warm_reset_bits)) return true; link_state = portstatus & USB_PORT_STAT_LINK_STATE; return link_state == USB_SS_PORT_LS_SS_INACTIVE || link_state == USB_SS_PORT_LS_COMP_MOD; } static int hub_port_wait_reset(struct usb_hub *hub, int port1, struct usb_device *udev, unsigned int delay, bool warm) { int delay_time, ret; u16 portstatus; u16 portchange; u32 ext_portstatus = 0; for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT; delay_time += delay) { /* wait to give the device a chance to reset */ msleep(delay); /* read and decode port status */ if (hub_is_superspeedplus(hub->hdev)) ret = hub_ext_port_status(hub, port1, HUB_EXT_PORT_STATUS, &portstatus, &portchange, &ext_portstatus); else ret = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (ret < 0) return ret; /* * The port state is unknown until the reset completes. * * On top of that, some chips may require additional time * to re-establish a connection after the reset is complete, * so also wait for the connection to be re-established. */ if (!(portstatus & USB_PORT_STAT_RESET) && (portstatus & USB_PORT_STAT_CONNECTION)) break; /* switch to the long delay after two short delay failures */ if (delay_time >= 2 * HUB_SHORT_RESET_TIME) delay = HUB_LONG_RESET_TIME; dev_dbg(&hub->ports[port1 - 1]->dev, "not %sreset yet, waiting %dms\n", warm ? "warm " : "", delay); } if ((portstatus & USB_PORT_STAT_RESET)) return -EBUSY; if (hub_port_warm_reset_required(hub, port1, portstatus)) return -ENOTCONN; /* Device went away? */ if (!(portstatus & USB_PORT_STAT_CONNECTION)) return -ENOTCONN; /* Retry if connect change is set but status is still connected. * A USB 3.0 connection may bounce if multiple warm resets were issued, * but the device may have successfully re-connected. Ignore it. */ if (!hub_is_superspeed(hub->hdev) && (portchange & USB_PORT_STAT_C_CONNECTION)) { usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); return -EAGAIN; } if (!(portstatus & USB_PORT_STAT_ENABLE)) return -EBUSY; if (!udev) return 0; if (hub_is_superspeedplus(hub->hdev)) { /* extended portstatus Rx and Tx lane count are zero based */ udev->rx_lanes = USB_EXT_PORT_RX_LANES(ext_portstatus) + 1; udev->tx_lanes = USB_EXT_PORT_TX_LANES(ext_portstatus) + 1; udev->ssp_rate = get_port_ssp_rate(hub->hdev, ext_portstatus); } else { udev->rx_lanes = 1; udev->tx_lanes = 1; udev->ssp_rate = USB_SSP_GEN_UNKNOWN; } if (udev->ssp_rate != USB_SSP_GEN_UNKNOWN) udev->speed = USB_SPEED_SUPER_PLUS; else if (hub_is_superspeed(hub->hdev)) udev->speed = USB_SPEED_SUPER; else if (portstatus & USB_PORT_STAT_HIGH_SPEED) udev->speed = USB_SPEED_HIGH; else if (portstatus & USB_PORT_STAT_LOW_SPEED) udev->speed = USB_SPEED_LOW; else udev->speed = USB_SPEED_FULL; return 0; } /* Handle port reset and port warm(BH) reset (for USB3 protocol ports) */ static int hub_port_reset(struct usb_hub *hub, int port1, struct usb_device *udev, unsigned int delay, bool warm) { int i, status; u16 portchange, portstatus; struct usb_port *port_dev = hub->ports[port1 - 1]; int reset_recovery_time; if (!hub_is_superspeed(hub->hdev)) { if (warm) { dev_err(hub->intfdev, "only USB3 hub support " "warm reset\n"); return -EINVAL; } /* Block EHCI CF initialization during the port reset. * Some companion controllers don't like it when they mix. */ down_read(&ehci_cf_port_reset_rwsem); } else if (!warm) { /* * If the caller hasn't explicitly requested a warm reset, * double check and see if one is needed. */ if (usb_hub_port_status(hub, port1, &portstatus, &portchange) == 0) if (hub_port_warm_reset_required(hub, port1, portstatus)) warm = true; } clear_bit(port1, hub->warm_reset_bits); /* Reset the port */ for (i = 0; i < PORT_RESET_TRIES; i++) { status = set_port_feature(hub->hdev, port1, (warm ? USB_PORT_FEAT_BH_PORT_RESET : USB_PORT_FEAT_RESET)); if (status == -ENODEV) { ; /* The hub is gone */ } else if (status) { dev_err(&port_dev->dev, "cannot %sreset (err = %d)\n", warm ? "warm " : "", status); } else { status = hub_port_wait_reset(hub, port1, udev, delay, warm); if (status && status != -ENOTCONN && status != -ENODEV) dev_dbg(hub->intfdev, "port_wait_reset: err = %d\n", status); } /* * Check for disconnect or reset, and bail out after several * reset attempts to avoid warm reset loop. */ if (status == 0 || status == -ENOTCONN || status == -ENODEV || (status == -EBUSY && i == PORT_RESET_TRIES - 1)) { usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_RESET); if (!hub_is_superspeed(hub->hdev)) goto done; usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_BH_PORT_RESET); usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); if (udev) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); /* * If a USB 3.0 device migrates from reset to an error * state, re-issue the warm reset. */ if (usb_hub_port_status(hub, port1, &portstatus, &portchange) < 0) goto done; if (!hub_port_warm_reset_required(hub, port1, portstatus)) goto done; /* * If the port is in SS.Inactive or Compliance Mode, the * hot or warm reset failed. Try another warm reset. */ if (!warm) { dev_dbg(&port_dev->dev, "hot reset failed, warm reset\n"); warm = true; } } dev_dbg(&port_dev->dev, "not enabled, trying %sreset again...\n", warm ? "warm " : ""); delay = HUB_LONG_RESET_TIME; } dev_err(&port_dev->dev, "Cannot enable. Maybe the USB cable is bad?\n"); done: if (status == 0) { if (port_dev->quirks & USB_PORT_QUIRK_FAST_ENUM) usleep_range(10000, 12000); else { /* TRSTRCY = 10 ms; plus some extra */ reset_recovery_time = 10 + 40; /* Hub needs extra delay after resetting its port. */ if (hub->hdev->quirks & USB_QUIRK_HUB_SLOW_RESET) reset_recovery_time += 100; msleep(reset_recovery_time); } if (udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); update_devnum(udev, 0); /* The xHC may think the device is already reset, * so ignore the status. */ if (hcd->driver->reset_device) hcd->driver->reset_device(hcd, udev); usb_set_device_state(udev, USB_STATE_DEFAULT); } } else { if (udev) usb_set_device_state(udev, USB_STATE_NOTATTACHED); } if (!hub_is_superspeed(hub->hdev)) up_read(&ehci_cf_port_reset_rwsem); return status; } /* * hub_port_stop_enumerate - stop USB enumeration or ignore port events * @hub: target hub * @port1: port num of the port * @retries: port retries number of hub_port_init() * * Return: * true: ignore port actions/events or give up connection attempts. * false: keep original behavior. * * This function will be based on retries to check whether the port which is * marked with early_stop attribute would stop enumeration or ignore events. * * Note: * This function didn't change anything if early_stop is not set, and it will * prevent all connection attempts when early_stop is set and the attempts of * the port are more than 1. */ static bool hub_port_stop_enumerate(struct usb_hub *hub, int port1, int retries) { struct usb_port *port_dev = hub->ports[port1 - 1]; if (port_dev->early_stop) { if (port_dev->ignore_event) return true; /* * We want unsuccessful attempts to fail quickly. * Since some devices may need one failure during * port initialization, we allow two tries but no * more. */ if (retries < 2) return false; port_dev->ignore_event = 1; } else port_dev->ignore_event = 0; return port_dev->ignore_event; } /* Check if a port is power on */ int usb_port_is_power_on(struct usb_hub *hub, unsigned int portstatus) { int ret = 0; if (hub_is_superspeed(hub->hdev)) { if (portstatus & USB_SS_PORT_STAT_POWER) ret = 1; } else { if (portstatus & USB_PORT_STAT_POWER) ret = 1; } return ret; } static void usb_lock_port(struct usb_port *port_dev) __acquires(&port_dev->status_lock) { mutex_lock(&port_dev->status_lock); __acquire(&port_dev->status_lock); } static void usb_unlock_port(struct usb_port *port_dev) __releases(&port_dev->status_lock) { mutex_unlock(&port_dev->status_lock); __release(&port_dev->status_lock); } #ifdef CONFIG_PM /* Check if a port is suspended(USB2.0 port) or in U3 state(USB3.0 port) */ static int port_is_suspended(struct usb_hub *hub, unsigned portstatus) { int ret = 0; if (hub_is_superspeed(hub->hdev)) { if ((portstatus & USB_PORT_STAT_LINK_STATE) == USB_SS_PORT_LS_U3) ret = 1; } else { if (portstatus & USB_PORT_STAT_SUSPEND) ret = 1; } return ret; } /* Determine whether the device on a port is ready for a normal resume, * is ready for a reset-resume, or should be disconnected. */ static int check_port_resume_type(struct usb_device *udev, struct usb_hub *hub, int port1, int status, u16 portchange, u16 portstatus) { struct usb_port *port_dev = hub->ports[port1 - 1]; int retries = 3; retry: /* Is a warm reset needed to recover the connection? */ if (status == 0 && udev->reset_resume && hub_port_warm_reset_required(hub, port1, portstatus)) { /* pass */; } /* Is the device still present? */ else if (status || port_is_suspended(hub, portstatus) || !usb_port_is_power_on(hub, portstatus)) { if (status >= 0) status = -ENODEV; } else if (!(portstatus & USB_PORT_STAT_CONNECTION)) { if (retries--) { usleep_range(200, 300); status = usb_hub_port_status(hub, port1, &portstatus, &portchange); goto retry; } status = -ENODEV; } /* Can't do a normal resume if the port isn't enabled, * so try a reset-resume instead. */ else if (!(portstatus & USB_PORT_STAT_ENABLE) && !udev->reset_resume) { if (udev->persist_enabled) udev->reset_resume = 1; else status = -ENODEV; } if (status) { dev_dbg(&port_dev->dev, "status %04x.%04x after resume, %d\n", portchange, portstatus, status); } else if (udev->reset_resume) { /* Late port handoff can set status-change bits */ if (portchange & USB_PORT_STAT_C_CONNECTION) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); if (portchange & USB_PORT_STAT_C_ENABLE) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_ENABLE); /* * Whatever made this reset-resume necessary may have * turned on the port1 bit in hub->change_bits. But after * a successful reset-resume we want the bit to be clear; * if it was on it would indicate that something happened * following the reset-resume. */ clear_bit(port1, hub->change_bits); } return status; } int usb_disable_ltm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); /* Check if the roothub and device supports LTM. */ if (!usb_device_supports_ltm(hcd->self.root_hub) || !usb_device_supports_ltm(udev)) return 0; /* Clear Feature LTM Enable can only be sent if the device is * configured. */ if (!udev->actconfig) return 0; return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_LTM_ENABLE, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } EXPORT_SYMBOL_GPL(usb_disable_ltm); void usb_enable_ltm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); /* Check if the roothub and device supports LTM. */ if (!usb_device_supports_ltm(hcd->self.root_hub) || !usb_device_supports_ltm(udev)) return; /* Set Feature LTM Enable can only be sent if the device is * configured. */ if (!udev->actconfig) return; usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_LTM_ENABLE, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } EXPORT_SYMBOL_GPL(usb_enable_ltm); /* * usb_enable_remote_wakeup - enable remote wakeup for a device * @udev: target device * * For USB-2 devices: Set the device's remote wakeup feature. * * For USB-3 devices: Assume there's only one function on the device and * enable remote wake for the first interface. FIXME if the interface * association descriptor shows there's more than one function. */ static int usb_enable_remote_wakeup(struct usb_device *udev) { if (udev->speed < USB_SPEED_SUPER) return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_REMOTE_WAKEUP, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); else return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_INTERFACE, USB_INTRF_FUNC_SUSPEND, USB_INTRF_FUNC_SUSPEND_RW | USB_INTRF_FUNC_SUSPEND_LP, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* * usb_disable_remote_wakeup - disable remote wakeup for a device * @udev: target device * * For USB-2 devices: Clear the device's remote wakeup feature. * * For USB-3 devices: Assume there's only one function on the device and * disable remote wake for the first interface. FIXME if the interface * association descriptor shows there's more than one function. */ static int usb_disable_remote_wakeup(struct usb_device *udev) { if (udev->speed < USB_SPEED_SUPER) return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, USB_DEVICE_REMOTE_WAKEUP, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); else return usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_INTERFACE, USB_INTRF_FUNC_SUSPEND, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } /* Count of wakeup-enabled devices at or below udev */ unsigned usb_wakeup_enabled_descendants(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); return udev->do_remote_wakeup + (hub ? hub->wakeup_enabled_descendants : 0); } EXPORT_SYMBOL_GPL(usb_wakeup_enabled_descendants); /* * usb_port_suspend - suspend a usb device's upstream port * @udev: device that's no longer in active use, not a root hub * Context: must be able to sleep; device not locked; pm locks held * * Suspends a USB device that isn't in active use, conserving power. * Devices may wake out of a suspend, if anything important happens, * using the remote wakeup mechanism. They may also be taken out of * suspend by the host, using usb_port_resume(). It's also routine * to disconnect devices while they are suspended. * * This only affects the USB hardware for a device; its interfaces * (and, for hubs, child devices) must already have been suspended. * * Selective port suspend reduces power; most suspended devices draw * less than 500 uA. It's also used in OTG, along with remote wakeup. * All devices below the suspended port are also suspended. * * Devices leave suspend state when the host wakes them up. Some devices * also support "remote wakeup", where the device can activate the USB * tree above them to deliver data, such as a keypress or packet. In * some cases, this wakes the USB host. * * Suspending OTG devices may trigger HNP, if that's been enabled * between a pair of dual-role devices. That will change roles, such * as from A-Host to A-Peripheral or from B-Host back to B-Peripheral. * * Devices on USB hub ports have only one "suspend" state, corresponding * to ACPI D2, "may cause the device to lose some context". * State transitions include: * * - suspend, resume ... when the VBUS power link stays live * - suspend, disconnect ... VBUS lost * * Once VBUS drop breaks the circuit, the port it's using has to go through * normal re-enumeration procedures, starting with enabling VBUS power. * Other than re-initializing the hub (plug/unplug, except for root hubs), * Linux (2.6) currently has NO mechanisms to initiate that: no hub_wq * timer, no SRP, no requests through sysfs. * * If Runtime PM isn't enabled or used, non-SuperSpeed devices may not get * suspended until their bus goes into global suspend (i.e., the root * hub is suspended). Nevertheless, we change @udev->state to * USB_STATE_SUSPENDED as this is the device's "logical" state. The actual * upstream port setting is stored in @udev->port_is_suspended. * * Returns 0 on success, else negative errno. */ int usb_port_suspend(struct usb_device *udev, pm_message_t msg) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); struct usb_port *port_dev = hub->ports[udev->portnum - 1]; int port1 = udev->portnum; int status; bool really_suspend = true; usb_lock_port(port_dev); /* enable remote wakeup when appropriate; this lets the device * wake up the upstream hub (including maybe the root hub). * * NOTE: OTG devices may issue remote wakeup (or SRP) even when * we don't explicitly enable it here. */ if (udev->do_remote_wakeup) { status = usb_enable_remote_wakeup(udev); if (status) { dev_dbg(&udev->dev, "won't remote wakeup, status %d\n", status); /* bail if autosuspend is requested */ if (PMSG_IS_AUTO(msg)) goto err_wakeup; } } /* disable USB2 hardware LPM */ usb_disable_usb2_hardware_lpm(udev); if (usb_disable_ltm(udev)) { dev_err(&udev->dev, "Failed to disable LTM before suspend\n"); status = -ENOMEM; if (PMSG_IS_AUTO(msg)) goto err_ltm; } /* see 7.1.7.6 */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U3); /* * For system suspend, we do not need to enable the suspend feature * on individual USB-2 ports. The devices will automatically go * into suspend a few ms after the root hub stops sending packets. * The USB 2.0 spec calls this "global suspend". * * However, many USB hubs have a bug: They don't relay wakeup requests * from a downstream port if the port's suspend feature isn't on. * Therefore we will turn on the suspend feature if udev or any of its * descendants is enabled for remote wakeup. */ else if (PMSG_IS_AUTO(msg) || usb_wakeup_enabled_descendants(udev) > 0) status = set_port_feature(hub->hdev, port1, USB_PORT_FEAT_SUSPEND); else { really_suspend = false; status = 0; } if (status) { /* Check if the port has been suspended for the timeout case * to prevent the suspended port from incorrect handling. */ if (status == -ETIMEDOUT) { int ret; u16 portstatus, portchange; portstatus = portchange = 0; ret = usb_hub_port_status(hub, port1, &portstatus, &portchange); dev_dbg(&port_dev->dev, "suspend timeout, status %04x\n", portstatus); if (ret == 0 && port_is_suspended(hub, portstatus)) { status = 0; goto suspend_done; } } dev_dbg(&port_dev->dev, "can't suspend, status %d\n", status); /* Try to enable USB3 LTM again */ usb_enable_ltm(udev); err_ltm: /* Try to enable USB2 hardware LPM again */ usb_enable_usb2_hardware_lpm(udev); if (udev->do_remote_wakeup) (void) usb_disable_remote_wakeup(udev); err_wakeup: /* System sleep transitions should never fail */ if (!PMSG_IS_AUTO(msg)) status = 0; } else { suspend_done: dev_dbg(&udev->dev, "usb %ssuspend, wakeup %d\n", (PMSG_IS_AUTO(msg) ? "auto-" : ""), udev->do_remote_wakeup); if (really_suspend) { udev->port_is_suspended = 1; /* device has up to 10 msec to fully suspend */ msleep(10); } usb_set_device_state(udev, USB_STATE_SUSPENDED); } if (status == 0 && !udev->do_remote_wakeup && udev->persist_enabled && test_and_clear_bit(port1, hub->child_usage_bits)) pm_runtime_put_sync(&port_dev->dev); usb_mark_last_busy(hub->hdev); usb_unlock_port(port_dev); return status; } /* * If the USB "suspend" state is in use (rather than "global suspend"), * many devices will be individually taken out of suspend state using * special "resume" signaling. This routine kicks in shortly after * hardware resume signaling is finished, either because of selective * resume (by host) or remote wakeup (by device) ... now see what changed * in the tree that's rooted at this device. * * If @udev->reset_resume is set then the device is reset before the * status check is done. */ static int finish_port_resume(struct usb_device *udev) { int status = 0; u16 devstatus = 0; /* caller owns the udev device lock */ dev_dbg(&udev->dev, "%s\n", udev->reset_resume ? "finish reset-resume" : "finish resume"); /* usb ch9 identifies four variants of SUSPENDED, based on what * state the device resumes to. Linux currently won't see the * first two on the host side; they'd be inside hub_port_init() * during many timeouts, but hub_wq can't suspend until later. */ usb_set_device_state(udev, udev->actconfig ? USB_STATE_CONFIGURED : USB_STATE_ADDRESS); /* 10.5.4.5 says not to reset a suspended port if the attached * device is enabled for remote wakeup. Hence the reset * operation is carried out here, after the port has been * resumed. */ if (udev->reset_resume) { /* * If the device morphs or switches modes when it is reset, * we don't want to perform a reset-resume. We'll fail the * resume, which will cause a logical disconnect, and then * the device will be rediscovered. */ retry_reset_resume: if (udev->quirks & USB_QUIRK_RESET) status = -ENODEV; else status = usb_reset_and_verify_device(udev); } /* 10.5.4.5 says be sure devices in the tree are still there. * For now let's assume the device didn't go crazy on resume, * and device drivers will know about any resume quirks. */ if (status == 0) { devstatus = 0; status = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, &devstatus); /* If a normal resume failed, try doing a reset-resume */ if (status && !udev->reset_resume && udev->persist_enabled) { dev_dbg(&udev->dev, "retry with reset-resume\n"); udev->reset_resume = 1; goto retry_reset_resume; } } if (status) { dev_dbg(&udev->dev, "gone after usb resume? status %d\n", status); /* * There are a few quirky devices which violate the standard * by claiming to have remote wakeup enabled after a reset, * which crash if the feature is cleared, hence check for * udev->reset_resume */ } else if (udev->actconfig && !udev->reset_resume) { if (udev->speed < USB_SPEED_SUPER) { if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)) status = usb_disable_remote_wakeup(udev); } else { status = usb_get_std_status(udev, USB_RECIP_INTERFACE, 0, &devstatus); if (!status && devstatus & (USB_INTRF_STAT_FUNC_RW_CAP | USB_INTRF_STAT_FUNC_RW)) status = usb_disable_remote_wakeup(udev); } if (status) dev_dbg(&udev->dev, "disable remote wakeup, status %d\n", status); status = 0; } return status; } /* * There are some SS USB devices which take longer time for link training. * XHCI specs 4.19.4 says that when Link training is successful, port * sets CCS bit to 1. So if SW reads port status before successful link * training, then it will not find device to be present. * USB Analyzer log with such buggy devices show that in some cases * device switch on the RX termination after long delay of host enabling * the VBUS. In few other cases it has been seen that device fails to * negotiate link training in first attempt. It has been * reported till now that few devices take as long as 2000 ms to train * the link after host enabling its VBUS and termination. Following * routine implements a 2000 ms timeout for link training. If in a case * link trains before timeout, loop will exit earlier. * * There are also some 2.0 hard drive based devices and 3.0 thumb * drives that, when plugged into a 2.0 only port, take a long * time to set CCS after VBUS enable. * * FIXME: If a device was connected before suspend, but was removed * while system was asleep, then the loop in the following routine will * only exit at timeout. * * This routine should only be called when persist is enabled. */ static int wait_for_connected(struct usb_device *udev, struct usb_hub *hub, int port1, u16 *portchange, u16 *portstatus) { int status = 0, delay_ms = 0; while (delay_ms < 2000) { if (status || *portstatus & USB_PORT_STAT_CONNECTION) break; if (!usb_port_is_power_on(hub, *portstatus)) { status = -ENODEV; break; } msleep(20); delay_ms += 20; status = usb_hub_port_status(hub, port1, portstatus, portchange); } dev_dbg(&udev->dev, "Waited %dms for CONNECT\n", delay_ms); return status; } /* * usb_port_resume - re-activate a suspended usb device's upstream port * @udev: device to re-activate, not a root hub * Context: must be able to sleep; device not locked; pm locks held * * This will re-activate the suspended device, increasing power usage * while letting drivers communicate again with its endpoints. * USB resume explicitly guarantees that the power session between * the host and the device is the same as it was when the device * suspended. * * If @udev->reset_resume is set then this routine won't check that the * port is still enabled. Furthermore, finish_port_resume() above will * reset @udev. The end result is that a broken power session can be * recovered and @udev will appear to persist across a loss of VBUS power. * * For example, if a host controller doesn't maintain VBUS suspend current * during a system sleep or is reset when the system wakes up, all the USB * power sessions below it will be broken. This is especially troublesome * for mass-storage devices containing mounted filesystems, since the * device will appear to have disconnected and all the memory mappings * to it will be lost. Using the USB_PERSIST facility, the device can be * made to appear as if it had not disconnected. * * This facility can be dangerous. Although usb_reset_and_verify_device() makes * every effort to insure that the same device is present after the * reset as before, it cannot provide a 100% guarantee. Furthermore it's * quite possible for a device to remain unaltered but its media to be * changed. If the user replaces a flash memory card while the system is * asleep, he will have only himself to blame when the filesystem on the * new card is corrupted and the system crashes. * * Returns 0 on success, else negative errno. */ int usb_port_resume(struct usb_device *udev, pm_message_t msg) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); struct usb_port *port_dev = hub->ports[udev->portnum - 1]; int port1 = udev->portnum; int status; u16 portchange, portstatus; if (!test_and_set_bit(port1, hub->child_usage_bits)) { status = pm_runtime_resume_and_get(&port_dev->dev); if (status < 0) { dev_dbg(&udev->dev, "can't resume usb port, status %d\n", status); return status; } } usb_lock_port(port_dev); /* Skip the initial Clear-Suspend step for a remote wakeup */ status = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (status == 0 && !port_is_suspended(hub, portstatus)) { if (portchange & USB_PORT_STAT_C_SUSPEND) pm_wakeup_event(&udev->dev, 0); goto SuspendCleared; } /* see 7.1.7.7; affects power usage, but not budgeting */ if (hub_is_superspeed(hub->hdev)) status = hub_set_port_link_state(hub, port1, USB_SS_PORT_LS_U0); else status = usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_SUSPEND); if (status) { dev_dbg(&port_dev->dev, "can't resume, status %d\n", status); } else { /* drive resume for USB_RESUME_TIMEOUT msec */ dev_dbg(&udev->dev, "usb %sresume\n", (PMSG_IS_AUTO(msg) ? "auto-" : "")); msleep(USB_RESUME_TIMEOUT); /* Virtual root hubs can trigger on GET_PORT_STATUS to * stop resume signaling. Then finish the resume * sequence. */ status = usb_hub_port_status(hub, port1, &portstatus, &portchange); } SuspendCleared: if (status == 0) { udev->port_is_suspended = 0; if (hub_is_superspeed(hub->hdev)) { if (portchange & USB_PORT_STAT_C_LINK_STATE) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); } else { if (portchange & USB_PORT_STAT_C_SUSPEND) usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_SUSPEND); } /* TRSMRCY = 10 msec */ msleep(10); } if (udev->persist_enabled) status = wait_for_connected(udev, hub, port1, &portchange, &portstatus); status = check_port_resume_type(udev, hub, port1, status, portchange, portstatus); if (status == 0) status = finish_port_resume(udev); if (status < 0) { dev_dbg(&udev->dev, "can't resume, status %d\n", status); hub_port_logical_disconnect(hub, port1); } else { /* Try to enable USB2 hardware LPM */ usb_enable_usb2_hardware_lpm(udev); /* Try to enable USB3 LTM */ usb_enable_ltm(udev); } usb_unlock_port(port_dev); return status; } int usb_remote_wakeup(struct usb_device *udev) { int status = 0; usb_lock_device(udev); if (udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "usb %sresume\n", "wakeup-"); status = usb_autoresume_device(udev); if (status == 0) { /* Let the drivers do their thing, then... */ usb_autosuspend_device(udev); } } usb_unlock_device(udev); return status; } /* Returns 1 if there was a remote wakeup and a connect status change. */ static int hub_handle_remote_wakeup(struct usb_hub *hub, unsigned int port, u16 portstatus, u16 portchange) __must_hold(&port_dev->status_lock) { struct usb_port *port_dev = hub->ports[port - 1]; struct usb_device *hdev; struct usb_device *udev; int connect_change = 0; u16 link_state; int ret; hdev = hub->hdev; udev = port_dev->child; if (!hub_is_superspeed(hdev)) { if (!(portchange & USB_PORT_STAT_C_SUSPEND)) return 0; usb_clear_port_feature(hdev, port, USB_PORT_FEAT_C_SUSPEND); } else { link_state = portstatus & USB_PORT_STAT_LINK_STATE; if (!udev || udev->state != USB_STATE_SUSPENDED || (link_state != USB_SS_PORT_LS_U0 && link_state != USB_SS_PORT_LS_U1 && link_state != USB_SS_PORT_LS_U2)) return 0; } if (udev) { /* TRSMRCY = 10 msec */ msleep(10); usb_unlock_port(port_dev); ret = usb_remote_wakeup(udev); usb_lock_port(port_dev); if (ret < 0) connect_change = 1; } else { ret = -ENODEV; hub_port_disable(hub, port, 1); } dev_dbg(&port_dev->dev, "resume, status %d\n", ret); return connect_change; } static int check_ports_changed(struct usb_hub *hub) { int port1; for (port1 = 1; port1 <= hub->hdev->maxchild; ++port1) { u16 portstatus, portchange; int status; status = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (!status && portchange) return 1; } return 0; } static int hub_suspend(struct usb_interface *intf, pm_message_t msg) { struct usb_hub *hub = usb_get_intfdata(intf); struct usb_device *hdev = hub->hdev; unsigned port1; /* * Warn if children aren't already suspended. * Also, add up the number of wakeup-enabled descendants. */ hub->wakeup_enabled_descendants = 0; for (port1 = 1; port1 <= hdev->maxchild; port1++) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; if (udev && udev->can_submit) { dev_warn(&port_dev->dev, "device %s not suspended yet\n", dev_name(&udev->dev)); if (PMSG_IS_AUTO(msg)) return -EBUSY; } if (udev) hub->wakeup_enabled_descendants += usb_wakeup_enabled_descendants(udev); } if (hdev->do_remote_wakeup && hub->quirk_check_port_auto_suspend) { /* check if there are changes pending on hub ports */ if (check_ports_changed(hub)) { if (PMSG_IS_AUTO(msg)) return -EBUSY; pm_wakeup_event(&hdev->dev, 2000); } } if (hub_is_superspeed(hdev) && hdev->do_remote_wakeup) { /* Enable hub to send remote wakeup for all ports. */ for (port1 = 1; port1 <= hdev->maxchild; port1++) { set_port_feature(hdev, port1 | USB_PORT_FEAT_REMOTE_WAKE_CONNECT | USB_PORT_FEAT_REMOTE_WAKE_DISCONNECT | USB_PORT_FEAT_REMOTE_WAKE_OVER_CURRENT, USB_PORT_FEAT_REMOTE_WAKE_MASK); } } dev_dbg(&intf->dev, "%s\n", __func__); /* stop hub_wq and related activity */ hub_quiesce(hub, HUB_SUSPEND); return 0; } /* Report wakeup requests from the ports of a resuming root hub */ static void report_wakeup_requests(struct usb_hub *hub) { struct usb_device *hdev = hub->hdev; struct usb_device *udev; struct usb_hcd *hcd; unsigned long resuming_ports; int i; if (hdev->parent) return; /* Not a root hub */ hcd = bus_to_hcd(hdev->bus); if (hcd->driver->get_resuming_ports) { /* * The get_resuming_ports() method returns a bitmap (origin 0) * of ports which have started wakeup signaling but have not * yet finished resuming. During system resume we will * resume all the enabled ports, regardless of any wakeup * signals, which means the wakeup requests would be lost. * To prevent this, report them to the PM core here. */ resuming_ports = hcd->driver->get_resuming_ports(hcd); for (i = 0; i < hdev->maxchild; ++i) { if (test_bit(i, &resuming_ports)) { udev = hub->ports[i]->child; if (udev) pm_wakeup_event(&udev->dev, 0); } } } } static int hub_resume(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); dev_dbg(&intf->dev, "%s\n", __func__); hub_activate(hub, HUB_RESUME); /* * This should be called only for system resume, not runtime resume. * We can't tell the difference here, so some wakeup requests will be * reported at the wrong time or more than once. This shouldn't * matter much, so long as they do get reported. */ report_wakeup_requests(hub); return 0; } static int hub_reset_resume(struct usb_interface *intf) { struct usb_hub *hub = usb_get_intfdata(intf); dev_dbg(&intf->dev, "%s\n", __func__); hub_activate(hub, HUB_RESET_RESUME); return 0; } /** * usb_root_hub_lost_power - called by HCD if the root hub lost Vbus power * @rhdev: struct usb_device for the root hub * * The USB host controller driver calls this function when its root hub * is resumed and Vbus power has been interrupted or the controller * has been reset. The routine marks @rhdev as having lost power. * When the hub driver is resumed it will take notice and carry out * power-session recovery for all the "USB-PERSIST"-enabled child devices; * the others will be disconnected. */ void usb_root_hub_lost_power(struct usb_device *rhdev) { dev_notice(&rhdev->dev, "root hub lost power or was reset\n"); rhdev->reset_resume = 1; } EXPORT_SYMBOL_GPL(usb_root_hub_lost_power); static const char * const usb3_lpm_names[] = { "U0", "U1", "U2", "U3", }; /* * Send a Set SEL control transfer to the device, prior to enabling * device-initiated U1 or U2. This lets the device know the exit latencies from * the time the device initiates a U1 or U2 exit, to the time it will receive a * packet from the host. * * This function will fail if the SEL or PEL values for udev are greater than * the maximum allowed values for the link state to be enabled. */ static int usb_req_set_sel(struct usb_device *udev) { struct usb_set_sel_req *sel_values; unsigned long long u1_sel; unsigned long long u1_pel; unsigned long long u2_sel; unsigned long long u2_pel; int ret; if (!udev->parent || udev->speed < USB_SPEED_SUPER || !udev->lpm_capable) return 0; /* Convert SEL and PEL stored in ns to us */ u1_sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); u1_pel = DIV_ROUND_UP(udev->u1_params.pel, 1000); u2_sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); u2_pel = DIV_ROUND_UP(udev->u2_params.pel, 1000); /* * Make sure that the calculated SEL and PEL values for the link * state we're enabling aren't bigger than the max SEL/PEL * value that will fit in the SET SEL control transfer. * Otherwise the device would get an incorrect idea of the exit * latency for the link state, and could start a device-initiated * U1/U2 when the exit latencies are too high. */ if (u1_sel > USB3_LPM_MAX_U1_SEL_PEL || u1_pel > USB3_LPM_MAX_U1_SEL_PEL || u2_sel > USB3_LPM_MAX_U2_SEL_PEL || u2_pel > USB3_LPM_MAX_U2_SEL_PEL) { dev_dbg(&udev->dev, "Device-initiated U1/U2 disabled due to long SEL or PEL\n"); return -EINVAL; } /* * usb_enable_lpm() can be called as part of a failed device reset, * which may be initiated by an error path of a mass storage driver. * Therefore, use GFP_NOIO. */ sel_values = kmalloc(sizeof *(sel_values), GFP_NOIO); if (!sel_values) return -ENOMEM; sel_values->u1_sel = u1_sel; sel_values->u1_pel = u1_pel; sel_values->u2_sel = cpu_to_le16(u2_sel); sel_values->u2_pel = cpu_to_le16(u2_pel); ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_SEL, USB_RECIP_DEVICE, 0, 0, sel_values, sizeof *(sel_values), USB_CTRL_SET_TIMEOUT); kfree(sel_values); if (ret > 0) udev->lpm_devinit_allow = 1; return ret; } /* * Enable or disable device-initiated U1 or U2 transitions. */ static int usb_set_device_initiated_lpm(struct usb_device *udev, enum usb3_link_state state, bool enable) { int ret; int feature; switch (state) { case USB3_LPM_U1: feature = USB_DEVICE_U1_ENABLE; break; case USB3_LPM_U2: feature = USB_DEVICE_U2_ENABLE; break; default: dev_warn(&udev->dev, "%s: Can't %s non-U1 or U2 state.\n", __func__, str_enable_disable(enable)); return -EINVAL; } if (udev->state != USB_STATE_CONFIGURED) { dev_dbg(&udev->dev, "%s: Can't %s %s state " "for unconfigured device.\n", __func__, str_enable_disable(enable), usb3_lpm_names[state]); return 0; } if (enable) { /* * Now send the control transfer to enable device-initiated LPM * for either U1 or U2. */ ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } else { ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_CLEAR_FEATURE, USB_RECIP_DEVICE, feature, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); } if (ret < 0) { dev_warn(&udev->dev, "%s of device-initiated %s failed.\n", str_enable_disable(enable), usb3_lpm_names[state]); return -EBUSY; } return 0; } static int usb_set_lpm_timeout(struct usb_device *udev, enum usb3_link_state state, int timeout) { int ret; int feature; switch (state) { case USB3_LPM_U1: feature = USB_PORT_FEAT_U1_TIMEOUT; break; case USB3_LPM_U2: feature = USB_PORT_FEAT_U2_TIMEOUT; break; default: dev_warn(&udev->dev, "%s: Can't set timeout for non-U1 or U2 state.\n", __func__); return -EINVAL; } if (state == USB3_LPM_U1 && timeout > USB3_LPM_U1_MAX_TIMEOUT && timeout != USB3_LPM_DEVICE_INITIATED) { dev_warn(&udev->dev, "Failed to set %s timeout to 0x%x, " "which is a reserved value.\n", usb3_lpm_names[state], timeout); return -EINVAL; } ret = set_port_feature(udev->parent, USB_PORT_LPM_TIMEOUT(timeout) | udev->portnum, feature); if (ret < 0) { dev_warn(&udev->dev, "Failed to set %s timeout to 0x%x," "error code %i\n", usb3_lpm_names[state], timeout, ret); return -EBUSY; } if (state == USB3_LPM_U1) udev->u1_params.timeout = timeout; else udev->u2_params.timeout = timeout; return 0; } /* * Don't allow device intiated U1/U2 if the system exit latency + one bus * interval is greater than the minimum service interval of any active * periodic endpoint. See USB 3.2 section 9.4.9 */ static bool usb_device_may_initiate_lpm(struct usb_device *udev, enum usb3_link_state state) { unsigned int sel; /* us */ int i, j; if (!udev->lpm_devinit_allow) return false; if (state == USB3_LPM_U1) sel = DIV_ROUND_UP(udev->u1_params.sel, 1000); else if (state == USB3_LPM_U2) sel = DIV_ROUND_UP(udev->u2_params.sel, 1000); else return false; for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { struct usb_interface *intf; struct usb_endpoint_descriptor *desc; unsigned int interval; intf = udev->actconfig->interface[i]; if (!intf) continue; for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) { desc = &intf->cur_altsetting->endpoint[j].desc; if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = (1 << (desc->bInterval - 1)) * 125; if (sel + 125 > interval) return false; } } } return true; } /* * Enable the hub-initiated U1/U2 idle timeouts, and enable device-initiated * U1/U2 entry. * * We will attempt to enable U1 or U2, but there are no guarantees that the * control transfers to set the hub timeout or enable device-initiated U1/U2 * will be successful. * * If the control transfer to enable device-initiated U1/U2 entry fails, then * hub-initiated U1/U2 will be disabled. * * If we cannot set the parent hub U1/U2 timeout, we attempt to let the xHCI * driver know about it. If that call fails, it should be harmless, and just * take up more slightly more bus bandwidth for unnecessary U1/U2 exit latency. */ static void usb_enable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { int timeout; __u8 u1_mel; __le16 u2_mel; /* Skip if the device BOS descriptor couldn't be read */ if (!udev->bos) return; u1_mel = udev->bos->ss_cap->bU1devExitLat; u2_mel = udev->bos->ss_cap->bU2DevExitLat; /* If the device says it doesn't have *any* exit latency to come out of * U1 or U2, it's probably lying. Assume it doesn't implement that link * state. */ if ((state == USB3_LPM_U1 && u1_mel == 0) || (state == USB3_LPM_U2 && u2_mel == 0)) return; /* We allow the host controller to set the U1/U2 timeout internally * first, so that it can change its schedule to account for the * additional latency to send data to a device in a lower power * link state. */ timeout = hcd->driver->enable_usb3_lpm_timeout(hcd, udev, state); /* xHCI host controller doesn't want to enable this LPM state. */ if (timeout == 0) return; if (timeout < 0) { dev_warn(&udev->dev, "Could not enable %s link state, " "xHCI error %i.\n", usb3_lpm_names[state], timeout); return; } if (usb_set_lpm_timeout(udev, state, timeout)) { /* If we can't set the parent hub U1/U2 timeout, * device-initiated LPM won't be allowed either, so let the xHCI * host know that this link state won't be enabled. */ hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); return; } /* Only a configured device will accept the Set Feature * U1/U2_ENABLE */ if (udev->actconfig && usb_device_may_initiate_lpm(udev, state)) { if (usb_set_device_initiated_lpm(udev, state, true)) { /* * Request to enable device initiated U1/U2 failed, * better to turn off lpm in this case. */ usb_set_lpm_timeout(udev, state, 0); hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state); return; } } if (state == USB3_LPM_U1) udev->usb3_lpm_u1_enabled = 1; else if (state == USB3_LPM_U2) udev->usb3_lpm_u2_enabled = 1; } /* * Disable the hub-initiated U1/U2 idle timeouts, and disable device-initiated * U1/U2 entry. * * If this function returns -EBUSY, the parent hub will still allow U1/U2 entry. * If zero is returned, the parent will not allow the link to go into U1/U2. * * If zero is returned, device-initiated U1/U2 entry may still be enabled, but * it won't have an effect on the bus link state because the parent hub will * still disallow device-initiated U1/U2 entry. * * If zero is returned, the xHCI host controller may still think U1/U2 entry is * possible. The result will be slightly more bus bandwidth will be taken up * (to account for U1/U2 exit latency), but it should be harmless. */ static int usb_disable_link_state(struct usb_hcd *hcd, struct usb_device *udev, enum usb3_link_state state) { switch (state) { case USB3_LPM_U1: case USB3_LPM_U2: break; default: dev_warn(&udev->dev, "%s: Can't disable non-U1 or U2 state.\n", __func__); return -EINVAL; } if (usb_set_lpm_timeout(udev, state, 0)) return -EBUSY; usb_set_device_initiated_lpm(udev, state, false); if (hcd->driver->disable_usb3_lpm_timeout(hcd, udev, state)) dev_warn(&udev->dev, "Could not disable xHCI %s timeout, " "bus schedule bandwidth may be impacted.\n", usb3_lpm_names[state]); /* As soon as usb_set_lpm_timeout(0) return 0, hub initiated LPM * is disabled. Hub will disallows link to enter U1/U2 as well, * even device is initiating LPM. Hence LPM is disabled if hub LPM * timeout set to 0, no matter device-initiated LPM is disabled or * not. */ if (state == USB3_LPM_U1) udev->usb3_lpm_u1_enabled = 0; else if (state == USB3_LPM_U2) udev->usb3_lpm_u2_enabled = 0; return 0; } /* * Disable hub-initiated and device-initiated U1 and U2 entry. * Caller must own the bandwidth_mutex. * * This will call usb_enable_lpm() on failure, which will decrement * lpm_disable_count, and will re-enable LPM if lpm_disable_count reaches zero. */ int usb_disable_lpm(struct usb_device *udev) { struct usb_hcd *hcd; if (!udev || !udev->parent || udev->speed < USB_SPEED_SUPER || !udev->lpm_capable || udev->state < USB_STATE_CONFIGURED) return 0; hcd = bus_to_hcd(udev->bus); if (!hcd || !hcd->driver->disable_usb3_lpm_timeout) return 0; udev->lpm_disable_count++; if ((udev->u1_params.timeout == 0 && udev->u2_params.timeout == 0)) return 0; /* If LPM is enabled, attempt to disable it. */ if (usb_disable_link_state(hcd, udev, USB3_LPM_U1)) goto enable_lpm; if (usb_disable_link_state(hcd, udev, USB3_LPM_U2)) goto enable_lpm; return 0; enable_lpm: usb_enable_lpm(udev); return -EBUSY; } EXPORT_SYMBOL_GPL(usb_disable_lpm); /* Grab the bandwidth_mutex before calling usb_disable_lpm() */ int usb_unlocked_disable_lpm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); int ret; if (!hcd) return -EINVAL; mutex_lock(hcd->bandwidth_mutex); ret = usb_disable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); return ret; } EXPORT_SYMBOL_GPL(usb_unlocked_disable_lpm); /* * Attempt to enable device-initiated and hub-initiated U1 and U2 entry. The * xHCI host policy may prevent U1 or U2 from being enabled. * * Other callers may have disabled link PM, so U1 and U2 entry will be disabled * until the lpm_disable_count drops to zero. Caller must own the * bandwidth_mutex. */ void usb_enable_lpm(struct usb_device *udev) { struct usb_hcd *hcd; struct usb_hub *hub; struct usb_port *port_dev; if (!udev || !udev->parent || udev->speed < USB_SPEED_SUPER || !udev->lpm_capable || udev->state < USB_STATE_CONFIGURED) return; udev->lpm_disable_count--; hcd = bus_to_hcd(udev->bus); /* Double check that we can both enable and disable LPM. * Device must be configured to accept set feature U1/U2 timeout. */ if (!hcd || !hcd->driver->enable_usb3_lpm_timeout || !hcd->driver->disable_usb3_lpm_timeout) return; if (udev->lpm_disable_count > 0) return; hub = usb_hub_to_struct_hub(udev->parent); if (!hub) return; port_dev = hub->ports[udev->portnum - 1]; if (port_dev->usb3_lpm_u1_permit) usb_enable_link_state(hcd, udev, USB3_LPM_U1); if (port_dev->usb3_lpm_u2_permit) usb_enable_link_state(hcd, udev, USB3_LPM_U2); } EXPORT_SYMBOL_GPL(usb_enable_lpm); /* Grab the bandwidth_mutex before calling usb_enable_lpm() */ void usb_unlocked_enable_lpm(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (!hcd) return; mutex_lock(hcd->bandwidth_mutex); usb_enable_lpm(udev); mutex_unlock(hcd->bandwidth_mutex); } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); /* usb3 devices use U3 for disabled, make sure remote wakeup is disabled */ static void hub_usb3_port_prepare_disable(struct usb_hub *hub, struct usb_port *port_dev) { struct usb_device *udev = port_dev->child; int ret; if (udev && udev->port_is_suspended && udev->do_remote_wakeup) { ret = hub_set_port_link_state(hub, port_dev->portnum, USB_SS_PORT_LS_U0); if (!ret) { msleep(USB_RESUME_TIMEOUT); ret = usb_disable_remote_wakeup(udev); } if (ret) dev_warn(&udev->dev, "Port disable: can't disable remote wake\n"); udev->do_remote_wakeup = 0; } } #else /* CONFIG_PM */ #define hub_suspend NULL #define hub_resume NULL #define hub_reset_resume NULL static inline void hub_usb3_port_prepare_disable(struct usb_hub *hub, struct usb_port *port_dev) { } int usb_disable_lpm(struct usb_device *udev) { return 0; } EXPORT_SYMBOL_GPL(usb_disable_lpm); void usb_enable_lpm(struct usb_device *udev) { } EXPORT_SYMBOL_GPL(usb_enable_lpm); int usb_unlocked_disable_lpm(struct usb_device *udev) { return 0; } EXPORT_SYMBOL_GPL(usb_unlocked_disable_lpm); void usb_unlocked_enable_lpm(struct usb_device *udev) { } EXPORT_SYMBOL_GPL(usb_unlocked_enable_lpm); int usb_disable_ltm(struct usb_device *udev) { return 0; } EXPORT_SYMBOL_GPL(usb_disable_ltm); void usb_enable_ltm(struct usb_device *udev) { } EXPORT_SYMBOL_GPL(usb_enable_ltm); static int hub_handle_remote_wakeup(struct usb_hub *hub, unsigned int port, u16 portstatus, u16 portchange) { return 0; } static int usb_req_set_sel(struct usb_device *udev) { return 0; } #endif /* CONFIG_PM */ /* * USB-3 does not have a similar link state as USB-2 that will avoid negotiating * a connection with a plugged-in cable but will signal the host when the cable * is unplugged. Disable remote wake and set link state to U3 for USB-3 devices */ static int hub_port_disable(struct usb_hub *hub, int port1, int set_state) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *hdev = hub->hdev; int ret = 0; if (!hub->error) { if (hub_is_superspeed(hub->hdev)) { hub_usb3_port_prepare_disable(hub, port_dev); ret = hub_set_port_link_state(hub, port_dev->portnum, USB_SS_PORT_LS_U3); } else { ret = usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_ENABLE); } } if (port_dev->child && set_state) usb_set_device_state(port_dev->child, USB_STATE_NOTATTACHED); if (ret && ret != -ENODEV) dev_err(&port_dev->dev, "cannot disable (err = %d)\n", ret); return ret; } /* * usb_port_disable - disable a usb device's upstream port * @udev: device to disable * Context: @udev locked, must be able to sleep. * * Disables a USB device that isn't in active use. */ int usb_port_disable(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); return hub_port_disable(hub, udev->portnum, 0); } /* USB 2.0 spec, 7.1.7.3 / fig 7-29: * * Between connect detection and reset signaling there must be a delay * of 100ms at least for debounce and power-settling. The corresponding * timer shall restart whenever the downstream port detects a disconnect. * * Apparently there are some bluetooth and irda-dongles and a number of * low-speed devices for which this debounce period may last over a second. * Not covered by the spec - but easy to deal with. * * This implementation uses a 1500ms total debounce timeout; if the * connection isn't stable by then it returns -ETIMEDOUT. It checks * every 25ms for transient disconnects. When the port status has been * unchanged for 100ms it returns the port status. */ int hub_port_debounce(struct usb_hub *hub, int port1, bool must_be_connected) { int ret; u16 portchange, portstatus; unsigned connection = 0xffff; int total_time, stable_time = 0; struct usb_port *port_dev = hub->ports[port1 - 1]; for (total_time = 0; ; total_time += HUB_DEBOUNCE_STEP) { ret = usb_hub_port_status(hub, port1, &portstatus, &portchange); if (ret < 0) return ret; if (!(portchange & USB_PORT_STAT_C_CONNECTION) && (portstatus & USB_PORT_STAT_CONNECTION) == connection) { if (!must_be_connected || (connection == USB_PORT_STAT_CONNECTION)) stable_time += HUB_DEBOUNCE_STEP; if (stable_time >= HUB_DEBOUNCE_STABLE) break; } else { stable_time = 0; connection = portstatus & USB_PORT_STAT_CONNECTION; } if (portchange & USB_PORT_STAT_C_CONNECTION) { usb_clear_port_feature(hub->hdev, port1, USB_PORT_FEAT_C_CONNECTION); } if (total_time >= HUB_DEBOUNCE_TIMEOUT) break; msleep(HUB_DEBOUNCE_STEP); } dev_dbg(&port_dev->dev, "debounce total %dms stable %dms status 0x%x\n", total_time, stable_time, portstatus); if (stable_time < HUB_DEBOUNCE_STABLE) return -ETIMEDOUT; return portstatus; } void usb_ep0_reinit(struct usb_device *udev) { usb_disable_endpoint(udev, 0 + USB_DIR_IN, true); usb_disable_endpoint(udev, 0 + USB_DIR_OUT, true); usb_enable_endpoint(udev, &udev->ep0, true); } EXPORT_SYMBOL_GPL(usb_ep0_reinit); static int hub_set_address(struct usb_device *udev, int devnum) { int retval; unsigned int timeout_ms = USB_CTRL_SET_TIMEOUT; struct usb_hcd *hcd = bus_to_hcd(udev->bus); struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); if (hub->hdev->quirks & USB_QUIRK_SHORT_SET_ADDRESS_REQ_TIMEOUT) timeout_ms = USB_SHORT_SET_ADDRESS_REQ_TIMEOUT; /* * The host controller will choose the device address, * instead of the core having chosen it earlier */ if (!hcd->driver->address_device && devnum <= 1) return -EINVAL; if (udev->state == USB_STATE_ADDRESS) return 0; if (udev->state != USB_STATE_DEFAULT) return -EINVAL; if (hcd->driver->address_device) retval = hcd->driver->address_device(hcd, udev, timeout_ms); else retval = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_ADDRESS, 0, devnum, 0, NULL, 0, timeout_ms); if (retval == 0) { update_devnum(udev, devnum); /* Device now using proper address. */ usb_set_device_state(udev, USB_STATE_ADDRESS); usb_ep0_reinit(udev); } return retval; } /* * There are reports of USB 3.0 devices that say they support USB 2.0 Link PM * when they're plugged into a USB 2.0 port, but they don't work when LPM is * enabled. * * Only enable USB 2.0 Link PM if the port is internal (hardwired), or the * device says it supports the new USB 2.0 Link PM errata by setting the BESL * support bit in the BOS descriptor. */ static void hub_set_initial_usb2_lpm_policy(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); int connect_type = USB_PORT_CONNECT_TYPE_UNKNOWN; if (!udev->usb2_hw_lpm_capable || !udev->bos) return; if (hub) connect_type = hub->ports[udev->portnum - 1]->connect_type; if ((udev->bos->ext_cap->bmAttributes & cpu_to_le32(USB_BESL_SUPPORT)) || connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { udev->usb2_hw_lpm_allowed = 1; usb_enable_usb2_hardware_lpm(udev); } } static int hub_enable_device(struct usb_device *udev) { struct usb_hcd *hcd = bus_to_hcd(udev->bus); if (!hcd->driver->enable_device) return 0; if (udev->state == USB_STATE_ADDRESS) return 0; if (udev->state != USB_STATE_DEFAULT) return -EINVAL; return hcd->driver->enable_device(hcd, udev); } /* * Get the bMaxPacketSize0 value during initialization by reading the * device's device descriptor. Since we don't already know this value, * the transfer is unsafe and it ignores I/O errors, only testing for * reasonable received values. * * For "old scheme" initialization, size will be 8 so we read just the * start of the device descriptor, which should work okay regardless of * the actual bMaxPacketSize0 value. For "new scheme" initialization, * size will be 64 (and buf will point to a sufficiently large buffer), * which might not be kosher according to the USB spec but it's what * Windows does and what many devices expect. * * Returns: bMaxPacketSize0 or a negative error code. */ static int get_bMaxPacketSize0(struct usb_device *udev, struct usb_device_descriptor *buf, int size, bool first_time) { int i, rc; /* * Retry on all errors; some devices are flakey. * 255 is for WUSB devices, we actually need to use * 512 (WUSB1.0[4.8.1]). */ for (i = 0; i < GET_MAXPACKET0_TRIES; ++i) { /* Start with invalid values in case the transfer fails */ buf->bDescriptorType = buf->bMaxPacketSize0 = 0; rc = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), USB_REQ_GET_DESCRIPTOR, USB_DIR_IN, USB_DT_DEVICE << 8, 0, buf, size, initial_descriptor_timeout); switch (buf->bMaxPacketSize0) { case 8: case 16: case 32: case 64: case 9: if (buf->bDescriptorType == USB_DT_DEVICE) { rc = buf->bMaxPacketSize0; break; } fallthrough; default: if (rc >= 0) rc = -EPROTO; break; } /* * Some devices time out if they are powered on * when already connected. They need a second * reset, so return early. But only on the first * attempt, lest we get into a time-out/reset loop. */ if (rc > 0 || (rc == -ETIMEDOUT && first_time && udev->speed > USB_SPEED_FULL)) break; } return rc; } #define GET_DESCRIPTOR_BUFSIZE 64 /* Reset device, (re)assign address, get device descriptor. * Device connection must be stable, no more debouncing needed. * Returns device in USB_STATE_ADDRESS, except on error. * * If this is called for an already-existing device (as part of * usb_reset_and_verify_device), the caller must own the device lock and * the port lock. For a newly detected device that is not accessible * through any global pointers, it's not necessary to lock the device, * but it is still necessary to lock the port. * * For a newly detected device, @dev_descr must be NULL. The device * descriptor retrieved from the device will then be stored in * @udev->descriptor. For an already existing device, @dev_descr * must be non-NULL. The device descriptor will be stored there, * not in @udev->descriptor, because descriptors for registered * devices are meant to be immutable. */ static int hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, int retry_counter, struct usb_device_descriptor *dev_descr) { struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_port *port_dev = hub->ports[port1 - 1]; int retries, operations, retval, i; unsigned delay = HUB_SHORT_RESET_TIME; enum usb_device_speed oldspeed = udev->speed; const char *speed; int devnum = udev->devnum; const char *driver_name; bool do_new_scheme; const bool initial = !dev_descr; int maxp0; struct usb_device_descriptor *buf, *descr; buf = kmalloc(GET_DESCRIPTOR_BUFSIZE, GFP_NOIO); if (!buf) return -ENOMEM; /* root hub ports have a slightly longer reset period * (from USB 2.0 spec, section 7.1.7.5) */ if (!hdev->parent) { delay = HUB_ROOT_RESET_TIME; if (port1 == hdev->bus->otg_port) hdev->bus->b_hnp_enable = 0; } /* Some low speed devices have problems with the quick delay, so */ /* be a bit pessimistic with those devices. RHbug #23670 */ if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ retval = hub_port_reset(hub, port1, udev, delay, false); if (retval < 0) /* error or disconnect */ goto fail; /* success, speed is known */ retval = -ENODEV; /* Don't allow speed changes at reset, except usb 3.0 to faster */ if (oldspeed != USB_SPEED_UNKNOWN && oldspeed != udev->speed && !(oldspeed == USB_SPEED_SUPER && udev->speed > oldspeed)) { dev_dbg(&udev->dev, "device reset changed speed!\n"); goto fail; } oldspeed = udev->speed; if (initial) { /* USB 2.0 section 5.5.3 talks about ep0 maxpacket ... * it's fixed size except for full speed devices. */ switch (udev->speed) { case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: udev->ep0.desc.wMaxPacketSize = cpu_to_le16(512); break; case USB_SPEED_HIGH: /* fixed at 64 */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); break; case USB_SPEED_FULL: /* 8, 16, 32, or 64 */ /* to determine the ep0 maxpacket size, try to read * the device descriptor to get bMaxPacketSize0 and * then correct our initial guess. */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(64); break; case USB_SPEED_LOW: /* fixed at 8 */ udev->ep0.desc.wMaxPacketSize = cpu_to_le16(8); break; default: goto fail; } } speed = usb_speed_string(udev->speed); /* * The controller driver may be NULL if the controller device * is the middle device between platform device and roothub. * This middle device may not need a device driver due to * all hardware control can be at platform device driver, this * platform device is usually a dual-role USB controller device. */ if (udev->bus->controller->driver) driver_name = udev->bus->controller->driver->name; else driver_name = udev->bus->sysdev->driver->name; if (udev->speed < USB_SPEED_SUPER) dev_info(&udev->dev, "%s %s USB device number %d using %s\n", (initial ? "new" : "reset"), speed, devnum, driver_name); if (initial) { /* Set up TT records, if needed */ if (hdev->tt) { udev->tt = hdev->tt; udev->ttport = hdev->ttport; } else if (udev->speed != USB_SPEED_HIGH && hdev->speed == USB_SPEED_HIGH) { if (!hub->tt.hub) { dev_err(&udev->dev, "parent hub has no TT\n"); retval = -EINVAL; goto fail; } udev->tt = &hub->tt; udev->ttport = port1; } } /* Why interleave GET_DESCRIPTOR and SET_ADDRESS this way? * Because device hardware and firmware is sometimes buggy in * this area, and this is how Linux has done it for ages. * Change it cautiously. * * NOTE: If use_new_scheme() is true we will start by issuing * a 64-byte GET_DESCRIPTOR request. This is what Windows does, * so it may help with some non-standards-compliant devices. * Otherwise we start with SET_ADDRESS and then try to read the * first 8 bytes of the device descriptor to get the ep0 maxpacket * value. */ do_new_scheme = use_new_scheme(udev, retry_counter, port_dev); for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) { if (hub_port_stop_enumerate(hub, port1, retries)) { retval = -ENODEV; break; } if (do_new_scheme) { retval = hub_enable_device(udev); if (retval < 0) { dev_err(&udev->dev, "hub failed to enable device, error %d\n", retval); goto fail; } maxp0 = get_bMaxPacketSize0(udev, buf, GET_DESCRIPTOR_BUFSIZE, retries == 0); if (maxp0 > 0 && !initial && maxp0 != udev->descriptor.bMaxPacketSize0) { dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n"); retval = -ENODEV; goto fail; } retval = hub_port_reset(hub, port1, udev, delay, false); if (retval < 0) /* error or disconnect */ goto fail; if (oldspeed != udev->speed) { dev_dbg(&udev->dev, "device reset changed speed!\n"); retval = -ENODEV; goto fail; } if (maxp0 < 0) { if (maxp0 != -ENODEV) dev_err(&udev->dev, "device descriptor read/64, error %d\n", maxp0); retval = maxp0; continue; } } for (operations = 0; operations < SET_ADDRESS_TRIES; ++operations) { retval = hub_set_address(udev, devnum); if (retval >= 0) break; msleep(200); } if (retval < 0) { if (retval != -ENODEV) dev_err(&udev->dev, "device not accepting address %d, error %d\n", devnum, retval); goto fail; } if (udev->speed >= USB_SPEED_SUPER) { devnum = udev->devnum; dev_info(&udev->dev, "%s SuperSpeed%s%s USB device number %d using %s\n", (udev->config) ? "reset" : "new", (udev->speed == USB_SPEED_SUPER_PLUS) ? " Plus" : "", (udev->ssp_rate == USB_SSP_GEN_2x2) ? " Gen 2x2" : (udev->ssp_rate == USB_SSP_GEN_2x1) ? " Gen 2x1" : (udev->ssp_rate == USB_SSP_GEN_1x2) ? " Gen 1x2" : "", devnum, driver_name); } /* * cope with hardware quirkiness: * - let SET_ADDRESS settle, some device hardware wants it * - read ep0 maxpacket even for high and low speed, */ msleep(10); if (do_new_scheme) break; maxp0 = get_bMaxPacketSize0(udev, buf, 8, retries == 0); if (maxp0 < 0) { retval = maxp0; if (retval != -ENODEV) dev_err(&udev->dev, "device descriptor read/8, error %d\n", retval); } else { u32 delay; if (!initial && maxp0 != udev->descriptor.bMaxPacketSize0) { dev_err(&udev->dev, "device reset changed ep0 maxpacket size!\n"); retval = -ENODEV; goto fail; } delay = udev->parent->hub_delay; udev->hub_delay = min_t(u32, delay, USB_TP_TRANSMISSION_DELAY_MAX); retval = usb_set_isoch_delay(udev); if (retval) { dev_dbg(&udev->dev, "Failed set isoch delay, error %d\n", retval); retval = 0; } break; } } if (retval) goto fail; /* * Check the ep0 maxpacket guess and correct it if necessary. * maxp0 is the value stored in the device descriptor; * i is the value it encodes (logarithmic for SuperSpeed or greater). */ i = maxp0; if (udev->speed >= USB_SPEED_SUPER) { if (maxp0 <= 16) i = 1 << maxp0; else i = 0; /* Invalid */ } if (usb_endpoint_maxp(&udev->ep0.desc) == i) { ; /* Initial ep0 maxpacket guess is right */ } else if (((udev->speed == USB_SPEED_FULL || udev->speed == USB_SPEED_HIGH) && (i == 8 || i == 16 || i == 32 || i == 64)) || (udev->speed >= USB_SPEED_SUPER && i > 0)) { /* Initial guess is wrong; use the descriptor's value */ if (udev->speed == USB_SPEED_FULL) dev_dbg(&udev->dev, "ep0 maxpacket = %d\n", i); else dev_warn(&udev->dev, "Using ep0 maxpacket: %d\n", i); udev->ep0.desc.wMaxPacketSize = cpu_to_le16(i); usb_ep0_reinit(udev); } else { /* Initial guess is wrong and descriptor's value is invalid */ dev_err(&udev->dev, "Invalid ep0 maxpacket: %d\n", maxp0); retval = -EMSGSIZE; goto fail; } descr = usb_get_device_descriptor(udev); if (IS_ERR(descr)) { retval = PTR_ERR(descr); if (retval != -ENODEV) dev_err(&udev->dev, "device descriptor read/all, error %d\n", retval); goto fail; } if (initial) udev->descriptor = *descr; else *dev_descr = *descr; kfree(descr); /* * Some superspeed devices have finished the link training process * and attached to a superspeed hub port, but the device descriptor * got from those devices show they aren't superspeed devices. Warm * reset the port attached by the devices can fix them. */ if ((udev->speed >= USB_SPEED_SUPER) && (le16_to_cpu(udev->descriptor.bcdUSB) < 0x0300)) { dev_err(&udev->dev, "got a wrong device descriptor, warm reset device\n"); hub_port_reset(hub, port1, udev, HUB_BH_RESET_TIME, true); retval = -EINVAL; goto fail; } usb_detect_quirks(udev); if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0201) { retval = usb_get_bos_descriptor(udev); if (!retval) { udev->lpm_capable = usb_device_supports_lpm(udev); udev->lpm_disable_count = 1; usb_set_lpm_parameters(udev); usb_req_set_sel(udev); } } retval = 0; /* notify HCD that we have a device connected and addressed */ if (hcd->driver->update_device) hcd->driver->update_device(hcd, udev); hub_set_initial_usb2_lpm_policy(udev); fail: if (retval) { hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } kfree(buf); return retval; } static void check_highspeed(struct usb_hub *hub, struct usb_device *udev, int port1) { struct usb_qualifier_descriptor *qual; int status; if (udev->quirks & USB_QUIRK_DEVICE_QUALIFIER) return; qual = kmalloc(sizeof *qual, GFP_KERNEL); if (qual == NULL) return; status = usb_get_descriptor(udev, USB_DT_DEVICE_QUALIFIER, 0, qual, sizeof *qual); if (status == sizeof *qual) { dev_info(&udev->dev, "not running at top speed; " "connect to a high speed hub\n"); /* hub LEDs are probably harder to miss than syslog */ if (hub->has_indicators) { hub->indicator[port1-1] = INDICATOR_GREEN_BLINK; queue_delayed_work(system_power_efficient_wq, &hub->leds, 0); } } kfree(qual); } static unsigned hub_power_remaining(struct usb_hub *hub) { struct usb_device *hdev = hub->hdev; int remaining; int port1; if (!hub->limited_power) return 0; remaining = hdev->bus_mA - hub->descriptor->bHubContrCurrent; for (port1 = 1; port1 <= hdev->maxchild; ++port1) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; unsigned unit_load; int delta; if (!udev) continue; if (hub_is_superspeed(udev)) unit_load = 150; else unit_load = 100; /* * Unconfigured devices may not use more than one unit load, * or 8mA for OTG ports */ if (udev->actconfig) delta = usb_get_max_power(udev, udev->actconfig); else if (port1 != udev->bus->otg_port || hdev->parent) delta = unit_load; else delta = 8; if (delta > hub->mA_per_port) dev_warn(&port_dev->dev, "%dmA is over %umA budget!\n", delta, hub->mA_per_port); remaining -= delta; } if (remaining < 0) { dev_warn(hub->intfdev, "%dmA over power budget!\n", -remaining); remaining = 0; } return remaining; } static int descriptors_changed(struct usb_device *udev, struct usb_device_descriptor *new_device_descriptor, struct usb_host_bos *old_bos) { int changed = 0; unsigned index; unsigned serial_len = 0; unsigned len; unsigned old_length; int length; char *buf; if (memcmp(&udev->descriptor, new_device_descriptor, sizeof(*new_device_descriptor)) != 0) return 1; if ((old_bos && !udev->bos) || (!old_bos && udev->bos)) return 1; if (udev->bos) { len = le16_to_cpu(udev->bos->desc->wTotalLength); if (len != le16_to_cpu(old_bos->desc->wTotalLength)) return 1; if (memcmp(udev->bos->desc, old_bos->desc, len)) return 1; } /* Since the idVendor, idProduct, and bcdDevice values in the * device descriptor haven't changed, we will assume the * Manufacturer and Product strings haven't changed either. * But the SerialNumber string could be different (e.g., a * different flash card of the same brand). */ if (udev->serial) serial_len = strlen(udev->serial) + 1; len = serial_len; for (index = 0; index < udev->descriptor.bNumConfigurations; index++) { old_length = le16_to_cpu(udev->config[index].desc.wTotalLength); len = max(len, old_length); } buf = kmalloc(len, GFP_NOIO); if (!buf) /* assume the worst */ return 1; for (index = 0; index < udev->descriptor.bNumConfigurations; index++) { old_length = le16_to_cpu(udev->config[index].desc.wTotalLength); length = usb_get_descriptor(udev, USB_DT_CONFIG, index, buf, old_length); if (length != old_length) { dev_dbg(&udev->dev, "config index %d, error %d\n", index, length); changed = 1; break; } if (memcmp(buf, udev->rawdescriptors[index], old_length) != 0) { dev_dbg(&udev->dev, "config index %d changed (#%d)\n", index, ((struct usb_config_descriptor *) buf)-> bConfigurationValue); changed = 1; break; } } if (!changed && serial_len) { length = usb_string(udev, udev->descriptor.iSerialNumber, buf, serial_len); if (length + 1 != serial_len) { dev_dbg(&udev->dev, "serial string error %d\n", length); changed = 1; } else if (memcmp(buf, udev->serial, length) != 0) { dev_dbg(&udev->dev, "serial string changed\n"); changed = 1; } } kfree(buf); return changed; } static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) { int status = -ENODEV; int i; unsigned unit_load; struct usb_device *hdev = hub->hdev; struct usb_hcd *hcd = bus_to_hcd(hdev->bus); struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; static int unreliable_port = -1; bool retry_locked; /* Disconnect any existing devices under this port */ if (udev) { if (hcd->usb_phy && !hdev->parent) usb_phy_notify_disconnect(hcd->usb_phy, udev->speed); usb_disconnect(&port_dev->child); } /* We can forget about a "removed" device when there's a physical * disconnect or the connect status changes. */ if (!(portstatus & USB_PORT_STAT_CONNECTION) || (portchange & USB_PORT_STAT_C_CONNECTION)) clear_bit(port1, hub->removed_bits); if (portchange & (USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE)) { status = hub_port_debounce_be_stable(hub, port1); if (status < 0) { if (status != -ENODEV && port1 != unreliable_port && printk_ratelimit()) dev_err(&port_dev->dev, "connect-debounce failed\n"); portstatus &= ~USB_PORT_STAT_CONNECTION; unreliable_port = port1; } else { portstatus = status; } } /* Return now if debouncing failed or nothing is connected or * the device was "removed". */ if (!(portstatus & USB_PORT_STAT_CONNECTION) || test_bit(port1, hub->removed_bits)) { /* * maybe switch power back on (e.g. root hub was reset) * but only if the port isn't owned by someone else. */ if (hub_is_port_power_switchable(hub) && !usb_port_is_power_on(hub, portstatus) && !port_dev->port_owner) set_port_feature(hdev, port1, USB_PORT_FEAT_POWER); if (portstatus & USB_PORT_STAT_ENABLE) goto done; return; } if (hub_is_superspeed(hub->hdev)) unit_load = 150; else unit_load = 100; status = 0; for (i = 0; i < PORT_INIT_TRIES; i++) { if (hub_port_stop_enumerate(hub, port1, i)) { status = -ENODEV; break; } usb_lock_port(port_dev); mutex_lock(hcd->address0_mutex); retry_locked = true; /* reallocate for each attempt, since references * to the previous one can escape in various ways */ udev = usb_alloc_dev(hdev, hdev->bus, port1); if (!udev) { dev_err(&port_dev->dev, "couldn't allocate usb_device\n"); mutex_unlock(hcd->address0_mutex); usb_unlock_port(port_dev); goto done; } usb_set_device_state(udev, USB_STATE_POWERED); udev->bus_mA = hub->mA_per_port; udev->level = hdev->level + 1; /* Devices connected to SuperSpeed hubs are USB 3.0 or later */ if (hub_is_superspeed(hub->hdev)) udev->speed = USB_SPEED_SUPER; else udev->speed = USB_SPEED_UNKNOWN; choose_devnum(udev); if (udev->devnum <= 0) { status = -ENOTCONN; /* Don't retry */ goto loop; } /* reset (non-USB 3.0 devices) and get descriptor */ status = hub_port_init(hub, udev, port1, i, NULL); if (status < 0) goto loop; mutex_unlock(hcd->address0_mutex); usb_unlock_port(port_dev); retry_locked = false; if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(2000); /* consecutive bus-powered hubs aren't reliable; they can * violate the voltage drop budget. if the new child has * a "powered" LED, users should notice we didn't enable it * (without reading syslog), even without per-port LEDs * on the parent. */ if (udev->descriptor.bDeviceClass == USB_CLASS_HUB && udev->bus_mA <= unit_load) { u16 devstat; status = usb_get_std_status(udev, USB_RECIP_DEVICE, 0, &devstat); if (status) { dev_dbg(&udev->dev, "get status %d ?\n", status); goto loop_disable; } if ((devstat & (1 << USB_DEVICE_SELF_POWERED)) == 0) { dev_err(&udev->dev, "can't connect bus-powered hub " "to this port\n"); if (hub->has_indicators) { hub->indicator[port1-1] = INDICATOR_AMBER_BLINK; queue_delayed_work( system_power_efficient_wq, &hub->leds, 0); } status = -ENOTCONN; /* Don't retry */ goto loop_disable; } } /* check for devices running slower than they could */ if (le16_to_cpu(udev->descriptor.bcdUSB) >= 0x0200 && udev->speed == USB_SPEED_FULL && highspeed_hubs != 0) check_highspeed(hub, udev, port1); /* Store the parent's children[] pointer. At this point * udev becomes globally accessible, although presumably * no one will look at it until hdev is unlocked. */ status = 0; mutex_lock(&usb_port_peer_mutex); /* We mustn't add new devices if the parent hub has * been disconnected; we would race with the * recursively_mark_NOTATTACHED() routine. */ spin_lock_irq(&device_state_lock); if (hdev->state == USB_STATE_NOTATTACHED) status = -ENOTCONN; else port_dev->child = udev; spin_unlock_irq(&device_state_lock); mutex_unlock(&usb_port_peer_mutex); /* Run it through the hoops (find a driver, etc) */ if (!status) { status = usb_new_device(udev); if (status) { mutex_lock(&usb_port_peer_mutex); spin_lock_irq(&device_state_lock); port_dev->child = NULL; spin_unlock_irq(&device_state_lock); mutex_unlock(&usb_port_peer_mutex); } else { if (hcd->usb_phy && !hdev->parent) usb_phy_notify_connect(hcd->usb_phy, udev->speed); } } if (status) goto loop_disable; status = hub_power_remaining(hub); if (status) dev_dbg(hub->intfdev, "%dmA power budget left\n", status); return; loop_disable: hub_port_disable(hub, port1, 1); loop: usb_ep0_reinit(udev); release_devnum(udev); hub_free_dev(udev); if (retry_locked) { mutex_unlock(hcd->address0_mutex); usb_unlock_port(port_dev); } usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; /* When halfway through our retry count, power-cycle the port */ if (i == (PORT_INIT_TRIES - 1) / 2) { dev_info(&port_dev->dev, "attempt power cycle\n"); usb_hub_set_port_power(hdev, hub, port1, false); msleep(2 * hub_power_on_good_delay(hub)); usb_hub_set_port_power(hdev, hub, port1, true); msleep(hub_power_on_good_delay(hub)); } } if (hub->hdev->parent || !hcd->driver->port_handed_over || !(hcd->driver->port_handed_over)(hcd, port1)) { if (status != -ENOTCONN && status != -ENODEV) dev_err(&port_dev->dev, "unable to enumerate USB device\n"); } done: hub_port_disable(hub, port1, 1); if (hcd->driver->relinquish_port && !hub->hdev->parent) { if (status != -ENOTCONN && status != -ENODEV) hcd->driver->relinquish_port(hcd, port1); } } /* Handle physical or logical connection change events. * This routine is called when: * a port connection-change occurs; * a port enable-change occurs (often caused by EMI); * usb_reset_and_verify_device() encounters changed descriptors (as from * a firmware download) * caller already locked the hub */ static void hub_port_connect_change(struct usb_hub *hub, int port1, u16 portstatus, u16 portchange) __must_hold(&port_dev->status_lock) { struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; struct usb_device_descriptor *descr; int status = -ENODEV; dev_dbg(&port_dev->dev, "status %04x, change %04x, %s\n", portstatus, portchange, portspeed(hub, portstatus)); if (hub->has_indicators) { set_port_led(hub, port1, HUB_LED_AUTO); hub->indicator[port1-1] = INDICATOR_AUTO; } #ifdef CONFIG_USB_OTG /* during HNP, don't repeat the debounce */ if (hub->hdev->bus->is_b_host) portchange &= ~(USB_PORT_STAT_C_CONNECTION | USB_PORT_STAT_C_ENABLE); #endif /* Try to resuscitate an existing device */ if ((portstatus & USB_PORT_STAT_CONNECTION) && udev && udev->state != USB_STATE_NOTATTACHED) { if (portstatus & USB_PORT_STAT_ENABLE) { /* * USB-3 connections are initialized automatically by * the hostcontroller hardware. Therefore check for * changed device descriptors before resuscitating the * device. */ descr = usb_get_device_descriptor(udev); if (IS_ERR(descr)) { dev_dbg(&udev->dev, "can't read device descriptor %ld\n", PTR_ERR(descr)); } else { if (descriptors_changed(udev, descr, udev->bos)) { dev_dbg(&udev->dev, "device descriptor has changed\n"); } else { status = 0; /* Nothing to do */ } kfree(descr); } #ifdef CONFIG_PM } else if (udev->state == USB_STATE_SUSPENDED && udev->persist_enabled) { /* For a suspended device, treat this as a * remote wakeup event. */ usb_unlock_port(port_dev); status = usb_remote_wakeup(udev); usb_lock_port(port_dev); #endif } else { /* Don't resuscitate */; } } clear_bit(port1, hub->change_bits); /* successfully revalidated the connection */ if (status == 0) return; usb_unlock_port(port_dev); hub_port_connect(hub, port1, portstatus, portchange); usb_lock_port(port_dev); } /* Handle notifying userspace about hub over-current events */ static void port_over_current_notify(struct usb_port *port_dev) { char *envp[3] = { NULL, NULL, NULL }; struct device *hub_dev; char *port_dev_path; sysfs_notify(&port_dev->dev.kobj, NULL, "over_current_count"); hub_dev = port_dev->dev.parent; if (!hub_dev) return; port_dev_path = kobject_get_path(&port_dev->dev.kobj, GFP_KERNEL); if (!port_dev_path) return; envp[0] = kasprintf(GFP_KERNEL, "OVER_CURRENT_PORT=%s", port_dev_path); if (!envp[0]) goto exit; envp[1] = kasprintf(GFP_KERNEL, "OVER_CURRENT_COUNT=%u", port_dev->over_current_count); if (!envp[1]) goto exit; kobject_uevent_env(&hub_dev->kobj, KOBJ_CHANGE, envp); exit: kfree(envp[1]); kfree(envp[0]); kfree(port_dev_path); } static void port_event(struct usb_hub *hub, int port1) __must_hold(&port_dev->status_lock) { int connect_change; struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; struct usb_device *hdev = hub->hdev; u16 portstatus, portchange; int i = 0; connect_change = test_bit(port1, hub->change_bits); clear_bit(port1, hub->event_bits); clear_bit(port1, hub->wakeup_bits); if (usb_hub_port_status(hub, port1, &portstatus, &portchange) < 0) return; if (portchange & USB_PORT_STAT_C_CONNECTION) { usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_CONNECTION); connect_change = 1; } if (portchange & USB_PORT_STAT_C_ENABLE) { if (!connect_change) dev_dbg(&port_dev->dev, "enable change, status %08x\n", portstatus); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_ENABLE); /* * EM interference sometimes causes badly shielded USB devices * to be shutdown by the hub, this hack enables them again. * Works at least with mouse driver. */ if (!(portstatus & USB_PORT_STAT_ENABLE) && !connect_change && udev) { dev_err(&port_dev->dev, "disabled by hub (EMI?), re-enabling...\n"); connect_change = 1; } } if (portchange & USB_PORT_STAT_C_OVERCURRENT) { u16 status = 0, unused; port_dev->over_current_count++; port_over_current_notify(port_dev); dev_dbg(&port_dev->dev, "over-current change #%u\n", port_dev->over_current_count); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_OVER_CURRENT); msleep(100); /* Cool down */ hub_power_on(hub, true); usb_hub_port_status(hub, port1, &status, &unused); if (status & USB_PORT_STAT_OVERCURRENT) dev_err(&port_dev->dev, "over-current condition\n"); } if (portchange & USB_PORT_STAT_C_RESET) { dev_dbg(&port_dev->dev, "reset change\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_RESET); } if ((portchange & USB_PORT_STAT_C_BH_RESET) && hub_is_superspeed(hdev)) { dev_dbg(&port_dev->dev, "warm reset change\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_BH_PORT_RESET); } if (portchange & USB_PORT_STAT_C_LINK_STATE) { dev_dbg(&port_dev->dev, "link state change\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_PORT_LINK_STATE); } if (portchange & USB_PORT_STAT_C_CONFIG_ERROR) { dev_warn(&port_dev->dev, "config error\n"); usb_clear_port_feature(hdev, port1, USB_PORT_FEAT_C_PORT_CONFIG_ERROR); } /* skip port actions that require the port to be powered on */ if (!pm_runtime_active(&port_dev->dev)) return; /* skip port actions if ignore_event and early_stop are true */ if (port_dev->ignore_event && port_dev->early_stop) return; if (hub_handle_remote_wakeup(hub, port1, portstatus, portchange)) connect_change = 1; /* * Avoid trying to recover a USB3 SS.Inactive port with a warm reset if * the device was disconnected. A 12ms disconnect detect timer in * SS.Inactive state transitions the port to RxDetect automatically. * SS.Inactive link error state is common during device disconnect. */ while (hub_port_warm_reset_required(hub, port1, portstatus)) { if ((i++ < DETECT_DISCONNECT_TRIES) && udev) { u16 unused; msleep(20); usb_hub_port_status(hub, port1, &portstatus, &unused); dev_dbg(&port_dev->dev, "Wait for inactive link disconnect detect\n"); continue; } else if (!udev || !(portstatus & USB_PORT_STAT_CONNECTION) || udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&port_dev->dev, "do warm reset, port only\n"); if (hub_port_reset(hub, port1, NULL, HUB_BH_RESET_TIME, true) < 0) hub_port_disable(hub, port1, 1); } else { dev_dbg(&port_dev->dev, "do warm reset, full device\n"); usb_unlock_port(port_dev); usb_lock_device(udev); usb_reset_device(udev); usb_unlock_device(udev); usb_lock_port(port_dev); connect_change = 0; } break; } if (connect_change) hub_port_connect_change(hub, port1, portstatus, portchange); } static void hub_event(struct work_struct *work) { struct usb_device *hdev; struct usb_interface *intf; struct usb_hub *hub; struct device *hub_dev; u16 hubstatus; u16 hubchange; int i, ret; hub = container_of(work, struct usb_hub, events); hdev = hub->hdev; hub_dev = hub->intfdev; intf = to_usb_interface(hub_dev); kcov_remote_start_usb((u64)hdev->bus->busnum); dev_dbg(hub_dev, "state %d ports %d chg %04x evt %04x\n", hdev->state, hdev->maxchild, /* NOTE: expects max 15 ports... */ (u16) hub->change_bits[0], (u16) hub->event_bits[0]); /* Lock the device, then check to see if we were * disconnected while waiting for the lock to succeed. */ usb_lock_device(hdev); if (unlikely(hub->disconnected)) goto out_hdev_lock; /* If the hub has died, clean up after it */ if (hdev->state == USB_STATE_NOTATTACHED) { hub->error = -ENODEV; hub_quiesce(hub, HUB_DISCONNECT); goto out_hdev_lock; } /* Autoresume */ ret = usb_autopm_get_interface(intf); if (ret) { dev_dbg(hub_dev, "Can't autoresume: %d\n", ret); goto out_hdev_lock; } /* If this is an inactive hub, do nothing */ if (hub->quiescing) goto out_autopm; if (hub->error) { dev_dbg(hub_dev, "resetting for error %d\n", hub->error); ret = usb_reset_device(hdev); if (ret) { dev_dbg(hub_dev, "error resetting hub: %d\n", ret); goto out_autopm; } hub->nerrors = 0; hub->error = 0; } /* deal with port status changes */ for (i = 1; i <= hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i - 1]; if (test_bit(i, hub->event_bits) || test_bit(i, hub->change_bits) || test_bit(i, hub->wakeup_bits)) { /* * The get_noresume and barrier ensure that if * the port was in the process of resuming, we * flush that work and keep the port active for * the duration of the port_event(). However, * if the port is runtime pm suspended * (powered-off), we leave it in that state, run * an abbreviated port_event(), and move on. */ pm_runtime_get_noresume(&port_dev->dev); pm_runtime_barrier(&port_dev->dev); usb_lock_port(port_dev); port_event(hub, i); usb_unlock_port(port_dev); pm_runtime_put_sync(&port_dev->dev); } } /* deal with hub status changes */ if (test_and_clear_bit(0, hub->event_bits) == 0) ; /* do nothing */ else if (hub_hub_status(hub, &hubstatus, &hubchange) < 0) dev_err(hub_dev, "get_hub_status failed\n"); else { if (hubchange & HUB_CHANGE_LOCAL_POWER) { dev_dbg(hub_dev, "power change\n"); clear_hub_feature(hdev, C_HUB_LOCAL_POWER); if (hubstatus & HUB_STATUS_LOCAL_POWER) /* FIXME: Is this always true? */ hub->limited_power = 1; else hub->limited_power = 0; } if (hubchange & HUB_CHANGE_OVERCURRENT) { u16 status = 0; u16 unused; dev_dbg(hub_dev, "over-current change\n"); clear_hub_feature(hdev, C_HUB_OVER_CURRENT); msleep(500); /* Cool down */ hub_power_on(hub, true); hub_hub_status(hub, &status, &unused); if (status & HUB_STATUS_OVERCURRENT) dev_err(hub_dev, "over-current condition\n"); } } out_autopm: /* Balance the usb_autopm_get_interface() above */ usb_autopm_put_interface_no_suspend(intf); out_hdev_lock: usb_unlock_device(hdev); /* Balance the stuff in kick_hub_wq() and allow autosuspend */ usb_autopm_put_interface(intf); hub_put(hub); kcov_remote_stop(); } static const struct usb_device_id hub_id_table[] = { { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_SMSC, .idProduct = USB_PRODUCT_USB5534B, .bInterfaceClass = USB_CLASS_HUB, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_CYPRESS, .idProduct = USB_PRODUCT_CY7C65632, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_INT_CLASS, .idVendor = USB_VENDOR_GENESYS_LOGIC, .bInterfaceClass = USB_CLASS_HUB, .driver_info = HUB_QUIRK_CHECK_PORT_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, .idProduct = USB_PRODUCT_TUSB8041_USB2, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_TEXAS_INSTRUMENTS, .idProduct = USB_PRODUCT_TUSB8041_USB3, .driver_info = HUB_QUIRK_DISABLE_AUTOSUSPEND}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_MICROCHIP, .idProduct = USB_PRODUCT_USB4913, .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_MICROCHIP, .idProduct = USB_PRODUCT_USB4914, .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_VENDOR | USB_DEVICE_ID_MATCH_PRODUCT, .idVendor = USB_VENDOR_MICROCHIP, .idProduct = USB_PRODUCT_USB4915, .driver_info = HUB_QUIRK_REDUCE_FRAME_INTR_BINTERVAL}, { .match_flags = USB_DEVICE_ID_MATCH_DEV_CLASS, .bDeviceClass = USB_CLASS_HUB}, { .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS, .bInterfaceClass = USB_CLASS_HUB}, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, hub_id_table); static struct usb_driver hub_driver = { .name = "hub", .probe = hub_probe, .disconnect = hub_disconnect, .suspend = hub_suspend, .resume = hub_resume, .reset_resume = hub_reset_resume, .pre_reset = hub_pre_reset, .post_reset = hub_post_reset, .unlocked_ioctl = hub_ioctl, .id_table = hub_id_table, .supports_autosuspend = 1, }; int usb_hub_init(void) { if (usb_register(&hub_driver) < 0) { printk(KERN_ERR "%s: can't register hub driver\n", usbcore_name); return -1; } /* * The workqueue needs to be freezable to avoid interfering with * USB-PERSIST port handover. Otherwise it might see that a full-speed * device was gone before the EHCI controller had handed its port * over to the companion full-speed controller. */ hub_wq = alloc_workqueue("usb_hub_wq", WQ_FREEZABLE, 0); if (hub_wq) return 0; /* Fall through if kernel_thread failed */ usb_deregister(&hub_driver); pr_err("%s: can't allocate workqueue for usb hub\n", usbcore_name); return -1; } void usb_hub_cleanup(void) { destroy_workqueue(hub_wq); /* * Hub resources are freed for us by usb_deregister. It calls * usb_driver_purge on every device which in turn calls that * devices disconnect function if it is using this driver. * The hub_disconnect function takes care of releasing the * individual hub resources. -greg */ usb_deregister(&hub_driver); } /* usb_hub_cleanup() */ /** * hub_hc_release_resources - clear resources used by host controller * @udev: pointer to device being released * * Context: task context, might sleep * * Function releases the host controller resources in correct order before * making any operation on resuming usb device. The host controller resources * allocated for devices in tree should be released starting from the last * usb device in tree toward the root hub. This function is used only during * resuming device when usb device require reinitialization – that is, when * flag udev->reset_resume is set. * * This call is synchronous, and may not be used in an interrupt context. */ static void hub_hc_release_resources(struct usb_device *udev) { struct usb_hub *hub = usb_hub_to_struct_hub(udev); struct usb_hcd *hcd = bus_to_hcd(udev->bus); int i; /* Release up resources for all children before this device */ for (i = 0; i < udev->maxchild; i++) if (hub->ports[i]->child) hub_hc_release_resources(hub->ports[i]->child); if (hcd->driver->reset_device) hcd->driver->reset_device(hcd, udev); } /** * usb_reset_and_verify_device - perform a USB port reset to reinitialize a device * @udev: device to reset (not in SUSPENDED or NOTATTACHED state) * * WARNING - don't use this routine to reset a composite device * (one with multiple interfaces owned by separate drivers)! * Use usb_reset_device() instead. * * Do a port reset, reassign the device's address, and establish its * former operating configuration. If the reset fails, or the device's * descriptors change from their values before the reset, or the original * configuration and altsettings cannot be restored, a flag will be set * telling hub_wq to pretend the device has been disconnected and then * re-connected. All drivers will be unbound, and the device will be * re-enumerated and probed all over again. * * Return: 0 if the reset succeeded, -ENODEV if the device has been * flagged for logical disconnection, or some other negative error code * if the reset wasn't even attempted. * * Note: * The caller must own the device lock and the port lock, the latter is * taken by usb_reset_device(). For example, it's safe to use * usb_reset_device() from a driver probe() routine after downloading * new firmware. For calls that might not occur during probe(), drivers * should lock the device using usb_lock_device_for_reset(). * * Locking exception: This routine may also be called from within an * autoresume handler. Such usage won't conflict with other tasks * holding the device lock because these tasks should always call * usb_autopm_resume_device(), thereby preventing any unwanted * autoresume. The autoresume handler is expected to have already * acquired the port lock before calling this routine. */ static int usb_reset_and_verify_device(struct usb_device *udev) { struct usb_device *parent_hdev = udev->parent; struct usb_hub *parent_hub; struct usb_hcd *hcd = bus_to_hcd(udev->bus); struct usb_device_descriptor descriptor; struct usb_host_bos *bos; int i, j, ret = 0; int port1 = udev->portnum; if (udev->state == USB_STATE_NOTATTACHED || udev->state == USB_STATE_SUSPENDED) { dev_dbg(&udev->dev, "device reset not allowed in state %d\n", udev->state); return -EINVAL; } if (!parent_hdev) return -EISDIR; parent_hub = usb_hub_to_struct_hub(parent_hdev); /* Disable USB2 hardware LPM. * It will be re-enabled by the enumeration process. */ usb_disable_usb2_hardware_lpm(udev); bos = udev->bos; udev->bos = NULL; if (udev->reset_resume) hub_hc_release_resources(udev); mutex_lock(hcd->address0_mutex); for (i = 0; i < PORT_INIT_TRIES; ++i) { if (hub_port_stop_enumerate(parent_hub, port1, i)) { ret = -ENODEV; break; } /* ep0 maxpacket size may change; let the HCD know about it. * Other endpoints will be handled by re-enumeration. */ usb_ep0_reinit(udev); ret = hub_port_init(parent_hub, udev, port1, i, &descriptor); if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV) break; } mutex_unlock(hcd->address0_mutex); if (ret < 0) goto re_enumerate; /* Device might have changed firmware (DFU or similar) */ if (descriptors_changed(udev, &descriptor, bos)) { dev_info(&udev->dev, "device firmware changed\n"); goto re_enumerate; } /* Restore the device's previous configuration */ if (!udev->actconfig) goto done; mutex_lock(hcd->bandwidth_mutex); ret = usb_hcd_alloc_bandwidth(udev, udev->actconfig, NULL, NULL); if (ret < 0) { dev_warn(&udev->dev, "Busted HC? Not enough HCD resources for " "old configuration.\n"); mutex_unlock(hcd->bandwidth_mutex); goto re_enumerate; } ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), USB_REQ_SET_CONFIGURATION, 0, udev->actconfig->desc.bConfigurationValue, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); if (ret < 0) { dev_err(&udev->dev, "can't restore configuration #%d (error=%d)\n", udev->actconfig->desc.bConfigurationValue, ret); mutex_unlock(hcd->bandwidth_mutex); goto re_enumerate; } mutex_unlock(hcd->bandwidth_mutex); usb_set_device_state(udev, USB_STATE_CONFIGURED); /* Put interfaces back into the same altsettings as before. * Don't bother to send the Set-Interface request for interfaces * that were already in altsetting 0; besides being unnecessary, * many devices can't handle it. Instead just reset the host-side * endpoint state. */ for (i = 0; i < udev->actconfig->desc.bNumInterfaces; i++) { struct usb_host_config *config = udev->actconfig; struct usb_interface *intf = config->interface[i]; struct usb_interface_descriptor *desc; desc = &intf->cur_altsetting->desc; if (desc->bAlternateSetting == 0) { usb_disable_interface(udev, intf, true); usb_enable_interface(udev, intf, true); ret = 0; } else { /* Let the bandwidth allocation function know that this * device has been reset, and it will have to use * alternate setting 0 as the current alternate setting. */ intf->resetting_device = 1; ret = usb_set_interface(udev, desc->bInterfaceNumber, desc->bAlternateSetting); intf->resetting_device = 0; } if (ret < 0) { dev_err(&udev->dev, "failed to restore interface %d " "altsetting %d (error=%d)\n", desc->bInterfaceNumber, desc->bAlternateSetting, ret); goto re_enumerate; } /* Resetting also frees any allocated streams */ for (j = 0; j < intf->cur_altsetting->desc.bNumEndpoints; j++) intf->cur_altsetting->endpoint[j].streams = 0; } done: /* Now that the alt settings are re-installed, enable LTM and LPM. */ usb_enable_usb2_hardware_lpm(udev); usb_unlocked_enable_lpm(udev); usb_enable_ltm(udev); usb_release_bos_descriptor(udev); udev->bos = bos; return 0; re_enumerate: usb_release_bos_descriptor(udev); udev->bos = bos; hub_port_logical_disconnect(parent_hub, port1); return -ENODEV; } /** * usb_reset_device - warn interface drivers and perform a USB port reset * @udev: device to reset (not in NOTATTACHED state) * * Warns all drivers bound to registered interfaces (using their pre_reset * method), performs the port reset, and then lets the drivers know that * the reset is over (using their post_reset method). * * Return: The same as for usb_reset_and_verify_device(). * However, if a reset is already in progress (for instance, if a * driver doesn't have pre_reset() or post_reset() callbacks, and while * being unbound or re-bound during the ongoing reset its disconnect() * or probe() routine tries to perform a second, nested reset), the * routine returns -EINPROGRESS. * * Note: * The caller must own the device lock. For example, it's safe to use * this from a driver probe() routine after downloading new firmware. * For calls that might not occur during probe(), drivers should lock * the device using usb_lock_device_for_reset(). * * If an interface is currently being probed or disconnected, we assume * its driver knows how to handle resets. For all other interfaces, * if the driver doesn't have pre_reset and post_reset methods then * we attempt to unbind it and rebind afterward. */ int usb_reset_device(struct usb_device *udev) { int ret; int i; unsigned int noio_flag; struct usb_port *port_dev; struct usb_host_config *config = udev->actconfig; struct usb_hub *hub = usb_hub_to_struct_hub(udev->parent); if (udev->state == USB_STATE_NOTATTACHED) { dev_dbg(&udev->dev, "device reset not allowed in state %d\n", udev->state); return -EINVAL; } if (!udev->parent) { /* this requires hcd-specific logic; see ohci_restart() */ dev_dbg(&udev->dev, "%s for root hub!\n", __func__); return -EISDIR; } if (udev->reset_in_progress) return -EINPROGRESS; udev->reset_in_progress = 1; port_dev = hub->ports[udev->portnum - 1]; /* * Don't allocate memory with GFP_KERNEL in current * context to avoid possible deadlock if usb mass * storage interface or usbnet interface(iSCSI case) * is included in current configuration. The easist * approach is to do it for every device reset, * because the device 'memalloc_noio' flag may have * not been set before reseting the usb device. */ noio_flag = memalloc_noio_save(); /* Prevent autosuspend during the reset */ usb_autoresume_device(udev); if (config) { for (i = 0; i < config->desc.bNumInterfaces; ++i) { struct usb_interface *cintf = config->interface[i]; struct usb_driver *drv; int unbind = 0; if (cintf->dev.driver) { drv = to_usb_driver(cintf->dev.driver); if (drv->pre_reset && drv->post_reset) unbind = (drv->pre_reset)(cintf); else if (cintf->condition == USB_INTERFACE_BOUND) unbind = 1; if (unbind) usb_forced_unbind_intf(cintf); } } } usb_lock_port(port_dev); ret = usb_reset_and_verify_device(udev); usb_unlock_port(port_dev); if (config) { for (i = config->desc.bNumInterfaces - 1; i >= 0; --i) { struct usb_interface *cintf = config->interface[i]; struct usb_driver *drv; int rebind = cintf->needs_binding; if (!rebind && cintf->dev.driver) { drv = to_usb_driver(cintf->dev.driver); if (drv->post_reset) rebind = (drv->post_reset)(cintf); else if (cintf->condition == USB_INTERFACE_BOUND) rebind = 1; if (rebind) cintf->needs_binding = 1; } } /* If the reset failed, hub_wq will unbind drivers later */ if (ret == 0) usb_unbind_and_rebind_marked_interfaces(udev); } usb_autosuspend_device(udev); memalloc_noio_restore(noio_flag); udev->reset_in_progress = 0; return ret; } EXPORT_SYMBOL_GPL(usb_reset_device); /** * usb_queue_reset_device - Reset a USB device from an atomic context * @iface: USB interface belonging to the device to reset * * This function can be used to reset a USB device from an atomic * context, where usb_reset_device() won't work (as it blocks). * * Doing a reset via this method is functionally equivalent to calling * usb_reset_device(), except for the fact that it is delayed to a * workqueue. This means that any drivers bound to other interfaces * might be unbound, as well as users from usbfs in user space. * * Corner cases: * * - Scheduling two resets at the same time from two different drivers * attached to two different interfaces of the same device is * possible; depending on how the driver attached to each interface * handles ->pre_reset(), the second reset might happen or not. * * - If the reset is delayed so long that the interface is unbound from * its driver, the reset will be skipped. * * - This function can be called during .probe(). It can also be called * during .disconnect(), but doing so is pointless because the reset * will not occur. If you really want to reset the device during * .disconnect(), call usb_reset_device() directly -- but watch out * for nested unbinding issues! */ void usb_queue_reset_device(struct usb_interface *iface) { if (schedule_work(&iface->reset_ws)) usb_get_intf(iface); } EXPORT_SYMBOL_GPL(usb_queue_reset_device); /** * usb_hub_find_child - Get the pointer of child device * attached to the port which is specified by @port1. * @hdev: USB device belonging to the usb hub * @port1: port num to indicate which port the child device * is attached to. * * USB drivers call this function to get hub's child device * pointer. * * Return: %NULL if input param is invalid and * child's usb_device pointer if non-NULL. */ struct usb_device *usb_hub_find_child(struct usb_device *hdev, int port1) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (port1 < 1 || port1 > hdev->maxchild) return NULL; return hub->ports[port1 - 1]->child; } EXPORT_SYMBOL_GPL(usb_hub_find_child); void usb_hub_adjust_deviceremovable(struct usb_device *hdev, struct usb_hub_descriptor *desc) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); enum usb_port_connect_type connect_type; int i; if (!hub) return; if (!hub_is_superspeed(hdev)) { for (i = 1; i <= hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i - 1]; connect_type = port_dev->connect_type; if (connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { u8 mask = 1 << (i%8); if (!(desc->u.hs.DeviceRemovable[i/8] & mask)) { dev_dbg(&port_dev->dev, "DeviceRemovable is changed to 1 according to platform information.\n"); desc->u.hs.DeviceRemovable[i/8] |= mask; } } } } else { u16 port_removable = le16_to_cpu(desc->u.ss.DeviceRemovable); for (i = 1; i <= hdev->maxchild; i++) { struct usb_port *port_dev = hub->ports[i - 1]; connect_type = port_dev->connect_type; if (connect_type == USB_PORT_CONNECT_TYPE_HARD_WIRED) { u16 mask = 1 << i; if (!(port_removable & mask)) { dev_dbg(&port_dev->dev, "DeviceRemovable is changed to 1 according to platform information.\n"); port_removable |= mask; } } } desc->u.ss.DeviceRemovable = cpu_to_le16(port_removable); } } #ifdef CONFIG_ACPI /** * usb_get_hub_port_acpi_handle - Get the usb port's acpi handle * @hdev: USB device belonging to the usb hub * @port1: port num of the port * * Return: Port's acpi handle if successful, %NULL if params are * invalid. */ acpi_handle usb_get_hub_port_acpi_handle(struct usb_device *hdev, int port1) { struct usb_hub *hub = usb_hub_to_struct_hub(hdev); if (!hub) return NULL; return ACPI_HANDLE(&hub->ports[port1 - 1]->dev); } #endif
8 8 1 8 8 8 8 8 7 7 2 2 7 7 7 8 8 8 3 8 8 8 8 8 8 8 2 2 2 2 1 2 2 2 1 13 3 1 12 1 1 11 3 11 2 1 1 9 10 6 6 6 6 2 1 14 14 1 12 13 12 12 6 5 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" /* LINKMODES_GET */ struct linkmodes_req_info { struct ethnl_req_info base; }; struct linkmodes_reply_data { struct ethnl_reply_data base; struct ethtool_link_ksettings ksettings; struct ethtool_link_settings *lsettings; bool peer_empty; }; #define LINKMODES_REPDATA(__reply_base) \ container_of(__reply_base, struct linkmodes_reply_data, base) const struct nla_policy ethnl_linkmodes_get_policy[] = { [ETHTOOL_A_LINKMODES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int linkmodes_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; data->lsettings = &data->ksettings.base; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = __ethtool_get_link_ksettings(dev, &data->ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); goto out; } if (!dev->ethtool_ops->cap_link_lanes_supported) data->ksettings.lanes = 0; data->peer_empty = bitmap_empty(data->ksettings.link_modes.lp_advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); out: ethnl_ops_complete(dev); return ret; } static int linkmodes_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; int len, ret; len = nla_total_size(sizeof(u8)) /* LINKMODES_AUTONEG */ + nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */ + nla_total_size(sizeof(u32)) /* LINKMODES_LANES */ + nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */ + nla_total_size(sizeof(u8)) /* LINKMODES_RATE_MATCHING */ + 0; ret = ethnl_bitset_size(ksettings->link_modes.advertising, ksettings->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; len += ret; if (!data->peer_empty) { ret = ethnl_bitset_size(ksettings->link_modes.lp_advertising, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return ret; len += ret; } if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED) len += nla_total_size(sizeof(u8)); if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED) len += nla_total_size(sizeof(u8)); return len; } static int linkmodes_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base); const struct ethtool_link_ksettings *ksettings = &data->ksettings; const struct ethtool_link_settings *lsettings = &ksettings->base; bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; int ret; if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_AUTONEG, lsettings->autoneg)) return -EMSGSIZE; ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_OURS, ksettings->link_modes.advertising, ksettings->link_modes.supported, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return -EMSGSIZE; if (!data->peer_empty) { ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_PEER, ksettings->link_modes.lp_advertising, NULL, __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names, compact); if (ret < 0) return -EMSGSIZE; } if (nla_put_u32(skb, ETHTOOL_A_LINKMODES_SPEED, lsettings->speed) || nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex)) return -EMSGSIZE; if (ksettings->lanes && nla_put_u32(skb, ETHTOOL_A_LINKMODES_LANES, ksettings->lanes)) return -EMSGSIZE; if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED && nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG, lsettings->master_slave_cfg)) return -EMSGSIZE; if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED && nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE, lsettings->master_slave_state)) return -EMSGSIZE; if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_RATE_MATCHING, lsettings->rate_matching)) return -EMSGSIZE; return 0; } /* LINKMODES_SET */ const struct nla_policy ethnl_linkmodes_set_policy[] = { [ETHTOOL_A_LINKMODES_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_LINKMODES_AUTONEG] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_OURS] = { .type = NLA_NESTED }, [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 }, [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG] = { .type = NLA_U8 }, [ETHTOOL_A_LINKMODES_LANES] = NLA_POLICY_RANGE(NLA_U32, 1, 8), }; /* Set advertised link modes to all supported modes matching requested speed, * lanes and duplex values. Called when autonegotiation is on, speed, lanes or * duplex is requested but no link mode change. This is done in userspace with * ioctl() interface, move it into kernel for netlink. * Returns true if advertised modes bitmap was modified. */ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings, bool req_speed, bool req_lanes, bool req_duplex) { unsigned long *advertising = ksettings->link_modes.advertising; unsigned long *supported = ksettings->link_modes.supported; DECLARE_BITMAP(old_adv, __ETHTOOL_LINK_MODE_MASK_NBITS); unsigned int i; bitmap_copy(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); for (i = 0; i < __ETHTOOL_LINK_MODE_MASK_NBITS; i++) { const struct link_mode_info *info = &link_mode_params[i]; if (info->speed == SPEED_UNKNOWN) continue; if (test_bit(i, supported) && (!req_speed || info->speed == ksettings->base.speed) && (!req_lanes || info->lanes == ksettings->lanes) && (!req_duplex || info->duplex == ksettings->base.duplex)) set_bit(i, advertising); else clear_bit(i, advertising); } return !bitmap_equal(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS); } static bool ethnl_validate_master_slave_cfg(u8 cfg) { switch (cfg) { case MASTER_SLAVE_CFG_MASTER_PREFERRED: case MASTER_SLAVE_CFG_SLAVE_PREFERRED: case MASTER_SLAVE_CFG_MASTER_FORCE: case MASTER_SLAVE_CFG_SLAVE_FORCE: return true; } return false; } static int ethnl_check_linkmodes(struct genl_info *info, struct nlattr **tb) { const struct nlattr *master_slave_cfg, *lanes_cfg; master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; if (master_slave_cfg && !ethnl_validate_master_slave_cfg(nla_get_u8(master_slave_cfg))) { NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, "master/slave value is invalid"); return -EOPNOTSUPP; } lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES]; if (lanes_cfg && !is_power_of_2(nla_get_u32(lanes_cfg))) { NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg, "lanes value is invalid"); return -EINVAL; } return 0; } static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb, struct ethtool_link_ksettings *ksettings, bool *mod, const struct net_device *dev) { struct ethtool_link_settings *lsettings = &ksettings->base; bool req_speed, req_lanes, req_duplex; const struct nlattr *master_slave_cfg, *lanes_cfg; int ret; master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]; if (master_slave_cfg) { if (lsettings->master_slave_cfg == MASTER_SLAVE_CFG_UNSUPPORTED) { NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg, "master/slave configuration not supported by device"); return -EOPNOTSUPP; } } *mod = false; req_speed = tb[ETHTOOL_A_LINKMODES_SPEED]; req_lanes = tb[ETHTOOL_A_LINKMODES_LANES]; req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX]; ethnl_update_u8(&lsettings->autoneg, tb[ETHTOOL_A_LINKMODES_AUTONEG], mod); lanes_cfg = tb[ETHTOOL_A_LINKMODES_LANES]; if (lanes_cfg) { /* If autoneg is off and lanes parameter is not supported by the * driver, return an error. */ if (!lsettings->autoneg && !dev->ethtool_ops->cap_link_lanes_supported) { NL_SET_ERR_MSG_ATTR(info->extack, lanes_cfg, "lanes configuration not supported by device"); return -EOPNOTSUPP; } } else if (!lsettings->autoneg && ksettings->lanes) { /* If autoneg is off and lanes parameter is not passed from user but * it was defined previously then set the lanes parameter to 0. */ ksettings->lanes = 0; *mod = true; } ret = ethnl_update_bitset(ksettings->link_modes.advertising, __ETHTOOL_LINK_MODE_MASK_NBITS, tb[ETHTOOL_A_LINKMODES_OURS], link_mode_names, info->extack, mod); if (ret < 0) return ret; ethnl_update_u32(&lsettings->speed, tb[ETHTOOL_A_LINKMODES_SPEED], mod); ethnl_update_u32(&ksettings->lanes, lanes_cfg, mod); ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX], mod); ethnl_update_u8(&lsettings->master_slave_cfg, master_slave_cfg, mod); if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg && (req_speed || req_lanes || req_duplex) && ethnl_auto_linkmodes(ksettings, req_speed, req_lanes, req_duplex)) *mod = true; return 0; } static int ethnl_set_linkmodes_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; int ret; ret = ethnl_check_linkmodes(info, info->attrs); if (ret < 0) return ret; if (!ops->get_link_ksettings || !ops->set_link_ksettings) return -EOPNOTSUPP; return 1; } static int ethnl_set_linkmodes(struct ethnl_req_info *req_info, struct genl_info *info) { struct ethtool_link_ksettings ksettings = {}; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; bool mod = false; int ret; ret = __ethtool_get_link_ksettings(dev, &ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "failed to retrieve link settings"); return ret; } ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod, dev); if (ret < 0) return ret; if (!mod) return 0; ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings); if (ret < 0) { GENL_SET_ERR_MSG(info, "link settings update failed"); return ret; } return 1; } const struct ethnl_request_ops ethnl_linkmodes_request_ops = { .request_cmd = ETHTOOL_MSG_LINKMODES_GET, .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY, .hdr_attr = ETHTOOL_A_LINKMODES_HEADER, .req_info_size = sizeof(struct linkmodes_req_info), .reply_data_size = sizeof(struct linkmodes_reply_data), .prepare_data = linkmodes_prepare_data, .reply_size = linkmodes_reply_size, .fill_reply = linkmodes_fill_reply, .set_validate = ethnl_set_linkmodes_validate, .set = ethnl_set_linkmodes, .set_ntf_cmd = ETHTOOL_MSG_LINKMODES_NTF, };
39 39 39 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* SMBUS message transfer tracepoints * * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #undef TRACE_SYSTEM #define TRACE_SYSTEM smbus #if !defined(_TRACE_SMBUS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SMBUS_H #include <linux/i2c.h> #include <linux/tracepoint.h> /* * drivers/i2c/i2c-core-smbus.c */ /* * i2c_smbus_xfer() write data or procedure call request */ TRACE_EVENT_CONDITION(smbus_write, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, const union i2c_smbus_data *data), TP_ARGS(adap, addr, flags, read_write, command, protocol, data), TP_CONDITION(read_write == I2C_SMBUS_WRITE || protocol == I2C_SMBUS_PROC_CALL || protocol == I2C_SMBUS_BLOCK_PROC_CALL), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u8, command ) __field(__u8, len ) __field(__u32, protocol ) __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->command = command; __entry->protocol = protocol; switch (protocol) { case I2C_SMBUS_BYTE_DATA: __entry->len = 1; goto copy; case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_PROC_CALL: __entry->len = 2; goto copy; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: case I2C_SMBUS_I2C_BLOCK_DATA: __entry->len = data->block[0] + 1; copy: memcpy(__entry->buf, data->block, __entry->len); break; case I2C_SMBUS_QUICK: case I2C_SMBUS_BYTE: case I2C_SMBUS_I2C_BLOCK_BROKEN: default: __entry->len = 0; } ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), __entry->len, __entry->len, __entry->buf )); /* * i2c_smbus_xfer() read data request */ TRACE_EVENT_CONDITION(smbus_read, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol), TP_ARGS(adap, addr, flags, read_write, command, protocol), TP_CONDITION(!(read_write == I2C_SMBUS_WRITE || protocol == I2C_SMBUS_PROC_CALL || protocol == I2C_SMBUS_BLOCK_PROC_CALL)), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, flags ) __field(__u16, addr ) __field(__u8, command ) __field(__u32, protocol ) __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->command = command; __entry->protocol = protocol; ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }) )); /* * i2c_smbus_xfer() read data or procedure call reply */ TRACE_EVENT_CONDITION(smbus_reply, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, const union i2c_smbus_data *data, int res), TP_ARGS(adap, addr, flags, read_write, command, protocol, data, res), TP_CONDITION(res >= 0 && read_write == I2C_SMBUS_READ), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u8, command ) __field(__u8, len ) __field(__u32, protocol ) __array(__u8, buf, I2C_SMBUS_BLOCK_MAX + 2) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->command = command; __entry->protocol = protocol; switch (protocol) { case I2C_SMBUS_BYTE: case I2C_SMBUS_BYTE_DATA: __entry->len = 1; goto copy; case I2C_SMBUS_WORD_DATA: case I2C_SMBUS_PROC_CALL: __entry->len = 2; goto copy; case I2C_SMBUS_BLOCK_DATA: case I2C_SMBUS_BLOCK_PROC_CALL: case I2C_SMBUS_I2C_BLOCK_DATA: __entry->len = data->block[0] + 1; copy: memcpy(__entry->buf, data->block, __entry->len); break; case I2C_SMBUS_QUICK: case I2C_SMBUS_I2C_BLOCK_BROKEN: default: __entry->len = 0; } ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s l=%u [%*phD]", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), __entry->len, __entry->len, __entry->buf )); /* * i2c_smbus_xfer() result */ TRACE_EVENT(smbus_result, TP_PROTO(const struct i2c_adapter *adap, u16 addr, unsigned short flags, char read_write, u8 command, int protocol, int res), TP_ARGS(adap, addr, flags, read_write, command, protocol, res), TP_STRUCT__entry( __field(int, adapter_nr ) __field(__u16, addr ) __field(__u16, flags ) __field(__u8, read_write ) __field(__u8, command ) __field(__s16, res ) __field(__u32, protocol ) ), TP_fast_assign( __entry->adapter_nr = adap->nr; __entry->addr = addr; __entry->flags = flags; __entry->read_write = read_write; __entry->command = command; __entry->protocol = protocol; __entry->res = res; ), TP_printk("i2c-%d a=%03x f=%04x c=%x %s %s res=%d", __entry->adapter_nr, __entry->addr, __entry->flags, __entry->command, __print_symbolic(__entry->protocol, { I2C_SMBUS_QUICK, "QUICK" }, { I2C_SMBUS_BYTE, "BYTE" }, { I2C_SMBUS_BYTE_DATA, "BYTE_DATA" }, { I2C_SMBUS_WORD_DATA, "WORD_DATA" }, { I2C_SMBUS_PROC_CALL, "PROC_CALL" }, { I2C_SMBUS_BLOCK_DATA, "BLOCK_DATA" }, { I2C_SMBUS_I2C_BLOCK_BROKEN, "I2C_BLOCK_BROKEN" }, { I2C_SMBUS_BLOCK_PROC_CALL, "BLOCK_PROC_CALL" }, { I2C_SMBUS_I2C_BLOCK_DATA, "I2C_BLOCK_DATA" }), __entry->read_write == I2C_SMBUS_WRITE ? "wr" : "rd", __entry->res )); #endif /* _TRACE_SMBUS_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
31 83 1314 1314 47 23 27 31 86 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 /* SPDX-License-Identifier: GPL-2.0 */ /* * NUMA memory policies for Linux. * Copyright 2003,2004 Andi Kleen SuSE Labs */ #ifndef _LINUX_MEMPOLICY_H #define _LINUX_MEMPOLICY_H 1 #include <linux/sched.h> #include <linux/mmzone.h> #include <linux/slab.h> #include <linux/rbtree.h> #include <linux/spinlock.h> #include <linux/nodemask.h> #include <linux/pagemap.h> #include <uapi/linux/mempolicy.h> struct mm_struct; #define NO_INTERLEAVE_INDEX (-1UL) /* use task il_prev for interleaving */ #ifdef CONFIG_NUMA /* * Describe a memory policy. * * A mempolicy can be either associated with a process or with a VMA. * For VMA related allocations the VMA policy is preferred, otherwise * the process policy is used. Interrupts ignore the memory policy * of the current process. * * Locking policy for interleave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on * mmap_lock. * * Freeing policy: * Mempolicy objects are reference counted. A mempolicy will be freed when * mpol_put() decrements the reference count to zero. * * Duplicating policy objects: * mpol_dup() allocates a new mempolicy and copies the specified mempolicy * to the new storage. The reference count of the new object is initialized * to 1, representing the caller of mpol_dup(). */ struct mempolicy { atomic_t refcnt; unsigned short mode; /* See MPOL_* above */ unsigned short flags; /* See set_mempolicy() MPOL_F_* above */ nodemask_t nodes; /* interleave/bind/preferred/etc */ int home_node; /* Home node to use for MPOL_BIND and MPOL_PREFERRED_MANY */ union { nodemask_t cpuset_mems_allowed; /* relative to these nodes */ nodemask_t user_nodemask; /* nodemask passed by user */ } w; }; /* * Support for managing mempolicy data objects (clone, copy, destroy) * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. */ extern void __mpol_put(struct mempolicy *pol); static inline void mpol_put(struct mempolicy *pol) { if (pol) __mpol_put(pol); } /* * Does mempolicy pol need explicit unref after use? * Currently only needed for shared policies. */ static inline int mpol_needs_cond_ref(struct mempolicy *pol) { return (pol && (pol->flags & MPOL_F_SHARED)); } static inline void mpol_cond_put(struct mempolicy *pol) { if (mpol_needs_cond_ref(pol)) __mpol_put(pol); } extern struct mempolicy *__mpol_dup(struct mempolicy *pol); static inline struct mempolicy *mpol_dup(struct mempolicy *pol) { if (pol) pol = __mpol_dup(pol); return pol; } static inline void mpol_get(struct mempolicy *pol) { if (pol) atomic_inc(&pol->refcnt); } extern bool __mpol_equal(struct mempolicy *a, struct mempolicy *b); static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { if (a == b) return true; return __mpol_equal(a, b); } /* * Tree of shared policies for a shared memory region. */ struct shared_policy { struct rb_root root; rwlock_t lock; }; struct sp_node { struct rb_node nd; pgoff_t start, end; struct mempolicy *policy; }; int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst); void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol); int mpol_set_shared_policy(struct shared_policy *sp, struct vm_area_struct *vma, struct mempolicy *mpol); void mpol_free_shared_policy(struct shared_policy *sp); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx); struct mempolicy *get_task_policy(struct task_struct *p); struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr, pgoff_t *ilx); struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx); bool vma_policy_mof(struct vm_area_struct *vma); extern void numa_default_policy(void); extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_in_oom_domain(struct task_struct *tsk, const nodemask_t *mask); extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; static inline void check_highest_zone(enum zone_type k) { if (k > policy_zone && k != ZONE_MOVABLE) policy_zone = k; } int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags); #ifdef CONFIG_TMPFS extern int mpol_parse_str(char *str, struct mempolicy **mpol); #endif extern void mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol); /* Check if a vma is migratable */ extern bool vma_migratable(struct vm_area_struct *vma); int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long addr); extern void mpol_put_task_policy(struct task_struct *); static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return (pol->mode == MPOL_PREFERRED_MANY); } extern bool apply_policy_zone(struct mempolicy *policy, enum zone_type zone); #else struct mempolicy {}; static inline struct mempolicy *get_task_policy(struct task_struct *p) { return NULL; } static inline bool mpol_equal(struct mempolicy *a, struct mempolicy *b) { return true; } static inline void mpol_put(struct mempolicy *pol) { } static inline void mpol_cond_put(struct mempolicy *pol) { } static inline void mpol_get(struct mempolicy *pol) { } struct shared_policy {}; static inline void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { } static inline void mpol_free_shared_policy(struct shared_policy *sp) { } static inline struct mempolicy * mpol_shared_policy_lookup(struct shared_policy *sp, pgoff_t idx) { return NULL; } static inline struct mempolicy *get_vma_policy(struct vm_area_struct *vma, unsigned long addr, int order, pgoff_t *ilx) { *ilx = 0; return NULL; } static inline int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst) { return 0; } static inline void numa_policy_init(void) { } static inline void numa_default_policy(void) { } static inline void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new) { } static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { } static inline int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { *mpol = NULL; *nodemask = NULL; return 0; } static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; } static inline int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, const nodemask_t *to, int flags) { return 0; } static inline void check_highest_zone(int k) { } #ifdef CONFIG_TMPFS static inline int mpol_parse_str(char *str, struct mempolicy **mpol) { return 1; /* error */ } #endif static inline int mpol_misplaced(struct folio *folio, struct vm_fault *vmf, unsigned long address) { return -1; /* no node preference */ } static inline void mpol_put_task_policy(struct task_struct *task) { } static inline bool mpol_is_preferred_many(struct mempolicy *pol) { return false; } #endif /* CONFIG_NUMA */ #endif
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/ipv6.h> #include <net/dsfield.h> #include <net/xfrm.h> #ifndef XFRM_INOUT_H #define XFRM_INOUT_H 1 static inline void xfrm4_extract_header(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = iph->id; XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off; XFRM_MODE_SKB_CB(skb)->tos = iph->tos; XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl; XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph); memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); } static inline void xfrm6_extract_header(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IPV6) struct ipv6hdr *iph = ipv6_hdr(skb); XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph); XFRM_MODE_SKB_CB(skb)->id = 0; XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF); XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph); XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit; XFRM_MODE_SKB_CB(skb)->optlen = 0; memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl, sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl)); #else WARN_ON_ONCE(1); #endif } static inline void xfrm6_beet_make_header(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); iph->version = 6; memcpy(iph->flow_lbl, XFRM_MODE_SKB_CB(skb)->flow_lbl, sizeof(iph->flow_lbl)); iph->nexthdr = XFRM_MODE_SKB_CB(skb)->protocol; ipv6_change_dsfield(iph, 0, XFRM_MODE_SKB_CB(skb)->tos); iph->hop_limit = XFRM_MODE_SKB_CB(skb)->ttl; } static inline void xfrm4_beet_make_header(struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); iph->ihl = 5; iph->version = 4; iph->protocol = XFRM_MODE_SKB_CB(skb)->protocol; iph->tos = XFRM_MODE_SKB_CB(skb)->tos; iph->id = XFRM_MODE_SKB_CB(skb)->id; iph->frag_off = XFRM_MODE_SKB_CB(skb)->frag_off; iph->ttl = XFRM_MODE_SKB_CB(skb)->ttl; } #endif
2 2 3 3 3 3 3 8 8 3 3 8 32 30 32 32 32 32 32 32 32 31 32 32 31 31 32 32 19 14 32 32 32 32 32 32 19 14 8 8 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 // SPDX-License-Identifier: GPL-2.0-or-later /* * Synchronous Compression operations * * Copyright 2015 LG Electronics Inc. * Copyright (c) 2016, Intel Corporation * Author: Giovanni Cabiddu <giovanni.cabiddu@intel.com> */ #include <crypto/internal/acompress.h> #include <crypto/internal/scompress.h> #include <crypto/scatterwalk.h> #include <linux/cryptouser.h> #include <linux/err.h> #include <linux/highmem.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/overflow.h> #include <linux/scatterlist.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/vmalloc.h> #include <net/netlink.h> #include "compress.h" #define SCOMP_SCRATCH_SIZE 65400 struct scomp_scratch { spinlock_t lock; union { void *src; unsigned long saddr; }; void *dst; }; static DEFINE_PER_CPU(struct scomp_scratch, scomp_scratch) = { .lock = __SPIN_LOCK_UNLOCKED(scomp_scratch.lock), }; static const struct crypto_type crypto_scomp_type; static int scomp_scratch_users; static DEFINE_MUTEX(scomp_lock); static int __maybe_unused crypto_scomp_report( struct sk_buff *skb, struct crypto_alg *alg) { struct crypto_report_comp rscomp; memset(&rscomp, 0, sizeof(rscomp)); strscpy(rscomp.type, "scomp", sizeof(rscomp.type)); return nla_put(skb, CRYPTOCFGA_REPORT_COMPRESS, sizeof(rscomp), &rscomp); } static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) __maybe_unused; static void crypto_scomp_show(struct seq_file *m, struct crypto_alg *alg) { seq_puts(m, "type : scomp\n"); } static void crypto_scomp_free_scratches(void) { struct scomp_scratch *scratch; int i; for_each_possible_cpu(i) { scratch = per_cpu_ptr(&scomp_scratch, i); free_page(scratch->saddr); vfree(scratch->dst); scratch->src = NULL; scratch->dst = NULL; } } static int crypto_scomp_alloc_scratches(void) { struct scomp_scratch *scratch; int i; for_each_possible_cpu(i) { struct page *page; void *mem; scratch = per_cpu_ptr(&scomp_scratch, i); page = alloc_pages_node(cpu_to_node(i), GFP_KERNEL, 0); if (!page) goto error; scratch->src = page_address(page); mem = vmalloc_node(SCOMP_SCRATCH_SIZE, cpu_to_node(i)); if (!mem) goto error; scratch->dst = mem; } return 0; error: crypto_scomp_free_scratches(); return -ENOMEM; } static void scomp_free_streams(struct scomp_alg *alg) { struct crypto_acomp_stream __percpu *stream = alg->stream; int i; alg->stream = NULL; if (!stream) return; for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); if (IS_ERR_OR_NULL(ps->ctx)) break; alg->free_ctx(ps->ctx); } free_percpu(stream); } static int scomp_alloc_streams(struct scomp_alg *alg) { struct crypto_acomp_stream __percpu *stream; int i; stream = alloc_percpu(struct crypto_acomp_stream); if (!stream) return -ENOMEM; alg->stream = stream; for_each_possible_cpu(i) { struct crypto_acomp_stream *ps = per_cpu_ptr(stream, i); ps->ctx = alg->alloc_ctx(); if (IS_ERR(ps->ctx)) { scomp_free_streams(alg); return PTR_ERR(ps->ctx); } spin_lock_init(&ps->lock); } return 0; } static int crypto_scomp_init_tfm(struct crypto_tfm *tfm) { struct scomp_alg *alg = crypto_scomp_alg(__crypto_scomp_tfm(tfm)); int ret = 0; mutex_lock(&scomp_lock); if (!alg->stream) { ret = scomp_alloc_streams(alg); if (ret) goto unlock; } if (!scomp_scratch_users) { ret = crypto_scomp_alloc_scratches(); if (ret) goto unlock; scomp_scratch_users++; } unlock: mutex_unlock(&scomp_lock); return ret; } static int scomp_acomp_comp_decomp(struct acomp_req *req, int dir) { struct scomp_scratch *scratch = raw_cpu_ptr(&scomp_scratch); struct crypto_acomp *tfm = crypto_acomp_reqtfm(req); struct crypto_scomp **tfm_ctx = acomp_tfm_ctx(tfm); struct crypto_scomp *scomp = *tfm_ctx; struct crypto_acomp_stream *stream; unsigned int slen = req->slen; unsigned int dlen = req->dlen; struct page *spage, *dpage; unsigned int n; const u8 *src; size_t soff; size_t doff; u8 *dst; int ret; if (!req->src || !slen) return -EINVAL; if (!req->dst || !dlen) return -EINVAL; if (acomp_request_src_isvirt(req)) src = req->svirt; else { src = scratch->src; do { if (acomp_request_src_isfolio(req)) { spage = folio_page(req->sfolio, 0); soff = req->soff; } else if (slen <= req->src->length) { spage = sg_page(req->src); soff = req->src->offset; } else break; spage = nth_page(spage, soff / PAGE_SIZE); soff = offset_in_page(soff); n = slen / PAGE_SIZE; n += (offset_in_page(slen) + soff - 1) / PAGE_SIZE; if (PageHighMem(nth_page(spage, n)) && size_add(soff, slen) > PAGE_SIZE) break; src = kmap_local_page(spage) + soff; } while (0); } if (acomp_request_dst_isvirt(req)) dst = req->dvirt; else { unsigned int max = SCOMP_SCRATCH_SIZE; dst = scratch->dst; do { if (acomp_request_dst_isfolio(req)) { dpage = folio_page(req->dfolio, 0); doff = req->doff; } else if (dlen <= req->dst->length) { dpage = sg_page(req->dst); doff = req->dst->offset; } else break; dpage = nth_page(dpage, doff / PAGE_SIZE); doff = offset_in_page(doff); n = dlen / PAGE_SIZE; n += (offset_in_page(dlen) + doff - 1) / PAGE_SIZE; if (PageHighMem(dpage + n) && size_add(doff, dlen) > PAGE_SIZE) break; dst = kmap_local_page(dpage) + doff; max = dlen; } while (0); dlen = min(dlen, max); } spin_lock_bh(&scratch->lock); if (src == scratch->src) memcpy_from_sglist(scratch->src, req->src, 0, slen); stream = raw_cpu_ptr(crypto_scomp_alg(scomp)->stream); spin_lock(&stream->lock); if (dir) ret = crypto_scomp_compress(scomp, src, slen, dst, &dlen, stream->ctx); else ret = crypto_scomp_decompress(scomp, src, slen, dst, &dlen, stream->ctx); if (dst == scratch->dst) memcpy_to_sglist(req->dst, 0, dst, dlen); spin_unlock(&stream->lock); spin_unlock_bh(&scratch->lock); req->dlen = dlen; if (!acomp_request_dst_isvirt(req) && dst != scratch->dst) { kunmap_local(dst); dlen += doff; for (;;) { flush_dcache_page(dpage); if (dlen <= PAGE_SIZE) break; dlen -= PAGE_SIZE; dpage = nth_page(dpage, 1); } } if (!acomp_request_src_isvirt(req) && src != scratch->src) kunmap_local(src); return ret; } static int scomp_acomp_chain(struct acomp_req *req, int dir) { struct acomp_req *r2; int err; err = scomp_acomp_comp_decomp(req, dir); req->base.err = err; list_for_each_entry(r2, &req->base.list, base.list) r2->base.err = scomp_acomp_comp_decomp(r2, dir); return err; } static int scomp_acomp_compress(struct acomp_req *req) { return scomp_acomp_chain(req, 1); } static int scomp_acomp_decompress(struct acomp_req *req) { return scomp_acomp_chain(req, 0); } static void crypto_exit_scomp_ops_async(struct crypto_tfm *tfm) { struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); crypto_free_scomp(*ctx); mutex_lock(&scomp_lock); if (!--scomp_scratch_users) crypto_scomp_free_scratches(); mutex_unlock(&scomp_lock); } int crypto_init_scomp_ops_async(struct crypto_tfm *tfm) { struct crypto_alg *calg = tfm->__crt_alg; struct crypto_acomp *crt = __crypto_acomp_tfm(tfm); struct crypto_scomp **ctx = crypto_tfm_ctx(tfm); struct crypto_scomp *scomp; if (!crypto_mod_get(calg)) return -EAGAIN; scomp = crypto_create_tfm(calg, &crypto_scomp_type); if (IS_ERR(scomp)) { crypto_mod_put(calg); return PTR_ERR(scomp); } *ctx = scomp; tfm->exit = crypto_exit_scomp_ops_async; crt->compress = scomp_acomp_compress; crt->decompress = scomp_acomp_decompress; return 0; } static void crypto_scomp_destroy(struct crypto_alg *alg) { scomp_free_streams(__crypto_scomp_alg(alg)); } static const struct crypto_type crypto_scomp_type = { .extsize = crypto_alg_extsize, .init_tfm = crypto_scomp_init_tfm, .destroy = crypto_scomp_destroy, #ifdef CONFIG_PROC_FS .show = crypto_scomp_show, #endif #if IS_ENABLED(CONFIG_CRYPTO_USER) .report = crypto_scomp_report, #endif .maskclear = ~CRYPTO_ALG_TYPE_MASK, .maskset = CRYPTO_ALG_TYPE_MASK, .type = CRYPTO_ALG_TYPE_SCOMPRESS, .tfmsize = offsetof(struct crypto_scomp, base), }; static void scomp_prepare_alg(struct scomp_alg *alg) { struct crypto_alg *base = &alg->calg.base; comp_prepare_alg(&alg->calg); base->cra_flags |= CRYPTO_ALG_REQ_CHAIN; } int crypto_register_scomp(struct scomp_alg *alg) { struct crypto_alg *base = &alg->calg.base; scomp_prepare_alg(alg); base->cra_type = &crypto_scomp_type; base->cra_flags |= CRYPTO_ALG_TYPE_SCOMPRESS; return crypto_register_alg(base); } EXPORT_SYMBOL_GPL(crypto_register_scomp); void crypto_unregister_scomp(struct scomp_alg *alg) { crypto_unregister_alg(&alg->base); } EXPORT_SYMBOL_GPL(crypto_unregister_scomp); int crypto_register_scomps(struct scomp_alg *algs, int count) { int i, ret; for (i = 0; i < count; i++) { ret = crypto_register_scomp(&algs[i]); if (ret) goto err; } return 0; err: for (--i; i >= 0; --i) crypto_unregister_scomp(&algs[i]); return ret; } EXPORT_SYMBOL_GPL(crypto_register_scomps); void crypto_unregister_scomps(struct scomp_alg *algs, int count) { int i; for (i = count - 1; i >= 0; --i) crypto_unregister_scomp(&algs[i]); } EXPORT_SYMBOL_GPL(crypto_unregister_scomps); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Synchronous compression type");
2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 // SPDX-License-Identifier: GPL-2.0-or-later /* * PRNG: Pseudo Random Number Generator * Based on NIST Recommended PRNG From ANSI X9.31 Appendix A.2.4 using * AES 128 cipher * * (C) Neil Horman <nhorman@tuxdriver.com> */ #include <crypto/internal/cipher.h> #include <crypto/internal/rng.h> #include <linux/err.h> #include <linux/init.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/string.h> #define DEFAULT_PRNG_KEY "0123456789abcdef" #define DEFAULT_PRNG_KSZ 16 #define DEFAULT_BLK_SZ 16 #define DEFAULT_V_SEED "zaybxcwdveuftgsh" /* * Flags for the prng_context flags field */ #define PRNG_FIXED_SIZE 0x1 #define PRNG_NEED_RESET 0x2 /* * Note: DT is our counter value * I is our intermediate value * V is our seed vector * See http://csrc.nist.gov/groups/STM/cavp/documents/rng/931rngext.pdf * for implementation details */ struct prng_context { spinlock_t prng_lock; unsigned char rand_data[DEFAULT_BLK_SZ]; unsigned char last_rand_data[DEFAULT_BLK_SZ]; unsigned char DT[DEFAULT_BLK_SZ]; unsigned char I[DEFAULT_BLK_SZ]; unsigned char V[DEFAULT_BLK_SZ]; u32 rand_data_valid; struct crypto_cipher *tfm; u32 flags; }; static int dbg; static void hexdump(char *note, unsigned char *buf, unsigned int len) { if (dbg) { printk(KERN_CRIT "%s", note); print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET, 16, 1, buf, len, false); } } #define dbgprint(format, args...) do {\ if (dbg)\ printk(format, ##args);\ } while (0) static void xor_vectors(unsigned char *in1, unsigned char *in2, unsigned char *out, unsigned int size) { int i; for (i = 0; i < size; i++) out[i] = in1[i] ^ in2[i]; } /* * Returns DEFAULT_BLK_SZ bytes of random data per call * returns 0 if generation succeeded, <0 if something went wrong */ static int _get_more_prng_bytes(struct prng_context *ctx, int cont_test) { int i; unsigned char tmp[DEFAULT_BLK_SZ]; unsigned char *output = NULL; dbgprint(KERN_CRIT "Calling _get_more_prng_bytes for context %p\n", ctx); hexdump("Input DT: ", ctx->DT, DEFAULT_BLK_SZ); hexdump("Input I: ", ctx->I, DEFAULT_BLK_SZ); hexdump("Input V: ", ctx->V, DEFAULT_BLK_SZ); /* * This algorithm is a 3 stage state machine */ for (i = 0; i < 3; i++) { switch (i) { case 0: /* * Start by encrypting the counter value * This gives us an intermediate value I */ memcpy(tmp, ctx->DT, DEFAULT_BLK_SZ); output = ctx->I; hexdump("tmp stage 0: ", tmp, DEFAULT_BLK_SZ); break; case 1: /* * Next xor I with our secret vector V * encrypt that result to obtain our * pseudo random data which we output */ xor_vectors(ctx->I, ctx->V, tmp, DEFAULT_BLK_SZ); hexdump("tmp stage 1: ", tmp, DEFAULT_BLK_SZ); output = ctx->rand_data; break; case 2: /* * First check that we didn't produce the same * random data that we did last time around through this */ if (!memcmp(ctx->rand_data, ctx->last_rand_data, DEFAULT_BLK_SZ)) { if (cont_test) { panic("cprng %p Failed repetition check!\n", ctx); } printk(KERN_ERR "ctx %p Failed repetition check!\n", ctx); ctx->flags |= PRNG_NEED_RESET; return -EINVAL; } memcpy(ctx->last_rand_data, ctx->rand_data, DEFAULT_BLK_SZ); /* * Lastly xor the random data with I * and encrypt that to obtain a new secret vector V */ xor_vectors(ctx->rand_data, ctx->I, tmp, DEFAULT_BLK_SZ); output = ctx->V; hexdump("tmp stage 2: ", tmp, DEFAULT_BLK_SZ); break; } /* do the encryption */ crypto_cipher_encrypt_one(ctx->tfm, output, tmp); } /* * Now update our DT value */ for (i = DEFAULT_BLK_SZ - 1; i >= 0; i--) { ctx->DT[i] += 1; if (ctx->DT[i] != 0) break; } dbgprint("Returning new block for context %p\n", ctx); ctx->rand_data_valid = 0; hexdump("Output DT: ", ctx->DT, DEFAULT_BLK_SZ); hexdump("Output I: ", ctx->I, DEFAULT_BLK_SZ); hexdump("Output V: ", ctx->V, DEFAULT_BLK_SZ); hexdump("New Random Data: ", ctx->rand_data, DEFAULT_BLK_SZ); return 0; } /* Our exported functions */ static int get_prng_bytes(char *buf, size_t nbytes, struct prng_context *ctx, int do_cont_test) { unsigned char *ptr = buf; unsigned int byte_count = (unsigned int)nbytes; int err; spin_lock_bh(&ctx->prng_lock); err = -EINVAL; if (ctx->flags & PRNG_NEED_RESET) goto done; /* * If the FIXED_SIZE flag is on, only return whole blocks of * pseudo random data */ err = -EINVAL; if (ctx->flags & PRNG_FIXED_SIZE) { if (nbytes < DEFAULT_BLK_SZ) goto done; byte_count = DEFAULT_BLK_SZ; } /* * Return 0 in case of success as mandated by the kernel * crypto API interface definition. */ err = 0; dbgprint(KERN_CRIT "getting %d random bytes for context %p\n", byte_count, ctx); remainder: if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; } } /* * Copy any data less than an entire block */ if (byte_count < DEFAULT_BLK_SZ) { empty_rbuf: while (ctx->rand_data_valid < DEFAULT_BLK_SZ) { *ptr = ctx->rand_data[ctx->rand_data_valid]; ptr++; byte_count--; ctx->rand_data_valid++; if (byte_count == 0) goto done; } } /* * Now copy whole blocks */ for (; byte_count >= DEFAULT_BLK_SZ; byte_count -= DEFAULT_BLK_SZ) { if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; } } if (ctx->rand_data_valid > 0) goto empty_rbuf; memcpy(ptr, ctx->rand_data, DEFAULT_BLK_SZ); ctx->rand_data_valid += DEFAULT_BLK_SZ; ptr += DEFAULT_BLK_SZ; } /* * Now go back and get any remaining partial block */ if (byte_count) goto remainder; done: spin_unlock_bh(&ctx->prng_lock); dbgprint(KERN_CRIT "returning %d from get_prng_bytes in context %p\n", err, ctx); return err; } static void free_prng_context(struct prng_context *ctx) { crypto_free_cipher(ctx->tfm); } static int reset_prng_context(struct prng_context *ctx, const unsigned char *key, size_t klen, const unsigned char *V, const unsigned char *DT) { int ret; const unsigned char *prng_key; spin_lock_bh(&ctx->prng_lock); ctx->flags |= PRNG_NEED_RESET; prng_key = (key != NULL) ? key : (unsigned char *)DEFAULT_PRNG_KEY; if (!key) klen = DEFAULT_PRNG_KSZ; if (V) memcpy(ctx->V, V, DEFAULT_BLK_SZ); else memcpy(ctx->V, DEFAULT_V_SEED, DEFAULT_BLK_SZ); if (DT) memcpy(ctx->DT, DT, DEFAULT_BLK_SZ); else memset(ctx->DT, 0, DEFAULT_BLK_SZ); memset(ctx->rand_data, 0, DEFAULT_BLK_SZ); memset(ctx->last_rand_data, 0, DEFAULT_BLK_SZ); ctx->rand_data_valid = DEFAULT_BLK_SZ; ret = crypto_cipher_setkey(ctx->tfm, prng_key, klen); if (ret) { dbgprint(KERN_CRIT "PRNG: setkey() failed flags=%x\n", crypto_cipher_get_flags(ctx->tfm)); goto out; } ret = 0; ctx->flags &= ~PRNG_NEED_RESET; out: spin_unlock_bh(&ctx->prng_lock); return ret; } static int cprng_init(struct crypto_tfm *tfm) { struct prng_context *ctx = crypto_tfm_ctx(tfm); spin_lock_init(&ctx->prng_lock); ctx->tfm = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(ctx->tfm)) { dbgprint(KERN_CRIT "Failed to alloc tfm for context %p\n", ctx); return PTR_ERR(ctx->tfm); } if (reset_prng_context(ctx, NULL, DEFAULT_PRNG_KSZ, NULL, NULL) < 0) return -EINVAL; /* * after allocation, we should always force the user to reset * so they don't inadvertently use the insecure default values * without specifying them intentially */ ctx->flags |= PRNG_NEED_RESET; return 0; } static void cprng_exit(struct crypto_tfm *tfm) { free_prng_context(crypto_tfm_ctx(tfm)); } static int cprng_get_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *rdata, unsigned int dlen) { struct prng_context *prng = crypto_rng_ctx(tfm); return get_prng_bytes(rdata, dlen, prng, 0); } /* * This is the cprng_registered reset method the seed value is * interpreted as the tuple { V KEY DT} * V and KEY are required during reset, and DT is optional, detected * as being present by testing the length of the seed */ static int cprng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { struct prng_context *prng = crypto_rng_ctx(tfm); const u8 *key = seed + DEFAULT_BLK_SZ; const u8 *dt = NULL; if (slen < DEFAULT_PRNG_KSZ + DEFAULT_BLK_SZ) return -EINVAL; if (slen >= (2 * DEFAULT_BLK_SZ + DEFAULT_PRNG_KSZ)) dt = key + DEFAULT_PRNG_KSZ; reset_prng_context(prng, key, DEFAULT_PRNG_KSZ, seed, dt); if (prng->flags & PRNG_NEED_RESET) return -EINVAL; return 0; } #ifdef CONFIG_CRYPTO_FIPS static int fips_cprng_get_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *rdata, unsigned int dlen) { struct prng_context *prng = crypto_rng_ctx(tfm); return get_prng_bytes(rdata, dlen, prng, 1); } static int fips_cprng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { u8 rdata[DEFAULT_BLK_SZ]; const u8 *key = seed + DEFAULT_BLK_SZ; int rc; struct prng_context *prng = crypto_rng_ctx(tfm); if (slen < DEFAULT_PRNG_KSZ + DEFAULT_BLK_SZ) return -EINVAL; /* fips strictly requires seed != key */ if (!memcmp(seed, key, DEFAULT_PRNG_KSZ)) return -EINVAL; rc = cprng_reset(tfm, seed, slen); if (!rc) goto out; /* this primes our continuity test */ rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0); prng->rand_data_valid = DEFAULT_BLK_SZ; out: return rc; } #endif static struct rng_alg rng_algs[] = { { .generate = cprng_get_random, .seed = cprng_reset, .seedsize = DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ, .base = { .cra_name = "stdrng", .cra_driver_name = "ansi_cprng", .cra_priority = 100, .cra_ctxsize = sizeof(struct prng_context), .cra_module = THIS_MODULE, .cra_init = cprng_init, .cra_exit = cprng_exit, } #ifdef CONFIG_CRYPTO_FIPS }, { .generate = fips_cprng_get_random, .seed = fips_cprng_reset, .seedsize = DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ, .base = { .cra_name = "fips(ansi_cprng)", .cra_driver_name = "fips_ansi_cprng", .cra_priority = 300, .cra_ctxsize = sizeof(struct prng_context), .cra_module = THIS_MODULE, .cra_init = cprng_init, .cra_exit = cprng_exit, } #endif } }; /* Module initalization */ static int __init prng_mod_init(void) { return crypto_register_rngs(rng_algs, ARRAY_SIZE(rng_algs)); } static void __exit prng_mod_fini(void) { crypto_unregister_rngs(rng_algs, ARRAY_SIZE(rng_algs)); } MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Software Pseudo Random Number Generator"); MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); module_param(dbg, int, 0); MODULE_PARM_DESC(dbg, "Boolean to enable debugging (0/1 == off/on)"); subsys_initcall(prng_mod_init); module_exit(prng_mod_fini); MODULE_ALIAS_CRYPTO("stdrng"); MODULE_ALIAS_CRYPTO("ansi_cprng"); MODULE_IMPORT_NS("CRYPTO_INTERNAL");
33 33 33 3 3 3 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 // SPDX-License-Identifier: GPL-2.0 #include <linux/proc_fs.h> #include <linux/ethtool.h> #include <linux/export.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/bonding.h> #include "bonding_priv.h" static void *bond_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct bonding *bond = pde_data(file_inode(seq->file)); struct list_head *iter; struct slave *slave; loff_t off = 0; rcu_read_lock(); if (*pos == 0) return SEQ_START_TOKEN; bond_for_each_slave_rcu(bond, slave, iter) if (++off == *pos) return slave; return NULL; } static void *bond_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bonding *bond = pde_data(file_inode(seq->file)); struct list_head *iter; struct slave *slave; bool found = false; ++*pos; if (v == SEQ_START_TOKEN) return bond_first_slave_rcu(bond); bond_for_each_slave_rcu(bond, slave, iter) { if (found) return slave; if (slave == v) found = true; } return NULL; } static void bond_info_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { rcu_read_unlock(); } static void bond_info_show_master(struct seq_file *seq) { struct bonding *bond = pde_data(file_inode(seq->file)); const struct bond_opt_value *optval; struct slave *curr, *primary; int i; curr = rcu_dereference(bond->curr_active_slave); seq_printf(seq, "Bonding Mode: %s", bond_mode_name(BOND_MODE(bond))); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac) { optval = bond_opt_get_val(BOND_OPT_FAIL_OVER_MAC, bond->params.fail_over_mac); seq_printf(seq, " (fail_over_mac %s)", optval->string); } seq_printf(seq, "\n"); if (bond_mode_uses_xmit_hash(bond)) { optval = bond_opt_get_val(BOND_OPT_XMIT_HASH, bond->params.xmit_policy); seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", optval->string, bond->params.xmit_policy); } if (bond_uses_primary(bond)) { primary = rcu_dereference(bond->primary_slave); seq_printf(seq, "Primary Slave: %s", primary ? primary->dev->name : "None"); if (primary) { optval = bond_opt_get_val(BOND_OPT_PRIMARY_RESELECT, bond->params.primary_reselect); seq_printf(seq, " (primary_reselect %s)", optval->string); } seq_printf(seq, "\nCurrently Active Slave: %s\n", (curr) ? curr->dev->name : "None"); } seq_printf(seq, "MII Status: %s\n", netif_carrier_ok(bond->dev) ? "up" : "down"); seq_printf(seq, "MII Polling Interval (ms): %d\n", bond->params.miimon); seq_printf(seq, "Up Delay (ms): %d\n", bond->params.updelay * bond->params.miimon); seq_printf(seq, "Down Delay (ms): %d\n", bond->params.downdelay * bond->params.miimon); seq_printf(seq, "Peer Notification Delay (ms): %d\n", bond->params.peer_notif_delay * bond->params.miimon); /* ARP information */ if (bond->params.arp_interval > 0) { int printed = 0; seq_printf(seq, "ARP Polling Interval (ms): %d\n", bond->params.arp_interval); seq_printf(seq, "ARP Missed Max: %u\n", bond->params.missed_max); seq_printf(seq, "ARP IP target/s (n.n.n.n form):"); for (i = 0; (i < BOND_MAX_ARP_TARGETS); i++) { if (!bond->params.arp_targets[i]) break; if (printed) seq_printf(seq, ","); seq_printf(seq, " %pI4", &bond->params.arp_targets[i]); printed = 1; } seq_printf(seq, "\n"); #if IS_ENABLED(CONFIG_IPV6) printed = 0; seq_printf(seq, "NS IPv6 target/s (xx::xx form):"); for (i = 0; (i < BOND_MAX_NS_TARGETS); i++) { if (ipv6_addr_any(&bond->params.ns_targets[i])) break; if (printed) seq_printf(seq, ","); seq_printf(seq, " %pI6c", &bond->params.ns_targets[i]); printed = 1; } seq_printf(seq, "\n"); #endif } if (BOND_MODE(bond) == BOND_MODE_8023AD) { struct ad_info ad_info; seq_puts(seq, "\n802.3ad info\n"); seq_printf(seq, "LACP active: %s\n", (bond->params.lacp_active) ? "on" : "off"); seq_printf(seq, "LACP rate: %s\n", (bond->params.lacp_fast) ? "fast" : "slow"); seq_printf(seq, "Min links: %d\n", bond->params.min_links); optval = bond_opt_get_val(BOND_OPT_AD_SELECT, bond->params.ad_select); seq_printf(seq, "Aggregator selection policy (ad_select): %s\n", optval->string); if (capable(CAP_NET_ADMIN)) { seq_printf(seq, "System priority: %d\n", BOND_AD_INFO(bond).system.sys_priority); seq_printf(seq, "System MAC address: %pM\n", &BOND_AD_INFO(bond).system.sys_mac_addr); if (__bond_3ad_get_active_agg_info(bond, &ad_info)) { seq_printf(seq, "bond %s has no active aggregator\n", bond->dev->name); } else { seq_printf(seq, "Active Aggregator Info:\n"); seq_printf(seq, "\tAggregator ID: %d\n", ad_info.aggregator_id); seq_printf(seq, "\tNumber of ports: %d\n", ad_info.ports); seq_printf(seq, "\tActor Key: %d\n", ad_info.actor_key); seq_printf(seq, "\tPartner Key: %d\n", ad_info.partner_key); seq_printf(seq, "\tPartner Mac Address: %pM\n", ad_info.partner_system); } } } } static void bond_info_show_slave(struct seq_file *seq, const struct slave *slave) { struct bonding *bond = pde_data(file_inode(seq->file)); seq_printf(seq, "\nSlave Interface: %s\n", slave->dev->name); seq_printf(seq, "MII Status: %s\n", bond_slave_link_status(slave->link)); if (slave->speed == SPEED_UNKNOWN) seq_printf(seq, "Speed: %s\n", "Unknown"); else seq_printf(seq, "Speed: %d Mbps\n", slave->speed); if (slave->duplex == DUPLEX_UNKNOWN) seq_printf(seq, "Duplex: %s\n", "Unknown"); else seq_printf(seq, "Duplex: %s\n", slave->duplex ? "full" : "half"); seq_printf(seq, "Link Failure Count: %u\n", slave->link_failure_count); seq_printf(seq, "Permanent HW addr: %*phC\n", slave->dev->addr_len, slave->perm_hwaddr); seq_printf(seq, "Slave queue ID: %d\n", READ_ONCE(slave->queue_id)); if (BOND_MODE(bond) == BOND_MODE_8023AD) { const struct port *port = &SLAVE_AD_INFO(slave)->port; const struct aggregator *agg = port->aggregator; if (agg) { seq_printf(seq, "Aggregator ID: %d\n", agg->aggregator_identifier); seq_printf(seq, "Actor Churn State: %s\n", bond_3ad_churn_desc(port->sm_churn_actor_state)); seq_printf(seq, "Partner Churn State: %s\n", bond_3ad_churn_desc(port->sm_churn_partner_state)); seq_printf(seq, "Actor Churned Count: %d\n", port->churn_actor_count); seq_printf(seq, "Partner Churned Count: %d\n", port->churn_partner_count); if (capable(CAP_NET_ADMIN)) { seq_puts(seq, "details actor lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->actor_system_priority); seq_printf(seq, " system mac address: %pM\n", &port->actor_system); seq_printf(seq, " port key: %d\n", port->actor_oper_port_key); seq_printf(seq, " port priority: %d\n", port->actor_port_priority); seq_printf(seq, " port number: %d\n", port->actor_port_number); seq_printf(seq, " port state: %d\n", port->actor_oper_port_state); seq_puts(seq, "details partner lacp pdu:\n"); seq_printf(seq, " system priority: %d\n", port->partner_oper.system_priority); seq_printf(seq, " system mac address: %pM\n", &port->partner_oper.system); seq_printf(seq, " oper key: %d\n", port->partner_oper.key); seq_printf(seq, " port priority: %d\n", port->partner_oper.port_priority); seq_printf(seq, " port number: %d\n", port->partner_oper.port_number); seq_printf(seq, " port state: %d\n", port->partner_oper.port_state); } } else { seq_puts(seq, "Aggregator ID: N/A\n"); } } } static int bond_info_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_printf(seq, "%s\n", bond_version); bond_info_show_master(seq); } else bond_info_show_slave(seq, v); return 0; } static const struct seq_operations bond_info_seq_ops = { .start = bond_info_seq_start, .next = bond_info_seq_next, .stop = bond_info_seq_stop, .show = bond_info_seq_show, }; void bond_create_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir) { bond->proc_entry = proc_create_seq_data(bond_dev->name, 0444, bn->proc_dir, &bond_info_seq_ops, bond); if (bond->proc_entry == NULL) netdev_warn(bond_dev, "Cannot create /proc/net/%s/%s\n", DRV_NAME, bond_dev->name); else memcpy(bond->proc_file_name, bond_dev->name, IFNAMSIZ); } } void bond_remove_proc_entry(struct bonding *bond) { struct net_device *bond_dev = bond->dev; struct bond_net *bn = net_generic(dev_net(bond_dev), bond_net_id); if (bn->proc_dir && bond->proc_entry) { remove_proc_entry(bond->proc_file_name, bn->proc_dir); memset(bond->proc_file_name, 0, IFNAMSIZ); bond->proc_entry = NULL; } } /* Create the bonding directory under /proc/net, if doesn't exist yet. * Caller must hold rtnl_lock. */ void __net_init bond_create_proc_dir(struct bond_net *bn) { if (!bn->proc_dir) { bn->proc_dir = proc_mkdir(DRV_NAME, bn->net->proc_net); if (!bn->proc_dir) pr_warn("Warning: Cannot create /proc/net/%s\n", DRV_NAME); } } /* Destroy the bonding directory under /proc/net, if empty. */ void __net_exit bond_destroy_proc_dir(struct bond_net *bn) { if (bn->proc_dir) { remove_proc_entry(DRV_NAME, bn->net->proc_net); bn->proc_dir = NULL; } }
2 2 2 2 2 2 1 3 2 2 2 1 3 2 1 2 2 3 3 3 3 1 3 2 3 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 // SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/err.h> #include <linux/scatterlist.h> #include <linux/sched.h> #include <linux/slab.h> #include <crypto/aes.h> #include <crypto/skcipher.h> #include <linux/key-type.h> #include <linux/sched/mm.h> #include <keys/ceph-type.h> #include <keys/user-type.h> #include <linux/ceph/decode.h> #include "crypto.h" /* * Set ->key and ->tfm. The rest of the key should be filled in before * this function is called. */ static int set_secret(struct ceph_crypto_key *key, void *buf) { unsigned int noio_flag; int ret; key->key = NULL; key->tfm = NULL; switch (key->type) { case CEPH_CRYPTO_NONE: return 0; /* nothing to do */ case CEPH_CRYPTO_AES: break; default: return -ENOTSUPP; } if (!key->len) return -EINVAL; key->key = kmemdup(buf, key->len, GFP_NOIO); if (!key->key) { ret = -ENOMEM; goto fail; } /* crypto_alloc_sync_skcipher() allocates with GFP_KERNEL */ noio_flag = memalloc_noio_save(); key->tfm = crypto_alloc_sync_skcipher("cbc(aes)", 0, 0); memalloc_noio_restore(noio_flag); if (IS_ERR(key->tfm)) { ret = PTR_ERR(key->tfm); key->tfm = NULL; goto fail; } ret = crypto_sync_skcipher_setkey(key->tfm, key->key, key->len); if (ret) goto fail; return 0; fail: ceph_crypto_key_destroy(key); return ret; } int ceph_crypto_key_clone(struct ceph_crypto_key *dst, const struct ceph_crypto_key *src) { memcpy(dst, src, sizeof(struct ceph_crypto_key)); return set_secret(dst, src->key); } int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) { int ret; ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); key->type = ceph_decode_16(p); ceph_decode_copy(p, &key->created, sizeof(key->created)); key->len = ceph_decode_16(p); ceph_decode_need(p, end, key->len, bad); ret = set_secret(key, *p); memzero_explicit(*p, key->len); *p += key->len; return ret; bad: dout("failed to decode crypto key\n"); return -EINVAL; } int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) { int inlen = strlen(inkey); int blen = inlen * 3 / 4; void *buf, *p; int ret; dout("crypto_key_unarmor %s\n", inkey); buf = kmalloc(blen, GFP_NOFS); if (!buf) return -ENOMEM; blen = ceph_unarmor(buf, inkey, inkey+inlen); if (blen < 0) { kfree(buf); return blen; } p = buf; ret = ceph_crypto_key_decode(key, &p, p + blen); kfree(buf); if (ret) return ret; dout("crypto_key_unarmor key %p type %d len %d\n", key, key->type, key->len); return 0; } void ceph_crypto_key_destroy(struct ceph_crypto_key *key) { if (key) { kfree_sensitive(key->key); key->key = NULL; if (key->tfm) { crypto_free_sync_skcipher(key->tfm); key->tfm = NULL; } } } static const u8 *aes_iv = (u8 *)CEPH_AES_IV; /* * Should be used for buffers allocated with kvmalloc(). * Currently these are encrypt out-buffer (ceph_buffer) and decrypt * in-buffer (msg front). * * Dispose of @sgt with teardown_sgtable(). * * @prealloc_sg is to avoid memory allocation inside sg_alloc_table() * in cases where a single sg is sufficient. No attempt to reduce the * number of sgs by squeezing physically contiguous pages together is * made though, for simplicity. */ static int setup_sgtable(struct sg_table *sgt, struct scatterlist *prealloc_sg, const void *buf, unsigned int buf_len) { struct scatterlist *sg; const bool is_vmalloc = is_vmalloc_addr(buf); unsigned int off = offset_in_page(buf); unsigned int chunk_cnt = 1; unsigned int chunk_len = PAGE_ALIGN(off + buf_len); int i; int ret; if (buf_len == 0) { memset(sgt, 0, sizeof(*sgt)); return -EINVAL; } if (is_vmalloc) { chunk_cnt = chunk_len >> PAGE_SHIFT; chunk_len = PAGE_SIZE; } if (chunk_cnt > 1) { ret = sg_alloc_table(sgt, chunk_cnt, GFP_NOFS); if (ret) return ret; } else { WARN_ON(chunk_cnt != 1); sg_init_table(prealloc_sg, 1); sgt->sgl = prealloc_sg; sgt->nents = sgt->orig_nents = 1; } for_each_sg(sgt->sgl, sg, sgt->orig_nents, i) { struct page *page; unsigned int len = min(chunk_len - off, buf_len); if (is_vmalloc) page = vmalloc_to_page(buf); else page = virt_to_page(buf); sg_set_page(sg, page, len, off); off = 0; buf += len; buf_len -= len; } WARN_ON(buf_len != 0); return 0; } static void teardown_sgtable(struct sg_table *sgt) { if (sgt->orig_nents > 1) sg_free_table(sgt); } static int ceph_aes_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { SYNC_SKCIPHER_REQUEST_ON_STACK(req, key->tfm); struct sg_table sgt; struct scatterlist prealloc_sg; char iv[AES_BLOCK_SIZE] __aligned(8); int pad_byte = AES_BLOCK_SIZE - (in_len & (AES_BLOCK_SIZE - 1)); int crypt_len = encrypt ? in_len + pad_byte : in_len; int ret; WARN_ON(crypt_len > buf_len); if (encrypt) memset(buf + in_len, pad_byte, pad_byte); ret = setup_sgtable(&sgt, &prealloc_sg, buf, crypt_len); if (ret) return ret; memcpy(iv, aes_iv, AES_BLOCK_SIZE); skcipher_request_set_sync_tfm(req, key->tfm); skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sgt.sgl, sgt.sgl, crypt_len, iv); /* print_hex_dump(KERN_ERR, "key: ", DUMP_PREFIX_NONE, 16, 1, key->key, key->len, 1); print_hex_dump(KERN_ERR, " in: ", DUMP_PREFIX_NONE, 16, 1, buf, crypt_len, 1); */ if (encrypt) ret = crypto_skcipher_encrypt(req); else ret = crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (ret) { pr_err("%s %scrypt failed: %d\n", __func__, encrypt ? "en" : "de", ret); goto out_sgt; } /* print_hex_dump(KERN_ERR, "out: ", DUMP_PREFIX_NONE, 16, 1, buf, crypt_len, 1); */ if (encrypt) { *pout_len = crypt_len; } else { pad_byte = *(char *)(buf + in_len - 1); if (pad_byte > 0 && pad_byte <= AES_BLOCK_SIZE && in_len >= pad_byte) { *pout_len = in_len - pad_byte; } else { pr_err("%s got bad padding %d on in_len %d\n", __func__, pad_byte, in_len); ret = -EPERM; goto out_sgt; } } out_sgt: teardown_sgtable(&sgt); return ret; } int ceph_crypt(const struct ceph_crypto_key *key, bool encrypt, void *buf, int buf_len, int in_len, int *pout_len) { switch (key->type) { case CEPH_CRYPTO_NONE: *pout_len = in_len; return 0; case CEPH_CRYPTO_AES: return ceph_aes_crypt(key, encrypt, buf, buf_len, in_len, pout_len); default: return -ENOTSUPP; } } static int ceph_key_preparse(struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; size_t datalen = prep->datalen; int ret; void *p; ret = -EINVAL; if (datalen <= 0 || datalen > 32767 || !prep->data) goto err; ret = -ENOMEM; ckey = kmalloc(sizeof(*ckey), GFP_KERNEL); if (!ckey) goto err; /* TODO ceph_crypto_key_decode should really take const input */ p = (void *)prep->data; ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen); if (ret < 0) goto err_ckey; prep->payload.data[0] = ckey; prep->quotalen = datalen; return 0; err_ckey: kfree(ckey); err: return ret; } static void ceph_key_free_preparse(struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey = prep->payload.data[0]; ceph_crypto_key_destroy(ckey); kfree(ckey); } static void ceph_key_destroy(struct key *key) { struct ceph_crypto_key *ckey = key->payload.data[0]; ceph_crypto_key_destroy(ckey); kfree(ckey); } struct key_type key_type_ceph = { .name = "ceph", .preparse = ceph_key_preparse, .free_preparse = ceph_key_free_preparse, .instantiate = generic_key_instantiate, .destroy = ceph_key_destroy, }; int __init ceph_crypto_init(void) { return register_key_type(&key_type_ceph); } void ceph_crypto_shutdown(void) { unregister_key_type(&key_type_ceph); }
2 2 2 2 5 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 5 5 5 59 58 5 53 58 2 2 21 21 21 21 16 14 14 8 3 3 5 2 2 2 2 2 2 2 2 2 2 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) Neil Brown 2002 * Copyright (C) Christoph Hellwig 2007 * * This file contains the code mapping from inodes to NFS file handles, * and for mapping back from file handles to dentries. * * For details on why we do all the strange and hairy things in here * take a look at Documentation/filesystems/nfs/exporting.rst. */ #include <linux/exportfs.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/sched.h> #include <linux/cred.h> #define dprintk(fmt, args...) pr_debug(fmt, ##args) static int get_name(const struct path *path, char *name, struct dentry *child); static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, char *name, struct dentry *child) { const struct export_operations *nop = dir->d_sb->s_export_op; struct path path = {.mnt = mnt, .dentry = dir}; if (nop->get_name) return nop->get_name(dir, name, child); else return get_name(&path, name, child); } /* * Check if the dentry or any of it's aliases is acceptable. */ static struct dentry * find_acceptable_alias(struct dentry *result, int (*acceptable)(void *context, struct dentry *dentry), void *context) { struct dentry *dentry, *toput = NULL; struct inode *inode; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) dput(toput); if (dentry != result && acceptable(context, dentry)) { dput(result); return dentry; } spin_lock(&inode->i_lock); toput = dentry; } spin_unlock(&inode->i_lock); if (toput) dput(toput); return NULL; } static bool dentry_connected(struct dentry *dentry) { dget(dentry); while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); dput(dentry); if (dentry == parent) { dput(parent); return false; } dentry = parent; } dput(dentry); return true; } static void clear_disconnected(struct dentry *dentry) { dget(dentry); while (dentry->d_flags & DCACHE_DISCONNECTED) { struct dentry *parent = dget_parent(dentry); WARN_ON_ONCE(IS_ROOT(dentry)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_DISCONNECTED; spin_unlock(&dentry->d_lock); dput(dentry); dentry = parent; } dput(dentry); } /* * Reconnect a directory dentry with its parent. * * This can return a dentry, or NULL, or an error. * * In the first case the returned dentry is the parent of the given * dentry, and may itself need to be reconnected to its parent. * * In the NULL case, a concurrent VFS operation has either renamed or * removed this directory. The concurrent operation has reconnected our * dentry, so we no longer need to. */ static struct dentry *reconnect_one(struct vfsmount *mnt, struct dentry *dentry, char *nbuf) { struct dentry *parent; struct dentry *tmp; int err; parent = ERR_PTR(-EACCES); if (mnt->mnt_sb->s_export_op->get_parent) parent = mnt->mnt_sb->s_export_op->get_parent(dentry); if (IS_ERR(parent)) { dprintk("get_parent of %lu failed, err %ld\n", dentry->d_inode->i_ino, PTR_ERR(parent)); return parent; } dprintk("%s: find name of %lu in %lu\n", __func__, dentry->d_inode->i_ino, parent->d_inode->i_ino); err = exportfs_get_name(mnt, parent, nbuf, dentry); if (err == -ENOENT) goto out_reconnected; if (err) goto out_err; dprintk("%s: found name: %s\n", __func__, nbuf); tmp = lookup_one_unlocked(mnt_idmap(mnt), nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { dprintk("lookup failed: %ld\n", PTR_ERR(tmp)); err = PTR_ERR(tmp); goto out_err; } if (tmp != dentry) { /* * Somebody has renamed it since exportfs_get_name(); * great, since it could've only been renamed if it * got looked up and thus connected, and it would * remain connected afterwards. We are done. */ dput(tmp); goto out_reconnected; } dput(tmp); if (IS_ROOT(dentry)) { err = -ESTALE; goto out_err; } return parent; out_err: dput(parent); return ERR_PTR(err); out_reconnected: dput(parent); /* * Someone must have renamed our entry into another parent, in * which case it has been reconnected by the rename. * * Or someone removed it entirely, in which case filehandle * lookup will succeed but the directory is now IS_DEAD and * subsequent operations on it will fail. * * Alternatively, maybe there was no race at all, and the * filesystem is just corrupt and gave us a parent that doesn't * actually contain any entry pointing to this inode. So, * double check that this worked and return -ESTALE if not: */ if (!dentry_connected(dentry)) return ERR_PTR(-ESTALE); return NULL; } /* * Make sure target_dir is fully connected to the dentry tree. * * On successful return, DCACHE_DISCONNECTED will be cleared on * target_dir, and target_dir->d_parent->...->d_parent will reach the * root of the filesystem. * * Whenever DCACHE_DISCONNECTED is unset, target_dir is fully connected. * But the converse is not true: target_dir may have DCACHE_DISCONNECTED * set but already be connected. In that case we'll verify the * connection to root and then clear the flag. * * Note that target_dir could be removed by a concurrent operation. In * that case reconnect_path may still succeed with target_dir fully * connected, but further operations using the filehandle will fail when * necessary (due to S_DEAD being set on the directory). */ static int reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { struct dentry *dentry, *parent; dentry = dget(target_dir); while (dentry->d_flags & DCACHE_DISCONNECTED) { BUG_ON(dentry == mnt->mnt_sb->s_root); if (IS_ROOT(dentry)) parent = reconnect_one(mnt, dentry, nbuf); else parent = dget_parent(dentry); if (!parent) break; dput(dentry); if (IS_ERR(parent)) return PTR_ERR(parent); dentry = parent; } dput(dentry); clear_disconnected(target_dir); return 0; } struct getdents_callback { struct dir_context ctx; char *name; /* name that was found. It already points to a buffer NAME_MAX+1 is size */ u64 ino; /* the inum we are looking for */ int found; /* inode matched? */ int sequence; /* sequence counter */ }; /* * A rather strange filldir function to capture * the name matching the specified inode number. */ static bool filldir_one(struct dir_context *ctx, const char *name, int len, loff_t pos, u64 ino, unsigned int d_type) { struct getdents_callback *buf = container_of(ctx, struct getdents_callback, ctx); buf->sequence++; if (buf->ino == ino && len <= NAME_MAX && !is_dot_dotdot(name, len)) { memcpy(buf->name, name, len); buf->name[len] = '\0'; buf->found = 1; return false; // no more } return true; } /** * get_name - default export_operations->get_name function * @path: the directory in which to find a name * @name: a pointer to a %NAME_MAX+1 char buffer to store the name * @child: the dentry for the child directory. * * calls readdir on the parent until it finds an entry with * the same inode number as the child, and returns that. */ static int get_name(const struct path *path, char *name, struct dentry *child) { const struct cred *cred = current_cred(); struct inode *dir = path->dentry->d_inode; int error; struct file *file; struct kstat stat; struct path child_path = { .mnt = path->mnt, .dentry = child, }; struct getdents_callback buffer = { .ctx.actor = filldir_one, .name = name, }; error = -ENOTDIR; if (!dir || !S_ISDIR(dir->i_mode)) goto out; error = -EINVAL; if (!dir->i_fop) goto out; /* * inode->i_ino is unsigned long, kstat->ino is u64, so the * former would be insufficient on 32-bit hosts when the * filesystem supports 64-bit inode numbers. So we need to * actually call ->getattr, not just read i_ino: */ error = vfs_getattr_nosec(&child_path, &stat, STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; buffer.ino = stat.ino; /* * Open the directory ... */ file = dentry_open(path, O_RDONLY, cred); error = PTR_ERR(file); if (IS_ERR(file)) goto out; error = -EINVAL; if (!file->f_op->iterate_shared) goto out_close; buffer.sequence = 0; while (1) { int old_seq = buffer.sequence; error = iterate_dir(file, &buffer.ctx); if (buffer.found) { error = 0; break; } if (error < 0) break; error = -ENOENT; if (old_seq == buffer.sequence) break; } out_close: fput(file); out: return error; } #define FILEID_INO64_GEN_LEN 3 /** * exportfs_encode_ino64_fid - encode non-decodeable 64bit ino file id * @inode: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there (in 4 byte units) * * This generic function is used to encode a non-decodeable file id for * fanotify for filesystems that do not support NFS export. */ static int exportfs_encode_ino64_fid(struct inode *inode, struct fid *fid, int *max_len) { if (*max_len < FILEID_INO64_GEN_LEN) { *max_len = FILEID_INO64_GEN_LEN; return FILEID_INVALID; } fid->i64.ino = inode->i_ino; fid->i64.gen = inode->i_generation; *max_len = FILEID_INO64_GEN_LEN; return FILEID_INO64_GEN; } /** * exportfs_encode_inode_fh - encode a file handle from inode * @inode: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there * @parent: parent directory inode, if wanted * @flags: properties of the requested file handle * * Returns an enum fid_type or a negative errno. */ int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid, int *max_len, struct inode *parent, int flags) { const struct export_operations *nop = inode->i_sb->s_export_op; enum fid_type type; if (!exportfs_can_encode_fh(nop, flags)) return -EOPNOTSUPP; if (!nop && (flags & EXPORT_FH_FID)) type = exportfs_encode_ino64_fid(inode, fid, max_len); else type = nop->encode_fh(inode, fid->raw, max_len, parent); if (type > 0 && FILEID_USER_FLAGS(type)) { pr_warn_once("%s: unexpected fh type value 0x%x from fstype %s.\n", __func__, type, inode->i_sb->s_type->name); return -EINVAL; } return type; } EXPORT_SYMBOL_GPL(exportfs_encode_inode_fh); /** * exportfs_encode_fh - encode a file handle from dentry * @dentry: the object to encode * @fid: where to store the file handle fragment * @max_len: maximum length to store there * @flags: properties of the requested file handle * * Returns an enum fid_type or a negative errno. */ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, int flags) { int error; struct dentry *p = NULL; struct inode *inode = dentry->d_inode, *parent = NULL; if ((flags & EXPORT_FH_CONNECTABLE) && !S_ISDIR(inode->i_mode)) { p = dget_parent(dentry); /* * note that while p might've ceased to be our parent already, * it's still pinned by and still positive. */ parent = p->d_inode; } error = exportfs_encode_inode_fh(inode, fid, max_len, parent, flags); dput(p); return error; } EXPORT_SYMBOL_GPL(exportfs_encode_fh); struct dentry * exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, unsigned int flags, int (*acceptable)(void *, struct dentry *), void *context) { const struct export_operations *nop = mnt->mnt_sb->s_export_op; struct dentry *result, *alias; char nbuf[NAME_MAX+1]; int err; if (fileid_type < 0 || FILEID_USER_FLAGS(fileid_type)) return ERR_PTR(-EINVAL); /* * Try to get any dentry for the given file handle from the filesystem. */ if (!exportfs_can_decode_fh(nop)) return ERR_PTR(-ESTALE); result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); if (IS_ERR_OR_NULL(result)) return result; if ((flags & EXPORT_FH_DIR_ONLY) && !d_is_dir(result)) { err = -ENOTDIR; goto err_result; } /* * If no acceptance criteria was specified by caller, a disconnected * dentry is also accepatable. Callers may use this mode to query if * file handle is stale or to get a reference to an inode without * risking the high overhead caused by directory reconnect. */ if (!acceptable) return result; if (d_is_dir(result)) { /* * This request is for a directory. * * On the positive side there is only one dentry for each * directory inode. On the negative side this implies that we * to ensure our dentry is connected all the way up to the * filesystem root. */ if (result->d_flags & DCACHE_DISCONNECTED) { err = reconnect_path(mnt, result, nbuf); if (err) goto err_result; } if (!acceptable(context, result)) { err = -EACCES; goto err_result; } return result; } else { /* * It's not a directory. Life is a little more complicated. */ struct dentry *target_dir, *nresult; /* * See if either the dentry we just got from the filesystem * or any alias for it is acceptable. This is always true * if this filesystem is exported without the subtreecheck * option. If the filesystem is exported with the subtree * check option there's a fair chance we need to look at * the parent directory in the file handle and make sure * it's connected to the filesystem root. */ alias = find_acceptable_alias(result, acceptable, context); if (alias) return alias; /* * Try to extract a dentry for the parent directory from the * file handle. If this fails we'll have to give up. */ err = -ESTALE; if (!nop->fh_to_parent) goto err_result; target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, fh_len, fileid_type); if (!target_dir) goto err_result; err = PTR_ERR(target_dir); if (IS_ERR(target_dir)) goto err_result; /* * And as usual we need to make sure the parent directory is * connected to the filesystem root. The VFS really doesn't * like disconnected directories.. */ err = reconnect_path(mnt, target_dir, nbuf); if (err) { dput(target_dir); goto err_result; } /* * Now that we've got both a well-connected parent and a * dentry for the inode we're after, make sure that our * inode is actually connected to the parent. */ err = exportfs_get_name(mnt, target_dir, nbuf, result); if (err) { dput(target_dir); goto err_result; } inode_lock(target_dir->d_inode); nresult = lookup_one(mnt_idmap(mnt), nbuf, target_dir, strlen(nbuf)); if (!IS_ERR(nresult)) { if (unlikely(nresult->d_inode != result->d_inode)) { dput(nresult); nresult = ERR_PTR(-ESTALE); } } inode_unlock(target_dir->d_inode); /* * At this point we are done with the parent, but it's pinned * by the child dentry anyway. */ dput(target_dir); if (IS_ERR(nresult)) { err = PTR_ERR(nresult); goto err_result; } dput(result); result = nresult; /* * And finally make sure the dentry is actually acceptable * to NFSD. */ alias = find_acceptable_alias(result, acceptable, context); if (!alias) { err = -EACCES; goto err_result; } return alias; } err_result: dput(result); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw); struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *), void *context) { struct dentry *ret; ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type, 0, acceptable, context); if (IS_ERR_OR_NULL(ret)) { if (ret == ERR_PTR(-ENOMEM)) return ret; return ERR_PTR(-ESTALE); } return ret; } EXPORT_SYMBOL_GPL(exportfs_decode_fh); MODULE_DESCRIPTION("Code mapping from inodes to file handles"); MODULE_LICENSE("GPL");
2870 36 13925 6629 18113 4074 6473 169 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 /* SPDX-License-Identifier: GPL-2.0 */ /* * This header provides generic wrappers for memory access instrumentation that * the compiler cannot emit for: KASAN, KCSAN, KMSAN. */ #ifndef _LINUX_INSTRUMENTED_H #define _LINUX_INSTRUMENTED_H #include <linux/compiler.h> #include <linux/kasan-checks.h> #include <linux/kcsan-checks.h> #include <linux/kmsan-checks.h> #include <linux/types.h> /** * instrument_read - instrument regular read access * @v: address of access * @size: size of access * * Instrument a regular read access. The instrumentation should be inserted * before the actual read happens. */ static __always_inline void instrument_read(const volatile void *v, size_t size) { kasan_check_read(v, size); kcsan_check_read(v, size); } /** * instrument_write - instrument regular write access * @v: address of access * @size: size of access * * Instrument a regular write access. The instrumentation should be inserted * before the actual write happens. */ static __always_inline void instrument_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_write(v, size); } /** * instrument_read_write - instrument regular read-write access * @v: address of access * @size: size of access * * Instrument a regular write access. The instrumentation should be inserted * before the actual write happens. */ static __always_inline void instrument_read_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_read_write(v, size); } /** * instrument_atomic_read - instrument atomic read access * @v: address of access * @size: size of access * * Instrument an atomic read access. The instrumentation should be inserted * before the actual read happens. */ static __always_inline void instrument_atomic_read(const volatile void *v, size_t size) { kasan_check_read(v, size); kcsan_check_atomic_read(v, size); } /** * instrument_atomic_write - instrument atomic write access * @v: address of access * @size: size of access * * Instrument an atomic write access. The instrumentation should be inserted * before the actual write happens. */ static __always_inline void instrument_atomic_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_atomic_write(v, size); } /** * instrument_atomic_read_write - instrument atomic read-write access * @v: address of access * @size: size of access * * Instrument an atomic read-write access. The instrumentation should be * inserted before the actual write happens. */ static __always_inline void instrument_atomic_read_write(const volatile void *v, size_t size) { kasan_check_write(v, size); kcsan_check_atomic_read_write(v, size); } /** * instrument_copy_to_user - instrument reads of copy_to_user * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument reads from kernel memory, that are due to copy_to_user (and * variants). The instrumentation must be inserted before the accesses. */ static __always_inline void instrument_copy_to_user(void __user *to, const void *from, unsigned long n) { kasan_check_read(from, n); kcsan_check_read(from, n); kmsan_copy_to_user(to, from, n, 0); } /** * instrument_copy_from_user_before - add instrumentation before copy_from_user * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted before the accesses. */ static __always_inline void instrument_copy_from_user_before(const void *to, const void __user *from, unsigned long n) { kasan_check_write(to, n); kcsan_check_write(to, n); } /** * instrument_copy_from_user_after - add instrumentation after copy_from_user * @to: destination address * @from: source address * @n: number of bytes to copy * @left: number of bytes not copied (as returned by copy_from_user) * * Instrument writes to kernel memory, that are due to copy_from_user (and * variants). The instrumentation should be inserted after the accesses. */ static __always_inline void instrument_copy_from_user_after(const void *to, const void __user *from, unsigned long n, unsigned long left) { kmsan_unpoison_memory(to, n - left); } /** * instrument_memcpy_before - add instrumentation before non-instrumented memcpy * @to: destination address * @from: source address * @n: number of bytes to copy * * Instrument memory accesses that happen in custom memcpy implementations. The * instrumentation should be inserted before the memcpy call. */ static __always_inline void instrument_memcpy_before(void *to, const void *from, unsigned long n) { kasan_check_write(to, n); kasan_check_read(from, n); kcsan_check_write(to, n); kcsan_check_read(from, n); } /** * instrument_memcpy_after - add instrumentation after non-instrumented memcpy * @to: destination address * @from: source address * @n: number of bytes to copy * @left: number of bytes not copied (if known) * * Instrument memory accesses that happen in custom memcpy implementations. The * instrumentation should be inserted after the memcpy call. */ static __always_inline void instrument_memcpy_after(void *to, const void *from, unsigned long n, unsigned long left) { kmsan_memmove(to, from, n - left); } /** * instrument_get_user() - add instrumentation to get_user()-like macros * @to: destination variable, may not be address-taken * * get_user() and friends are fragile, so it may depend on the implementation * whether the instrumentation happens before or after the data is copied from * the userspace. */ #define instrument_get_user(to) \ ({ \ u64 __tmp = (u64)(to); \ kmsan_unpoison_memory(&__tmp, sizeof(__tmp)); \ to = __tmp; \ }) /** * instrument_put_user() - add instrumentation to put_user()-like macros * @from: source address * @ptr: userspace pointer to copy to * @size: number of bytes to copy * * put_user() and friends are fragile, so it may depend on the implementation * whether the instrumentation happens before or after the data is copied from * the userspace. */ #define instrument_put_user(from, ptr, size) \ ({ \ kmsan_copy_to_user(ptr, &from, sizeof(from), 0); \ }) #endif /* _LINUX_INSTRUMENTED_H */
3 3 3 3 3 2 2 2 2 2 1 1 3 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_cgroup.c Control Group Classifier * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/rcupdate.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> #include <net/sock.h> #include <net/cls_cgroup.h> #include <net/tc_wrapper.h> struct cls_cgroup_head { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; struct rcu_work rwork; }; TC_INDIRECT_SCOPE int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { struct cls_cgroup_head *head = rcu_dereference_bh(tp->root); u32 classid = task_get_classid(skb); if (unlikely(!head)) return -1; if (!classid) return -1; if (!tcf_em_tree_match(skb, &head->ematches, NULL)) return -1; res->classid = classid; res->class = 0; return tcf_exts_exec(skb, &head->exts, res); } static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle) { return NULL; } static int cls_cgroup_init(struct tcf_proto *tp) { return 0; } static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; static void __cls_cgroup_destroy(struct cls_cgroup_head *head) { tcf_exts_destroy(&head->exts); tcf_em_tree_destroy(&head->ematches); tcf_exts_put_net(&head->exts); kfree(head); } static void cls_cgroup_destroy_work(struct work_struct *work) { struct cls_cgroup_head *head = container_of(to_rcu_work(work), struct cls_cgroup_head, rwork); rtnl_lock(); __cls_cgroup_destroy(head); rtnl_unlock(); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct cls_cgroup_head *new; int err; if (!tca[TCA_OPTIONS]) return -EINVAL; if (!head && !handle) return -EINVAL; if (head && handle != head->handle) return -ENOENT; new = kzalloc(sizeof(*head), GFP_KERNEL); if (!new) return -ENOBUFS; err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested_deprecated(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], cgroup_policy, NULL); if (err < 0) goto errout; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, flags, extack); if (err < 0) goto errout; err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches); if (err < 0) goto errout; rcu_assign_pointer(tp->root, new); if (head) { tcf_exts_get_net(&head->exts); tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); } return 0; errout: tcf_exts_destroy(&new->exts); kfree(new); return err; } static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ if (head) { if (tcf_exts_get_net(&head->exts)) tcf_queue_work(&head->rwork, cls_cgroup_destroy_work); else __cls_cgroup_destroy(head); } } static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); if (arg->count < arg->skip) goto skip; if (!head) return; if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; } skip: arg->count++; } static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct nlattr *nest; t->tcm_handle = head->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (tcf_exts_dump(skb, &head->exts) < 0 || tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &head->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct tcf_proto_ops cls_cgroup_ops __read_mostly = { .kind = "cgroup", .init = cls_cgroup_init, .change = cls_cgroup_change, .classify = cls_cgroup_classify, .destroy = cls_cgroup_destroy, .get = cls_cgroup_get, .delete = cls_cgroup_delete, .walk = cls_cgroup_walk, .dump = cls_cgroup_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("cgroup"); static int __init init_cgroup_cls(void) { return register_tcf_proto_ops(&cls_cgroup_ops); } static void __exit exit_cgroup_cls(void) { unregister_tcf_proto_ops(&cls_cgroup_ops); } module_init(init_cgroup_cls); module_exit(exit_cgroup_cls); MODULE_DESCRIPTION("TC cgroup classifier"); MODULE_LICENSE("GPL");
14 14 14 14 14 13 2 12 14 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 // SPDX-License-Identifier: GPL-2.0-only /* String matching match for iptables * * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net> */ #include <linux/gfp.h> #include <linux/init.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_string.h> #include <linux/textsearch.h> MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>"); MODULE_DESCRIPTION("Xtables: string-based matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); MODULE_ALIAS("ebt_string"); static bool string_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_string_info *conf = par->matchinfo; bool invert; invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT; return (skb_find_text((struct sk_buff *)skb, conf->from_offset, conf->to_offset, conf->config) != UINT_MAX) ^ invert; } #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) static int string_mt_check(const struct xt_mtchk_param *par) { struct xt_string_info *conf = par->matchinfo; struct ts_config *ts_conf; int flags = TS_AUTOLOAD; /* Damn, can't handle this case properly with iptables... */ if (conf->from_offset > conf->to_offset) return -EINVAL; if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0') return -EINVAL; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) return -EINVAL; if (conf->u.v1.flags & ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) return -EINVAL; if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE) flags |= TS_IGNORECASE; ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, GFP_KERNEL, flags); if (IS_ERR(ts_conf)) return PTR_ERR(ts_conf); conf->config = ts_conf; return 0; } static void string_mt_destroy(const struct xt_mtdtor_param *par) { textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config); } static struct xt_match xt_string_mt_reg __read_mostly = { .name = "string", .revision = 1, .family = NFPROTO_UNSPEC, .checkentry = string_mt_check, .match = string_mt, .destroy = string_mt_destroy, .matchsize = sizeof(struct xt_string_info), .usersize = offsetof(struct xt_string_info, config), .me = THIS_MODULE, }; static int __init string_mt_init(void) { return xt_register_match(&xt_string_mt_reg); } static void __exit string_mt_exit(void) { xt_unregister_match(&xt_string_mt_reg); } module_init(string_mt_init); module_exit(string_mt_exit);
10 6 6 34 33 10 10 10 53 53 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 // SPDX-License-Identifier: GPL-2.0 /* * dax: direct host memory access * Copyright (C) 2020 Red Hat, Inc. */ #include "fuse_i.h" #include <linux/delay.h> #include <linux/dax.h> #include <linux/uio.h> #include <linux/pagemap.h> #include <linux/pfn_t.h> #include <linux/iomap.h> #include <linux/interval_tree.h> /* * Default memory range size. A power of 2 so it agrees with common FUSE_INIT * map_alignment values 4KB and 64KB. */ #define FUSE_DAX_SHIFT 21 #define FUSE_DAX_SZ (1 << FUSE_DAX_SHIFT) #define FUSE_DAX_PAGES (FUSE_DAX_SZ / PAGE_SIZE) /* Number of ranges reclaimer will try to free in one invocation */ #define FUSE_DAX_RECLAIM_CHUNK (10) /* * Dax memory reclaim threshold in percetage of total ranges. When free * number of free ranges drops below this threshold, reclaim can trigger * Default is 20% */ #define FUSE_DAX_RECLAIM_THRESHOLD (20) /** Translation information for file offsets to DAX window offsets */ struct fuse_dax_mapping { /* Pointer to inode where this memory range is mapped */ struct inode *inode; /* Will connect in fcd->free_ranges to keep track of free memory */ struct list_head list; /* For interval tree in file/inode */ struct interval_tree_node itn; /* Will connect in fc->busy_ranges to keep track busy memory */ struct list_head busy_list; /** Position in DAX window */ u64 window_offset; /** Length of mapping, in bytes */ loff_t length; /* Is this mapping read-only or read-write */ bool writable; /* reference count when the mapping is used by dax iomap. */ refcount_t refcnt; }; /* Per-inode dax map */ struct fuse_inode_dax { /* Semaphore to protect modifications to the dmap tree */ struct rw_semaphore sem; /* Sorted rb tree of struct fuse_dax_mapping elements */ struct rb_root_cached tree; unsigned long nr; }; struct fuse_conn_dax { /* DAX device */ struct dax_device *dev; /* Lock protecting accessess to members of this structure */ spinlock_t lock; /* List of memory ranges which are busy */ unsigned long nr_busy_ranges; struct list_head busy_ranges; /* Worker to free up memory ranges */ struct delayed_work free_work; /* Wait queue for a dax range to become free */ wait_queue_head_t range_waitq; /* DAX Window Free Ranges */ long nr_free_ranges; struct list_head free_ranges; unsigned long nr_ranges; }; static inline struct fuse_dax_mapping * node_to_dmap(struct interval_tree_node *node) { if (!node) return NULL; return container_of(node, struct fuse_dax_mapping, itn); } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode); static void __kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { unsigned long free_threshold; /* If number of free ranges are below threshold, start reclaim */ free_threshold = max_t(unsigned long, fcd->nr_ranges * FUSE_DAX_RECLAIM_THRESHOLD / 100, 1); if (fcd->nr_free_ranges < free_threshold) queue_delayed_work(system_long_wq, &fcd->free_work, msecs_to_jiffies(delay_ms)); } static void kick_dmap_free_worker(struct fuse_conn_dax *fcd, unsigned long delay_ms) { spin_lock(&fcd->lock); __kick_dmap_free_worker(fcd, delay_ms); spin_unlock(&fcd->lock); } static struct fuse_dax_mapping *alloc_dax_mapping(struct fuse_conn_dax *fcd) { struct fuse_dax_mapping *dmap; spin_lock(&fcd->lock); dmap = list_first_entry_or_null(&fcd->free_ranges, struct fuse_dax_mapping, list); if (dmap) { list_del_init(&dmap->list); WARN_ON(fcd->nr_free_ranges <= 0); fcd->nr_free_ranges--; } __kick_dmap_free_worker(fcd, 0); spin_unlock(&fcd->lock); return dmap; } /* This assumes fcd->lock is held */ static void __dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_del_init(&dmap->busy_list); WARN_ON(fcd->nr_busy_ranges == 0); fcd->nr_busy_ranges--; } static void dmap_remove_busy_list(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { spin_lock(&fcd->lock); __dmap_remove_busy_list(fcd, dmap); spin_unlock(&fcd->lock); } /* This assumes fcd->lock is held */ static void __dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { list_add_tail(&dmap->list, &fcd->free_ranges); fcd->nr_free_ranges++; wake_up(&fcd->range_waitq); } static void dmap_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { /* Return fuse_dax_mapping to free list */ spin_lock(&fcd->lock); __dmap_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); } static int fuse_setup_one_mapping(struct inode *inode, unsigned long start_idx, struct fuse_dax_mapping *dmap, bool writable, bool upgrade) { struct fuse_mount *fm = get_fuse_mount(inode); struct fuse_conn_dax *fcd = fm->fc->dax; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_setupmapping_in inarg; loff_t offset = start_idx << FUSE_DAX_SHIFT; FUSE_ARGS(args); ssize_t err; WARN_ON(fcd->nr_free_ranges < 0); /* Ask fuse daemon to setup mapping */ memset(&inarg, 0, sizeof(inarg)); inarg.foffset = offset; inarg.fh = -1; inarg.moffset = dmap->window_offset; inarg.len = FUSE_DAX_SZ; inarg.flags |= FUSE_SETUPMAPPING_FLAG_READ; if (writable) inarg.flags |= FUSE_SETUPMAPPING_FLAG_WRITE; args.opcode = FUSE_SETUPMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 1; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; err = fuse_simple_request(fm, &args); if (err < 0) return err; dmap->writable = writable; if (!upgrade) { /* * We don't take a reference on inode. inode is valid right now * and when inode is going away, cleanup logic should first * cleanup dmap entries. */ dmap->inode = inode; dmap->itn.start = dmap->itn.last = start_idx; /* Protected by fi->dax->sem */ interval_tree_insert(&dmap->itn, &fi->dax->tree); fi->dax->nr++; spin_lock(&fcd->lock); list_add_tail(&dmap->busy_list, &fcd->busy_ranges); fcd->nr_busy_ranges++; spin_unlock(&fcd->lock); } return 0; } static int fuse_send_removemapping(struct inode *inode, struct fuse_removemapping_in *inargp, struct fuse_removemapping_one *remove_one) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); args.opcode = FUSE_REMOVEMAPPING; args.nodeid = fi->nodeid; args.in_numargs = 3; fuse_set_zero_arg0(&args); args.in_args[1].size = sizeof(*inargp); args.in_args[1].value = inargp; args.in_args[2].size = inargp->count * sizeof(*remove_one); args.in_args[2].value = remove_one; return fuse_simple_request(fm, &args); } static int dmap_removemapping_list(struct inode *inode, unsigned int num, struct list_head *to_remove) { struct fuse_removemapping_one *remove_one, *ptr; struct fuse_removemapping_in inarg; struct fuse_dax_mapping *dmap; int ret, i = 0, nr_alloc; nr_alloc = min_t(unsigned int, num, FUSE_REMOVEMAPPING_MAX_ENTRY); remove_one = kmalloc_array(nr_alloc, sizeof(*remove_one), GFP_NOFS); if (!remove_one) return -ENOMEM; ptr = remove_one; list_for_each_entry(dmap, to_remove, list) { ptr->moffset = dmap->window_offset; ptr->len = dmap->length; ptr++; i++; num--; if (i >= nr_alloc || num == 0) { memset(&inarg, 0, sizeof(inarg)); inarg.count = i; ret = fuse_send_removemapping(inode, &inarg, remove_one); if (ret) goto out; ptr = remove_one; i = 0; } } out: kfree(remove_one); return ret; } /* * Cleanup dmap entry and add back to free list. This should be called with * fcd->lock held. */ static void dmap_reinit_add_to_free_pool(struct fuse_conn_dax *fcd, struct fuse_dax_mapping *dmap) { pr_debug("fuse: freeing memory range start_idx=0x%lx end_idx=0x%lx window_offset=0x%llx length=0x%llx\n", dmap->itn.start, dmap->itn.last, dmap->window_offset, dmap->length); __dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; __dmap_add_to_free_pool(fcd, dmap); } /* * Free inode dmap entries whose range falls inside [start, end]. * Does not take any locks. At this point of time it should only be * called from evict_inode() path where we know all dmap entries can be * reclaimed. */ static void inode_reclaim_dmap_range(struct fuse_conn_dax *fcd, struct inode *inode, loff_t start, loff_t end) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap, *n; int err, num = 0; LIST_HEAD(to_remove); unsigned long start_idx = start >> FUSE_DAX_SHIFT; unsigned long end_idx = end >> FUSE_DAX_SHIFT; struct interval_tree_node *node; while (1) { node = interval_tree_iter_first(&fi->dax->tree, start_idx, end_idx); if (!node) break; dmap = node_to_dmap(node); /* inode is going away. There should not be any users of dmap */ WARN_ON(refcount_read(&dmap->refcnt) > 1); interval_tree_remove(&dmap->itn, &fi->dax->tree); num++; list_add(&dmap->list, &to_remove); } /* Nothing to remove */ if (list_empty(&to_remove)) return; WARN_ON(fi->dax->nr < num); fi->dax->nr -= num; err = dmap_removemapping_list(inode, num, &to_remove); if (err && err != -ENOTCONN) { pr_warn("Failed to removemappings. start=0x%llx end=0x%llx\n", start, end); } spin_lock(&fcd->lock); list_for_each_entry_safe(dmap, n, &to_remove, list) { list_del_init(&dmap->list); dmap_reinit_add_to_free_pool(fcd, dmap); } spin_unlock(&fcd->lock); } static int dmap_removemapping_one(struct inode *inode, struct fuse_dax_mapping *dmap) { struct fuse_removemapping_one forget_one; struct fuse_removemapping_in inarg; memset(&inarg, 0, sizeof(inarg)); inarg.count = 1; memset(&forget_one, 0, sizeof(forget_one)); forget_one.moffset = dmap->window_offset; forget_one.len = dmap->length; return fuse_send_removemapping(inode, &inarg, &forget_one); } /* * It is called from evict_inode() and by that time inode is going away. So * this function does not take any locks like fi->dax->sem for traversing * that fuse inode interval tree. If that lock is taken then lock validator * complains of deadlock situation w.r.t fs_reclaim lock. */ void fuse_dax_inode_cleanup(struct inode *inode) { struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_inode *fi = get_fuse_inode(inode); /* * fuse_evict_inode() has already called truncate_inode_pages_final() * before we arrive here. So we should not have to worry about any * pages/exception entries still associated with inode. */ inode_reclaim_dmap_range(fc->dax, inode, 0, -1); WARN_ON(fi->dax->nr); } static void fuse_fill_iomap_hole(struct iomap *iomap, loff_t length) { iomap->addr = IOMAP_NULL_ADDR; iomap->length = length; iomap->type = IOMAP_HOLE; } static void fuse_fill_iomap(struct inode *inode, loff_t pos, loff_t length, struct iomap *iomap, struct fuse_dax_mapping *dmap, unsigned int flags) { loff_t offset, len; loff_t i_size = i_size_read(inode); offset = pos - (dmap->itn.start << FUSE_DAX_SHIFT); len = min(length, dmap->length - offset); /* If length is beyond end of file, truncate further */ if (pos + len > i_size) len = i_size - pos; if (len > 0) { iomap->addr = dmap->window_offset + offset; iomap->length = len; if (flags & IOMAP_FAULT) iomap->length = ALIGN(len, PAGE_SIZE); iomap->type = IOMAP_MAPPED; /* * increace refcnt so that reclaim code knows this dmap is in * use. This assumes fi->dax->sem mutex is held either * shared/exclusive. */ refcount_inc(&dmap->refcnt); /* iomap->private should be NULL */ WARN_ON_ONCE(iomap->private); iomap->private = dmap; } else { /* Mapping beyond end of file is hole */ fuse_fill_iomap_hole(iomap, length); } } static int fuse_setup_new_dax_mapping(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; struct fuse_dax_mapping *dmap, *alloc_dmap = NULL; int ret; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* * Can't do inline reclaim in fault path. We call * dax_layout_busy_page() before we free a range. And * fuse_wait_dax_page() drops mapping->invalidate_lock and requires it. * In fault path we enter with mapping->invalidate_lock held and can't * drop it. Also in fault path we hold mapping->invalidate_lock shared * and not exclusive, so that creates further issues with * fuse_wait_dax_page(). Hence return -EAGAIN and fuse_dax_fault() * will wait for a memory range to become free and retry. */ if (flags & IOMAP_FAULT) { alloc_dmap = alloc_dax_mapping(fcd); if (!alloc_dmap) return -EAGAIN; } else { alloc_dmap = alloc_dax_mapping_reclaim(fcd, inode); if (IS_ERR(alloc_dmap)) return PTR_ERR(alloc_dmap); } /* If we are here, we should have memory allocated */ if (WARN_ON(!alloc_dmap)) return -EIO; /* * Take write lock so that only one caller can try to setup mapping * and other waits. */ down_write(&fi->dax->sem); /* * We dropped lock. Check again if somebody else setup * mapping already. */ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); dmap_add_to_free_pool(fcd, alloc_dmap); up_write(&fi->dax->sem); return 0; } /* Setup one mapping */ ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, alloc_dmap, writable, false); if (ret < 0) { dmap_add_to_free_pool(fcd, alloc_dmap); up_write(&fi->dax->sem); return ret; } fuse_fill_iomap(inode, pos, length, iomap, alloc_dmap, flags); up_write(&fi->dax->sem); return 0; } static int fuse_upgrade_dax_mapping(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; int ret; unsigned long idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* * Take exclusive lock so that only one caller can try to setup * mapping and others wait. */ down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, idx, idx); /* We are holding either inode lock or invalidate_lock, and that should * ensure that dmap can't be truncated. We are holding a reference * on dmap and that should make sure it can't be reclaimed. So dmap * should still be there in tree despite the fact we dropped and * re-acquired the fi->dax->sem lock. */ ret = -EIO; if (WARN_ON(!node)) goto out_err; dmap = node_to_dmap(node); /* We took an extra reference on dmap to make sure its not reclaimd. * Now we hold fi->dax->sem lock and that reference is not needed * anymore. Drop it. */ if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } /* Maybe another thread already upgraded mapping while we were not * holding lock. */ if (dmap->writable) { ret = 0; goto out_fill_iomap; } ret = fuse_setup_one_mapping(inode, pos >> FUSE_DAX_SHIFT, dmap, true, true); if (ret < 0) goto out_err; out_fill_iomap: fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); out_err: up_write(&fi->dax->sem); return ret; } /* This is just for DAX and the mapping is ephemeral, do not use it for other * purposes since there is no block device with a permanent mapping. */ static int fuse_iomap_begin(struct inode *inode, loff_t pos, loff_t length, unsigned int flags, struct iomap *iomap, struct iomap *srcmap) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_dax_mapping *dmap; bool writable = flags & IOMAP_WRITE; unsigned long start_idx = pos >> FUSE_DAX_SHIFT; struct interval_tree_node *node; /* We don't support FIEMAP */ if (WARN_ON(flags & IOMAP_REPORT)) return -EIO; iomap->offset = pos; iomap->flags = 0; iomap->bdev = NULL; iomap->dax_dev = fc->dax->dev; /* * Both read/write and mmap path can race here. So we need something * to make sure if we are setting up mapping, then other path waits * * For now, use a semaphore for this. It probably needs to be * optimized later. */ down_read(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); if (node) { dmap = node_to_dmap(node); if (writable && !dmap->writable) { /* Upgrade read-only mapping to read-write. This will * require exclusive fi->dax->sem lock as we don't want * two threads to be trying to this simultaneously * for same dmap. So drop shared lock and acquire * exclusive lock. * * Before dropping fi->dax->sem lock, take reference * on dmap so that its not freed by range reclaim. */ refcount_inc(&dmap->refcnt); up_read(&fi->dax->sem); pr_debug("%s: Upgrading mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); return fuse_upgrade_dax_mapping(inode, pos, length, flags, iomap); } else { fuse_fill_iomap(inode, pos, length, iomap, dmap, flags); up_read(&fi->dax->sem); return 0; } } else { up_read(&fi->dax->sem); pr_debug("%s: no mapping at offset 0x%llx length 0x%llx\n", __func__, pos, length); if (pos >= i_size_read(inode)) goto iomap_hole; return fuse_setup_new_dax_mapping(inode, pos, length, flags, iomap); } /* * If read beyond end of file happens, fs code seems to return * it as hole */ iomap_hole: fuse_fill_iomap_hole(iomap, length); pr_debug("%s returning hole mapping. pos=0x%llx length_asked=0x%llx length_returned=0x%llx\n", __func__, pos, length, iomap->length); return 0; } static int fuse_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned int flags, struct iomap *iomap) { struct fuse_dax_mapping *dmap = iomap->private; if (dmap) { if (refcount_dec_and_test(&dmap->refcnt)) { /* refcount should not hit 0. This object only goes * away when fuse connection goes away */ WARN_ON_ONCE(1); } } /* DAX writes beyond end-of-file aren't handled using iomap, so the * file size is unchanged and there is nothing to do here. */ return 0; } static const struct iomap_ops fuse_iomap_ops = { .iomap_begin = fuse_iomap_begin, .iomap_end = fuse_iomap_end, }; static void fuse_wait_dax_page(struct inode *inode) { filemap_invalidate_unlock(inode->i_mapping); schedule(); filemap_invalidate_lock(inode->i_mapping); } /* Should be called with mapping->invalidate_lock held exclusively. */ int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start, u64 dmap_end) { return dax_break_layout(inode, dmap_start, dmap_end, fuse_wait_dax_page); } ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock_shared(inode)) return -EAGAIN; } else { inode_lock_shared(inode); } ret = dax_iomap_rw(iocb, to, &fuse_iomap_ops); inode_unlock_shared(inode); /* TODO file_accessed(iocb->f_filp) */ return ret; } static bool file_extending_write(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); return (iov_iter_rw(from) == WRITE && ((iocb->ki_pos) >= i_size_read(inode) || (iocb->ki_pos + iov_iter_count(from) > i_size_read(inode)))); } static ssize_t fuse_dax_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb); ssize_t ret; ret = fuse_direct_io(&io, from, &iocb->ki_pos, FUSE_DIO_WRITE); fuse_write_update_attr(inode, iocb->ki_pos, ret); return ret; } ssize_t fuse_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t ret; if (iocb->ki_flags & IOCB_NOWAIT) { if (!inode_trylock(inode)) return -EAGAIN; } else { inode_lock(inode); } ret = generic_write_checks(iocb, from); if (ret <= 0) goto out; ret = file_remove_privs(iocb->ki_filp); if (ret) goto out; /* TODO file_update_time() but we don't want metadata I/O */ /* Do not use dax for file extending writes as write and on * disk i_size increase are not atomic otherwise. */ if (file_extending_write(iocb, from)) ret = fuse_dax_direct_write(iocb, from); else ret = dax_iomap_rw(iocb, from, &fuse_iomap_ops); out: inode_unlock(inode); if (ret > 0) ret = generic_write_sync(iocb, ret); return ret; } static vm_fault_t __fuse_dax_fault(struct vm_fault *vmf, unsigned int order, bool write) { vm_fault_t ret; struct inode *inode = file_inode(vmf->vma->vm_file); struct super_block *sb = inode->i_sb; pfn_t pfn; int error = 0; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_conn_dax *fcd = fc->dax; bool retry = false; if (write) sb_start_pagefault(sb); retry: if (retry && !(fcd->nr_free_ranges > 0)) wait_event(fcd->range_waitq, (fcd->nr_free_ranges > 0)); /* * We need to serialize against not only truncate but also against * fuse dax memory range reclaim. While a range is being reclaimed, * we do not want any read/write/mmap to make progress and try * to populate page cache or access memory we are trying to free. */ filemap_invalidate_lock_shared(inode->i_mapping); ret = dax_iomap_fault(vmf, order, &pfn, &error, &fuse_iomap_ops); if ((ret & VM_FAULT_ERROR) && error == -EAGAIN) { error = 0; retry = true; filemap_invalidate_unlock_shared(inode->i_mapping); goto retry; } if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, order, pfn); filemap_invalidate_unlock_shared(inode->i_mapping); if (write) sb_end_pagefault(sb); return ret; } static vm_fault_t fuse_dax_fault(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t fuse_dax_huge_fault(struct vm_fault *vmf, unsigned int order) { return __fuse_dax_fault(vmf, order, vmf->flags & FAULT_FLAG_WRITE); } static vm_fault_t fuse_dax_page_mkwrite(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, true); } static vm_fault_t fuse_dax_pfn_mkwrite(struct vm_fault *vmf) { return __fuse_dax_fault(vmf, 0, true); } static const struct vm_operations_struct fuse_dax_vm_ops = { .fault = fuse_dax_fault, .huge_fault = fuse_dax_huge_fault, .page_mkwrite = fuse_dax_page_mkwrite, .pfn_mkwrite = fuse_dax_pfn_mkwrite, }; int fuse_dax_mmap(struct file *file, struct vm_area_struct *vma) { file_accessed(file); vma->vm_ops = &fuse_dax_vm_ops; vm_flags_set(vma, VM_MIXEDMAP | VM_HUGEPAGE); return 0; } static int dmap_writeback_invalidate(struct inode *inode, struct fuse_dax_mapping *dmap) { int ret; loff_t start_pos = dmap->itn.start << FUSE_DAX_SHIFT; loff_t end_pos = (start_pos + FUSE_DAX_SZ - 1); ret = filemap_fdatawrite_range(inode->i_mapping, start_pos, end_pos); if (ret) { pr_debug("fuse: filemap_fdatawrite_range() failed. err=%d start_pos=0x%llx, end_pos=0x%llx\n", ret, start_pos, end_pos); return ret; } ret = invalidate_inode_pages2_range(inode->i_mapping, start_pos >> PAGE_SHIFT, end_pos >> PAGE_SHIFT); if (ret) pr_debug("fuse: invalidate_inode_pages2_range() failed err=%d\n", ret); return ret; } static int reclaim_one_dmap_locked(struct inode *inode, struct fuse_dax_mapping *dmap) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); /* * igrab() was done to make sure inode won't go under us, and this * further avoids the race with evict(). */ ret = dmap_writeback_invalidate(inode, dmap); if (ret) return ret; /* Remove dax mapping from inode interval tree now */ interval_tree_remove(&dmap->itn, &fi->dax->tree); fi->dax->nr--; /* It is possible that umount/shutdown has killed the fuse connection * and worker thread is trying to reclaim memory in parallel. Don't * warn in that case. */ ret = dmap_removemapping_one(inode, dmap); if (ret && ret != -ENOTCONN) { pr_warn("Failed to remove mapping. offset=0x%llx len=0x%llx ret=%d\n", dmap->window_offset, dmap->length, ret); } return 0; } /* Find first mapped dmap for an inode and return file offset. Caller needs * to hold fi->dax->sem lock either shared or exclusive. */ static struct fuse_dax_mapping *inode_lookup_first_dmap(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; for (node = interval_tree_iter_first(&fi->dax->tree, 0, -1); node; node = interval_tree_iter_next(node, 0, -1)) { dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) continue; return dmap; } return NULL; } /* * Find first mapping in the tree and free it and return it. Do not add * it back to free pool. */ static struct fuse_dax_mapping * inode_inline_reclaim_one_dmap(struct fuse_conn_dax *fcd, struct inode *inode, bool *retry) { struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; u64 dmap_start, dmap_end; unsigned long start_idx; int ret; struct interval_tree_node *node; filemap_invalidate_lock(inode->i_mapping); /* Lookup a dmap and corresponding file offset to reclaim. */ down_read(&fi->dax->sem); dmap = inode_lookup_first_dmap(inode); if (dmap) { start_idx = dmap->itn.start; dmap_start = start_idx << FUSE_DAX_SHIFT; dmap_end = dmap_start + FUSE_DAX_SZ - 1; } up_read(&fi->dax->sem); if (!dmap) goto out_mmap_sem; /* * Make sure there are no references to inode pages using * get_user_pages() */ ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("fuse: fuse_dax_break_layouts() failed. err=%d\n", ret); dmap = ERR_PTR(ret); goto out_mmap_sem; } down_write(&fi->dax->sem); node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got reclaimed by somebody else */ if (!node) { if (retry) *retry = true; goto out_write_dmap_sem; } dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) { dmap = NULL; if (retry) *retry = true; goto out_write_dmap_sem; } ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) { dmap = ERR_PTR(ret); goto out_write_dmap_sem; } /* Clean up dmap. Do not add back to free list */ dmap_remove_busy_list(fcd, dmap); dmap->inode = NULL; dmap->itn.start = dmap->itn.last = 0; pr_debug("fuse: %s: inline reclaimed memory range. inode=%p, window_offset=0x%llx, length=0x%llx\n", __func__, inode, dmap->window_offset, dmap->length); out_write_dmap_sem: up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return dmap; } static struct fuse_dax_mapping * alloc_dax_mapping_reclaim(struct fuse_conn_dax *fcd, struct inode *inode) { struct fuse_dax_mapping *dmap; struct fuse_inode *fi = get_fuse_inode(inode); while (1) { bool retry = false; dmap = alloc_dax_mapping(fcd); if (dmap) return dmap; dmap = inode_inline_reclaim_one_dmap(fcd, inode, &retry); /* * Either we got a mapping or it is an error, return in both * the cases. */ if (dmap) return dmap; /* If we could not reclaim a mapping because it * had a reference or some other temporary failure, * Try again. We want to give up inline reclaim only * if there is no range assigned to this node. Otherwise * if a deadlock is possible if we sleep with * mapping->invalidate_lock held and worker to free memory * can't make progress due to unavailability of * mapping->invalidate_lock. So sleep only if fi->dax->nr=0 */ if (retry) continue; /* * There are no mappings which can be reclaimed. Wait for one. * We are not holding fi->dax->sem. So it is possible * that range gets added now. But as we are not holding * mapping->invalidate_lock, worker should still be able to * free up a range and wake us up. */ if (!fi->dax->nr && !(fcd->nr_free_ranges > 0)) { if (wait_event_killable_exclusive(fcd->range_waitq, (fcd->nr_free_ranges > 0))) { return ERR_PTR(-EINTR); } } } } static int lookup_and_reclaim_dmap_locked(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_dax_mapping *dmap; struct interval_tree_node *node; /* Find fuse dax mapping at file offset inode. */ node = interval_tree_iter_first(&fi->dax->tree, start_idx, start_idx); /* Range already got cleaned up by somebody else */ if (!node) return 0; dmap = node_to_dmap(node); /* still in use. */ if (refcount_read(&dmap->refcnt) > 1) return 0; ret = reclaim_one_dmap_locked(inode, dmap); if (ret < 0) return ret; /* Cleanup dmap entry and add back to free list */ spin_lock(&fcd->lock); dmap_reinit_add_to_free_pool(fcd, dmap); spin_unlock(&fcd->lock); return ret; } /* * Free a range of memory. * Locking: * 1. Take mapping->invalidate_lock to block dax faults. * 2. Take fi->dax->sem to protect interval tree and also to make sure * read/write can not reuse a dmap which we might be freeing. */ static int lookup_and_reclaim_dmap(struct fuse_conn_dax *fcd, struct inode *inode, unsigned long start_idx, unsigned long end_idx) { int ret; struct fuse_inode *fi = get_fuse_inode(inode); loff_t dmap_start = start_idx << FUSE_DAX_SHIFT; loff_t dmap_end = (dmap_start + FUSE_DAX_SZ) - 1; filemap_invalidate_lock(inode->i_mapping); ret = fuse_dax_break_layouts(inode, dmap_start, dmap_end); if (ret) { pr_debug("virtio_fs: fuse_dax_break_layouts() failed. err=%d\n", ret); goto out_mmap_sem; } down_write(&fi->dax->sem); ret = lookup_and_reclaim_dmap_locked(fcd, inode, start_idx); up_write(&fi->dax->sem); out_mmap_sem: filemap_invalidate_unlock(inode->i_mapping); return ret; } static int try_to_free_dmap_chunks(struct fuse_conn_dax *fcd, unsigned long nr_to_free) { struct fuse_dax_mapping *dmap, *pos, *temp; int ret, nr_freed = 0; unsigned long start_idx = 0, end_idx = 0; struct inode *inode = NULL; /* Pick first busy range and free it for now*/ while (1) { if (nr_freed >= nr_to_free) break; dmap = NULL; spin_lock(&fcd->lock); if (!fcd->nr_busy_ranges) { spin_unlock(&fcd->lock); return 0; } list_for_each_entry_safe(pos, temp, &fcd->busy_ranges, busy_list) { /* skip this range if it's in use. */ if (refcount_read(&pos->refcnt) > 1) continue; inode = igrab(pos->inode); /* * This inode is going away. That will free * up all the ranges anyway, continue to * next range. */ if (!inode) continue; /* * Take this element off list and add it tail. If * this element can't be freed, it will help with * selecting new element in next iteration of loop. */ dmap = pos; list_move_tail(&dmap->busy_list, &fcd->busy_ranges); start_idx = end_idx = dmap->itn.start; break; } spin_unlock(&fcd->lock); if (!dmap) return 0; ret = lookup_and_reclaim_dmap(fcd, inode, start_idx, end_idx); iput(inode); if (ret) return ret; nr_freed++; } return 0; } static void fuse_dax_free_mem_worker(struct work_struct *work) { int ret; struct fuse_conn_dax *fcd = container_of(work, struct fuse_conn_dax, free_work.work); ret = try_to_free_dmap_chunks(fcd, FUSE_DAX_RECLAIM_CHUNK); if (ret) { pr_debug("fuse: try_to_free_dmap_chunks() failed with err=%d\n", ret); } /* If number of free ranges are still below threshold, requeue */ kick_dmap_free_worker(fcd, 1); } static void fuse_free_dax_mem_ranges(struct list_head *mem_list) { struct fuse_dax_mapping *range, *temp; /* Free All allocated elements */ list_for_each_entry_safe(range, temp, mem_list, list) { list_del(&range->list); if (!list_empty(&range->busy_list)) list_del(&range->busy_list); kfree(range); } } void fuse_dax_conn_free(struct fuse_conn *fc) { if (fc->dax) { fuse_free_dax_mem_ranges(&fc->dax->free_ranges); kfree(fc->dax); fc->dax = NULL; } } static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) { long nr_pages, nr_ranges; struct fuse_dax_mapping *range; int ret, id; size_t dax_size = -1; unsigned long i; init_waitqueue_head(&fcd->range_waitq); INIT_LIST_HEAD(&fcd->free_ranges); INIT_LIST_HEAD(&fcd->busy_ranges); INIT_DELAYED_WORK(&fcd->free_work, fuse_dax_free_mem_worker); id = dax_read_lock(); nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), DAX_ACCESS, NULL, NULL); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); return nr_pages; } nr_ranges = nr_pages/FUSE_DAX_PAGES; pr_debug("%s: dax mapped %ld pages. nr_ranges=%ld\n", __func__, nr_pages, nr_ranges); for (i = 0; i < nr_ranges; i++) { range = kzalloc(sizeof(struct fuse_dax_mapping), GFP_KERNEL); ret = -ENOMEM; if (!range) goto out_err; /* TODO: This offset only works if virtio-fs driver is not * having some memory hidden at the beginning. This needs * better handling */ range->window_offset = i * FUSE_DAX_SZ; range->length = FUSE_DAX_SZ; INIT_LIST_HEAD(&range->busy_list); refcount_set(&range->refcnt, 1); list_add_tail(&range->list, &fcd->free_ranges); } fcd->nr_free_ranges = nr_ranges; fcd->nr_ranges = nr_ranges; return 0; out_err: /* Free All allocated elements */ fuse_free_dax_mem_ranges(&fcd->free_ranges); return ret; } int fuse_dax_conn_alloc(struct fuse_conn *fc, enum fuse_dax_mode dax_mode, struct dax_device *dax_dev) { struct fuse_conn_dax *fcd; int err; fc->dax_mode = dax_mode; if (!dax_dev) return 0; fcd = kzalloc(sizeof(*fcd), GFP_KERNEL); if (!fcd) return -ENOMEM; spin_lock_init(&fcd->lock); fcd->dev = dax_dev; err = fuse_dax_mem_range_init(fcd); if (err) { kfree(fcd); return err; } fc->dax = fcd; return 0; } bool fuse_dax_inode_alloc(struct super_block *sb, struct fuse_inode *fi) { struct fuse_conn *fc = get_fuse_conn_super(sb); fi->dax = NULL; if (fc->dax) { fi->dax = kzalloc(sizeof(*fi->dax), GFP_KERNEL_ACCOUNT); if (!fi->dax) return false; init_rwsem(&fi->dax->sem); fi->dax->tree = RB_ROOT_CACHED; } return true; } static const struct address_space_operations fuse_dax_file_aops = { .direct_IO = noop_direct_IO, .dirty_folio = noop_dirty_folio, }; static bool fuse_should_enable_dax(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); enum fuse_dax_mode dax_mode = fc->dax_mode; if (dax_mode == FUSE_DAX_NEVER) return false; /* * fc->dax may be NULL in 'inode' mode when filesystem device doesn't * support DAX, in which case it will silently fallback to 'never' mode. */ if (!fc->dax) return false; if (dax_mode == FUSE_DAX_ALWAYS) return true; /* dax_mode is FUSE_DAX_INODE* */ return fc->inode_dax && (flags & FUSE_ATTR_DAX); } void fuse_dax_inode_init(struct inode *inode, unsigned int flags) { if (!fuse_should_enable_dax(inode, flags)) return; inode->i_flags |= S_DAX; inode->i_data.a_ops = &fuse_dax_file_aops; } void fuse_dax_dontcache(struct inode *inode, unsigned int flags) { struct fuse_conn *fc = get_fuse_conn(inode); if (fuse_is_inode_dax_mode(fc->dax_mode) && ((bool) IS_DAX(inode) != (bool) (flags & FUSE_ATTR_DAX))) d_mark_dontcache(inode); } bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment) { if (fc->dax && (map_alignment > FUSE_DAX_SHIFT)) { pr_warn("FUSE: map_alignment %u incompatible with dax mem range size %u\n", map_alignment, FUSE_DAX_SZ); return false; } return true; } void fuse_dax_cancel_work(struct fuse_conn *fc) { struct fuse_conn_dax *fcd = fc->dax; if (fcd) cancel_delayed_work_sync(&fcd->free_work); } EXPORT_SYMBOL_GPL(fuse_dax_cancel_work);
4 4 4 4 4 4 4 4 3 3 3 3 2 2 1 2 3 3 3 1 3 3 4 1 1 1 3 4 4 4 4 3 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 /* * Cryptographic API. * * Whirlpool hashing Algorithm * * The Whirlpool algorithm was developed by Paulo S. L. M. Barreto and * Vincent Rijmen. It has been selected as one of cryptographic * primitives by the NESSIE project http://www.cryptonessie.org/ * * The original authors have disclaimed all copyright interest in this * code and thus put it in the public domain. The subsequent authors * have put this under the GNU General Public License. * * By Aaron Grothe ajgrothe@yahoo.com, August 23, 2004 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * */ #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> #include <asm/byteorder.h> #include <linux/types.h> #define WP512_DIGEST_SIZE 64 #define WP384_DIGEST_SIZE 48 #define WP256_DIGEST_SIZE 32 #define WP512_BLOCK_SIZE 64 #define WP512_LENGTHBYTES 32 #define WHIRLPOOL_ROUNDS 10 struct wp512_ctx { u8 bitLength[WP512_LENGTHBYTES]; u8 buffer[WP512_BLOCK_SIZE]; int bufferBits; int bufferPos; u64 hash[WP512_DIGEST_SIZE/8]; }; /* * Though Whirlpool is endianness-neutral, the encryption tables are listed * in BIG-ENDIAN format, which is adopted throughout this implementation * (but little-endian notation would be equally suitable if consistently * employed). */ static const u64 C0[256] = { 0x18186018c07830d8ULL, 0x23238c2305af4626ULL, 0xc6c63fc67ef991b8ULL, 0xe8e887e8136fcdfbULL, 0x878726874ca113cbULL, 0xb8b8dab8a9626d11ULL, 0x0101040108050209ULL, 0x4f4f214f426e9e0dULL, 0x3636d836adee6c9bULL, 0xa6a6a2a6590451ffULL, 0xd2d26fd2debdb90cULL, 0xf5f5f3f5fb06f70eULL, 0x7979f979ef80f296ULL, 0x6f6fa16f5fcede30ULL, 0x91917e91fcef3f6dULL, 0x52525552aa07a4f8ULL, 0x60609d6027fdc047ULL, 0xbcbccabc89766535ULL, 0x9b9b569baccd2b37ULL, 0x8e8e028e048c018aULL, 0xa3a3b6a371155bd2ULL, 0x0c0c300c603c186cULL, 0x7b7bf17bff8af684ULL, 0x3535d435b5e16a80ULL, 0x1d1d741de8693af5ULL, 0xe0e0a7e05347ddb3ULL, 0xd7d77bd7f6acb321ULL, 0xc2c22fc25eed999cULL, 0x2e2eb82e6d965c43ULL, 0x4b4b314b627a9629ULL, 0xfefedffea321e15dULL, 0x575741578216aed5ULL, 0x15155415a8412abdULL, 0x7777c1779fb6eee8ULL, 0x3737dc37a5eb6e92ULL, 0xe5e5b3e57b56d79eULL, 0x9f9f469f8cd92313ULL, 0xf0f0e7f0d317fd23ULL, 0x4a4a354a6a7f9420ULL, 0xdada4fda9e95a944ULL, 0x58587d58fa25b0a2ULL, 0xc9c903c906ca8fcfULL, 0x2929a429558d527cULL, 0x0a0a280a5022145aULL, 0xb1b1feb1e14f7f50ULL, 0xa0a0baa0691a5dc9ULL, 0x6b6bb16b7fdad614ULL, 0x85852e855cab17d9ULL, 0xbdbdcebd8173673cULL, 0x5d5d695dd234ba8fULL, 0x1010401080502090ULL, 0xf4f4f7f4f303f507ULL, 0xcbcb0bcb16c08bddULL, 0x3e3ef83eedc67cd3ULL, 0x0505140528110a2dULL, 0x676781671fe6ce78ULL, 0xe4e4b7e47353d597ULL, 0x27279c2725bb4e02ULL, 0x4141194132588273ULL, 0x8b8b168b2c9d0ba7ULL, 0xa7a7a6a7510153f6ULL, 0x7d7de97dcf94fab2ULL, 0x95956e95dcfb3749ULL, 0xd8d847d88e9fad56ULL, 0xfbfbcbfb8b30eb70ULL, 0xeeee9fee2371c1cdULL, 0x7c7ced7cc791f8bbULL, 0x6666856617e3cc71ULL, 0xdddd53dda68ea77bULL, 0x17175c17b84b2eafULL, 0x4747014702468e45ULL, 0x9e9e429e84dc211aULL, 0xcaca0fca1ec589d4ULL, 0x2d2db42d75995a58ULL, 0xbfbfc6bf9179632eULL, 0x07071c07381b0e3fULL, 0xadad8ead012347acULL, 0x5a5a755aea2fb4b0ULL, 0x838336836cb51befULL, 0x3333cc3385ff66b6ULL, 0x636391633ff2c65cULL, 0x02020802100a0412ULL, 0xaaaa92aa39384993ULL, 0x7171d971afa8e2deULL, 0xc8c807c80ecf8dc6ULL, 0x19196419c87d32d1ULL, 0x494939497270923bULL, 0xd9d943d9869aaf5fULL, 0xf2f2eff2c31df931ULL, 0xe3e3abe34b48dba8ULL, 0x5b5b715be22ab6b9ULL, 0x88881a8834920dbcULL, 0x9a9a529aa4c8293eULL, 0x262698262dbe4c0bULL, 0x3232c8328dfa64bfULL, 0xb0b0fab0e94a7d59ULL, 0xe9e983e91b6acff2ULL, 0x0f0f3c0f78331e77ULL, 0xd5d573d5e6a6b733ULL, 0x80803a8074ba1df4ULL, 0xbebec2be997c6127ULL, 0xcdcd13cd26de87ebULL, 0x3434d034bde46889ULL, 0x48483d487a759032ULL, 0xffffdbffab24e354ULL, 0x7a7af57af78ff48dULL, 0x90907a90f4ea3d64ULL, 0x5f5f615fc23ebe9dULL, 0x202080201da0403dULL, 0x6868bd6867d5d00fULL, 0x1a1a681ad07234caULL, 0xaeae82ae192c41b7ULL, 0xb4b4eab4c95e757dULL, 0x54544d549a19a8ceULL, 0x93937693ece53b7fULL, 0x222288220daa442fULL, 0x64648d6407e9c863ULL, 0xf1f1e3f1db12ff2aULL, 0x7373d173bfa2e6ccULL, 0x12124812905a2482ULL, 0x40401d403a5d807aULL, 0x0808200840281048ULL, 0xc3c32bc356e89b95ULL, 0xecec97ec337bc5dfULL, 0xdbdb4bdb9690ab4dULL, 0xa1a1bea1611f5fc0ULL, 0x8d8d0e8d1c830791ULL, 0x3d3df43df5c97ac8ULL, 0x97976697ccf1335bULL, 0x0000000000000000ULL, 0xcfcf1bcf36d483f9ULL, 0x2b2bac2b4587566eULL, 0x7676c57697b3ece1ULL, 0x8282328264b019e6ULL, 0xd6d67fd6fea9b128ULL, 0x1b1b6c1bd87736c3ULL, 0xb5b5eeb5c15b7774ULL, 0xafaf86af112943beULL, 0x6a6ab56a77dfd41dULL, 0x50505d50ba0da0eaULL, 0x45450945124c8a57ULL, 0xf3f3ebf3cb18fb38ULL, 0x3030c0309df060adULL, 0xefef9bef2b74c3c4ULL, 0x3f3ffc3fe5c37edaULL, 0x55554955921caac7ULL, 0xa2a2b2a2791059dbULL, 0xeaea8fea0365c9e9ULL, 0x656589650fecca6aULL, 0xbabad2bab9686903ULL, 0x2f2fbc2f65935e4aULL, 0xc0c027c04ee79d8eULL, 0xdede5fdebe81a160ULL, 0x1c1c701ce06c38fcULL, 0xfdfdd3fdbb2ee746ULL, 0x4d4d294d52649a1fULL, 0x92927292e4e03976ULL, 0x7575c9758fbceafaULL, 0x06061806301e0c36ULL, 0x8a8a128a249809aeULL, 0xb2b2f2b2f940794bULL, 0xe6e6bfe66359d185ULL, 0x0e0e380e70361c7eULL, 0x1f1f7c1ff8633ee7ULL, 0x6262956237f7c455ULL, 0xd4d477d4eea3b53aULL, 0xa8a89aa829324d81ULL, 0x96966296c4f43152ULL, 0xf9f9c3f99b3aef62ULL, 0xc5c533c566f697a3ULL, 0x2525942535b14a10ULL, 0x59597959f220b2abULL, 0x84842a8454ae15d0ULL, 0x7272d572b7a7e4c5ULL, 0x3939e439d5dd72ecULL, 0x4c4c2d4c5a619816ULL, 0x5e5e655eca3bbc94ULL, 0x7878fd78e785f09fULL, 0x3838e038ddd870e5ULL, 0x8c8c0a8c14860598ULL, 0xd1d163d1c6b2bf17ULL, 0xa5a5aea5410b57e4ULL, 0xe2e2afe2434dd9a1ULL, 0x616199612ff8c24eULL, 0xb3b3f6b3f1457b42ULL, 0x2121842115a54234ULL, 0x9c9c4a9c94d62508ULL, 0x1e1e781ef0663ceeULL, 0x4343114322528661ULL, 0xc7c73bc776fc93b1ULL, 0xfcfcd7fcb32be54fULL, 0x0404100420140824ULL, 0x51515951b208a2e3ULL, 0x99995e99bcc72f25ULL, 0x6d6da96d4fc4da22ULL, 0x0d0d340d68391a65ULL, 0xfafacffa8335e979ULL, 0xdfdf5bdfb684a369ULL, 0x7e7ee57ed79bfca9ULL, 0x242490243db44819ULL, 0x3b3bec3bc5d776feULL, 0xabab96ab313d4b9aULL, 0xcece1fce3ed181f0ULL, 0x1111441188552299ULL, 0x8f8f068f0c890383ULL, 0x4e4e254e4a6b9c04ULL, 0xb7b7e6b7d1517366ULL, 0xebeb8beb0b60cbe0ULL, 0x3c3cf03cfdcc78c1ULL, 0x81813e817cbf1ffdULL, 0x94946a94d4fe3540ULL, 0xf7f7fbf7eb0cf31cULL, 0xb9b9deb9a1676f18ULL, 0x13134c13985f268bULL, 0x2c2cb02c7d9c5851ULL, 0xd3d36bd3d6b8bb05ULL, 0xe7e7bbe76b5cd38cULL, 0x6e6ea56e57cbdc39ULL, 0xc4c437c46ef395aaULL, 0x03030c03180f061bULL, 0x565645568a13acdcULL, 0x44440d441a49885eULL, 0x7f7fe17fdf9efea0ULL, 0xa9a99ea921374f88ULL, 0x2a2aa82a4d825467ULL, 0xbbbbd6bbb16d6b0aULL, 0xc1c123c146e29f87ULL, 0x53535153a202a6f1ULL, 0xdcdc57dcae8ba572ULL, 0x0b0b2c0b58271653ULL, 0x9d9d4e9d9cd32701ULL, 0x6c6cad6c47c1d82bULL, 0x3131c43195f562a4ULL, 0x7474cd7487b9e8f3ULL, 0xf6f6fff6e309f115ULL, 0x464605460a438c4cULL, 0xacac8aac092645a5ULL, 0x89891e893c970fb5ULL, 0x14145014a04428b4ULL, 0xe1e1a3e15b42dfbaULL, 0x16165816b04e2ca6ULL, 0x3a3ae83acdd274f7ULL, 0x6969b9696fd0d206ULL, 0x09092409482d1241ULL, 0x7070dd70a7ade0d7ULL, 0xb6b6e2b6d954716fULL, 0xd0d067d0ceb7bd1eULL, 0xeded93ed3b7ec7d6ULL, 0xcccc17cc2edb85e2ULL, 0x424215422a578468ULL, 0x98985a98b4c22d2cULL, 0xa4a4aaa4490e55edULL, 0x2828a0285d885075ULL, 0x5c5c6d5cda31b886ULL, 0xf8f8c7f8933fed6bULL, 0x8686228644a411c2ULL, }; static const u64 C1[256] = { 0xd818186018c07830ULL, 0x2623238c2305af46ULL, 0xb8c6c63fc67ef991ULL, 0xfbe8e887e8136fcdULL, 0xcb878726874ca113ULL, 0x11b8b8dab8a9626dULL, 0x0901010401080502ULL, 0x0d4f4f214f426e9eULL, 0x9b3636d836adee6cULL, 0xffa6a6a2a6590451ULL, 0x0cd2d26fd2debdb9ULL, 0x0ef5f5f3f5fb06f7ULL, 0x967979f979ef80f2ULL, 0x306f6fa16f5fcedeULL, 0x6d91917e91fcef3fULL, 0xf852525552aa07a4ULL, 0x4760609d6027fdc0ULL, 0x35bcbccabc897665ULL, 0x379b9b569baccd2bULL, 0x8a8e8e028e048c01ULL, 0xd2a3a3b6a371155bULL, 0x6c0c0c300c603c18ULL, 0x847b7bf17bff8af6ULL, 0x803535d435b5e16aULL, 0xf51d1d741de8693aULL, 0xb3e0e0a7e05347ddULL, 0x21d7d77bd7f6acb3ULL, 0x9cc2c22fc25eed99ULL, 0x432e2eb82e6d965cULL, 0x294b4b314b627a96ULL, 0x5dfefedffea321e1ULL, 0xd5575741578216aeULL, 0xbd15155415a8412aULL, 0xe87777c1779fb6eeULL, 0x923737dc37a5eb6eULL, 0x9ee5e5b3e57b56d7ULL, 0x139f9f469f8cd923ULL, 0x23f0f0e7f0d317fdULL, 0x204a4a354a6a7f94ULL, 0x44dada4fda9e95a9ULL, 0xa258587d58fa25b0ULL, 0xcfc9c903c906ca8fULL, 0x7c2929a429558d52ULL, 0x5a0a0a280a502214ULL, 0x50b1b1feb1e14f7fULL, 0xc9a0a0baa0691a5dULL, 0x146b6bb16b7fdad6ULL, 0xd985852e855cab17ULL, 0x3cbdbdcebd817367ULL, 0x8f5d5d695dd234baULL, 0x9010104010805020ULL, 0x07f4f4f7f4f303f5ULL, 0xddcbcb0bcb16c08bULL, 0xd33e3ef83eedc67cULL, 0x2d0505140528110aULL, 0x78676781671fe6ceULL, 0x97e4e4b7e47353d5ULL, 0x0227279c2725bb4eULL, 0x7341411941325882ULL, 0xa78b8b168b2c9d0bULL, 0xf6a7a7a6a7510153ULL, 0xb27d7de97dcf94faULL, 0x4995956e95dcfb37ULL, 0x56d8d847d88e9fadULL, 0x70fbfbcbfb8b30ebULL, 0xcdeeee9fee2371c1ULL, 0xbb7c7ced7cc791f8ULL, 0x716666856617e3ccULL, 0x7bdddd53dda68ea7ULL, 0xaf17175c17b84b2eULL, 0x454747014702468eULL, 0x1a9e9e429e84dc21ULL, 0xd4caca0fca1ec589ULL, 0x582d2db42d75995aULL, 0x2ebfbfc6bf917963ULL, 0x3f07071c07381b0eULL, 0xacadad8ead012347ULL, 0xb05a5a755aea2fb4ULL, 0xef838336836cb51bULL, 0xb63333cc3385ff66ULL, 0x5c636391633ff2c6ULL, 0x1202020802100a04ULL, 0x93aaaa92aa393849ULL, 0xde7171d971afa8e2ULL, 0xc6c8c807c80ecf8dULL, 0xd119196419c87d32ULL, 0x3b49493949727092ULL, 0x5fd9d943d9869aafULL, 0x31f2f2eff2c31df9ULL, 0xa8e3e3abe34b48dbULL, 0xb95b5b715be22ab6ULL, 0xbc88881a8834920dULL, 0x3e9a9a529aa4c829ULL, 0x0b262698262dbe4cULL, 0xbf3232c8328dfa64ULL, 0x59b0b0fab0e94a7dULL, 0xf2e9e983e91b6acfULL, 0x770f0f3c0f78331eULL, 0x33d5d573d5e6a6b7ULL, 0xf480803a8074ba1dULL, 0x27bebec2be997c61ULL, 0xebcdcd13cd26de87ULL, 0x893434d034bde468ULL, 0x3248483d487a7590ULL, 0x54ffffdbffab24e3ULL, 0x8d7a7af57af78ff4ULL, 0x6490907a90f4ea3dULL, 0x9d5f5f615fc23ebeULL, 0x3d202080201da040ULL, 0x0f6868bd6867d5d0ULL, 0xca1a1a681ad07234ULL, 0xb7aeae82ae192c41ULL, 0x7db4b4eab4c95e75ULL, 0xce54544d549a19a8ULL, 0x7f93937693ece53bULL, 0x2f222288220daa44ULL, 0x6364648d6407e9c8ULL, 0x2af1f1e3f1db12ffULL, 0xcc7373d173bfa2e6ULL, 0x8212124812905a24ULL, 0x7a40401d403a5d80ULL, 0x4808082008402810ULL, 0x95c3c32bc356e89bULL, 0xdfecec97ec337bc5ULL, 0x4ddbdb4bdb9690abULL, 0xc0a1a1bea1611f5fULL, 0x918d8d0e8d1c8307ULL, 0xc83d3df43df5c97aULL, 0x5b97976697ccf133ULL, 0x0000000000000000ULL, 0xf9cfcf1bcf36d483ULL, 0x6e2b2bac2b458756ULL, 0xe17676c57697b3ecULL, 0xe68282328264b019ULL, 0x28d6d67fd6fea9b1ULL, 0xc31b1b6c1bd87736ULL, 0x74b5b5eeb5c15b77ULL, 0xbeafaf86af112943ULL, 0x1d6a6ab56a77dfd4ULL, 0xea50505d50ba0da0ULL, 0x5745450945124c8aULL, 0x38f3f3ebf3cb18fbULL, 0xad3030c0309df060ULL, 0xc4efef9bef2b74c3ULL, 0xda3f3ffc3fe5c37eULL, 0xc755554955921caaULL, 0xdba2a2b2a2791059ULL, 0xe9eaea8fea0365c9ULL, 0x6a656589650feccaULL, 0x03babad2bab96869ULL, 0x4a2f2fbc2f65935eULL, 0x8ec0c027c04ee79dULL, 0x60dede5fdebe81a1ULL, 0xfc1c1c701ce06c38ULL, 0x46fdfdd3fdbb2ee7ULL, 0x1f4d4d294d52649aULL, 0x7692927292e4e039ULL, 0xfa7575c9758fbceaULL, 0x3606061806301e0cULL, 0xae8a8a128a249809ULL, 0x4bb2b2f2b2f94079ULL, 0x85e6e6bfe66359d1ULL, 0x7e0e0e380e70361cULL, 0xe71f1f7c1ff8633eULL, 0x556262956237f7c4ULL, 0x3ad4d477d4eea3b5ULL, 0x81a8a89aa829324dULL, 0x5296966296c4f431ULL, 0x62f9f9c3f99b3aefULL, 0xa3c5c533c566f697ULL, 0x102525942535b14aULL, 0xab59597959f220b2ULL, 0xd084842a8454ae15ULL, 0xc57272d572b7a7e4ULL, 0xec3939e439d5dd72ULL, 0x164c4c2d4c5a6198ULL, 0x945e5e655eca3bbcULL, 0x9f7878fd78e785f0ULL, 0xe53838e038ddd870ULL, 0x988c8c0a8c148605ULL, 0x17d1d163d1c6b2bfULL, 0xe4a5a5aea5410b57ULL, 0xa1e2e2afe2434dd9ULL, 0x4e616199612ff8c2ULL, 0x42b3b3f6b3f1457bULL, 0x342121842115a542ULL, 0x089c9c4a9c94d625ULL, 0xee1e1e781ef0663cULL, 0x6143431143225286ULL, 0xb1c7c73bc776fc93ULL, 0x4ffcfcd7fcb32be5ULL, 0x2404041004201408ULL, 0xe351515951b208a2ULL, 0x2599995e99bcc72fULL, 0x226d6da96d4fc4daULL, 0x650d0d340d68391aULL, 0x79fafacffa8335e9ULL, 0x69dfdf5bdfb684a3ULL, 0xa97e7ee57ed79bfcULL, 0x19242490243db448ULL, 0xfe3b3bec3bc5d776ULL, 0x9aabab96ab313d4bULL, 0xf0cece1fce3ed181ULL, 0x9911114411885522ULL, 0x838f8f068f0c8903ULL, 0x044e4e254e4a6b9cULL, 0x66b7b7e6b7d15173ULL, 0xe0ebeb8beb0b60cbULL, 0xc13c3cf03cfdcc78ULL, 0xfd81813e817cbf1fULL, 0x4094946a94d4fe35ULL, 0x1cf7f7fbf7eb0cf3ULL, 0x18b9b9deb9a1676fULL, 0x8b13134c13985f26ULL, 0x512c2cb02c7d9c58ULL, 0x05d3d36bd3d6b8bbULL, 0x8ce7e7bbe76b5cd3ULL, 0x396e6ea56e57cbdcULL, 0xaac4c437c46ef395ULL, 0x1b03030c03180f06ULL, 0xdc565645568a13acULL, 0x5e44440d441a4988ULL, 0xa07f7fe17fdf9efeULL, 0x88a9a99ea921374fULL, 0x672a2aa82a4d8254ULL, 0x0abbbbd6bbb16d6bULL, 0x87c1c123c146e29fULL, 0xf153535153a202a6ULL, 0x72dcdc57dcae8ba5ULL, 0x530b0b2c0b582716ULL, 0x019d9d4e9d9cd327ULL, 0x2b6c6cad6c47c1d8ULL, 0xa43131c43195f562ULL, 0xf37474cd7487b9e8ULL, 0x15f6f6fff6e309f1ULL, 0x4c464605460a438cULL, 0xa5acac8aac092645ULL, 0xb589891e893c970fULL, 0xb414145014a04428ULL, 0xbae1e1a3e15b42dfULL, 0xa616165816b04e2cULL, 0xf73a3ae83acdd274ULL, 0x066969b9696fd0d2ULL, 0x4109092409482d12ULL, 0xd77070dd70a7ade0ULL, 0x6fb6b6e2b6d95471ULL, 0x1ed0d067d0ceb7bdULL, 0xd6eded93ed3b7ec7ULL, 0xe2cccc17cc2edb85ULL, 0x68424215422a5784ULL, 0x2c98985a98b4c22dULL, 0xeda4a4aaa4490e55ULL, 0x752828a0285d8850ULL, 0x865c5c6d5cda31b8ULL, 0x6bf8f8c7f8933fedULL, 0xc28686228644a411ULL, }; static const u64 C2[256] = { 0x30d818186018c078ULL, 0x462623238c2305afULL, 0x91b8c6c63fc67ef9ULL, 0xcdfbe8e887e8136fULL, 0x13cb878726874ca1ULL, 0x6d11b8b8dab8a962ULL, 0x0209010104010805ULL, 0x9e0d4f4f214f426eULL, 0x6c9b3636d836adeeULL, 0x51ffa6a6a2a65904ULL, 0xb90cd2d26fd2debdULL, 0xf70ef5f5f3f5fb06ULL, 0xf2967979f979ef80ULL, 0xde306f6fa16f5fceULL, 0x3f6d91917e91fcefULL, 0xa4f852525552aa07ULL, 0xc04760609d6027fdULL, 0x6535bcbccabc8976ULL, 0x2b379b9b569baccdULL, 0x018a8e8e028e048cULL, 0x5bd2a3a3b6a37115ULL, 0x186c0c0c300c603cULL, 0xf6847b7bf17bff8aULL, 0x6a803535d435b5e1ULL, 0x3af51d1d741de869ULL, 0xddb3e0e0a7e05347ULL, 0xb321d7d77bd7f6acULL, 0x999cc2c22fc25eedULL, 0x5c432e2eb82e6d96ULL, 0x96294b4b314b627aULL, 0xe15dfefedffea321ULL, 0xaed5575741578216ULL, 0x2abd15155415a841ULL, 0xeee87777c1779fb6ULL, 0x6e923737dc37a5ebULL, 0xd79ee5e5b3e57b56ULL, 0x23139f9f469f8cd9ULL, 0xfd23f0f0e7f0d317ULL, 0x94204a4a354a6a7fULL, 0xa944dada4fda9e95ULL, 0xb0a258587d58fa25ULL, 0x8fcfc9c903c906caULL, 0x527c2929a429558dULL, 0x145a0a0a280a5022ULL, 0x7f50b1b1feb1e14fULL, 0x5dc9a0a0baa0691aULL, 0xd6146b6bb16b7fdaULL, 0x17d985852e855cabULL, 0x673cbdbdcebd8173ULL, 0xba8f5d5d695dd234ULL, 0x2090101040108050ULL, 0xf507f4f4f7f4f303ULL, 0x8bddcbcb0bcb16c0ULL, 0x7cd33e3ef83eedc6ULL, 0x0a2d050514052811ULL, 0xce78676781671fe6ULL, 0xd597e4e4b7e47353ULL, 0x4e0227279c2725bbULL, 0x8273414119413258ULL, 0x0ba78b8b168b2c9dULL, 0x53f6a7a7a6a75101ULL, 0xfab27d7de97dcf94ULL, 0x374995956e95dcfbULL, 0xad56d8d847d88e9fULL, 0xeb70fbfbcbfb8b30ULL, 0xc1cdeeee9fee2371ULL, 0xf8bb7c7ced7cc791ULL, 0xcc716666856617e3ULL, 0xa77bdddd53dda68eULL, 0x2eaf17175c17b84bULL, 0x8e45474701470246ULL, 0x211a9e9e429e84dcULL, 0x89d4caca0fca1ec5ULL, 0x5a582d2db42d7599ULL, 0x632ebfbfc6bf9179ULL, 0x0e3f07071c07381bULL, 0x47acadad8ead0123ULL, 0xb4b05a5a755aea2fULL, 0x1bef838336836cb5ULL, 0x66b63333cc3385ffULL, 0xc65c636391633ff2ULL, 0x041202020802100aULL, 0x4993aaaa92aa3938ULL, 0xe2de7171d971afa8ULL, 0x8dc6c8c807c80ecfULL, 0x32d119196419c87dULL, 0x923b494939497270ULL, 0xaf5fd9d943d9869aULL, 0xf931f2f2eff2c31dULL, 0xdba8e3e3abe34b48ULL, 0xb6b95b5b715be22aULL, 0x0dbc88881a883492ULL, 0x293e9a9a529aa4c8ULL, 0x4c0b262698262dbeULL, 0x64bf3232c8328dfaULL, 0x7d59b0b0fab0e94aULL, 0xcff2e9e983e91b6aULL, 0x1e770f0f3c0f7833ULL, 0xb733d5d573d5e6a6ULL, 0x1df480803a8074baULL, 0x6127bebec2be997cULL, 0x87ebcdcd13cd26deULL, 0x68893434d034bde4ULL, 0x903248483d487a75ULL, 0xe354ffffdbffab24ULL, 0xf48d7a7af57af78fULL, 0x3d6490907a90f4eaULL, 0xbe9d5f5f615fc23eULL, 0x403d202080201da0ULL, 0xd00f6868bd6867d5ULL, 0x34ca1a1a681ad072ULL, 0x41b7aeae82ae192cULL, 0x757db4b4eab4c95eULL, 0xa8ce54544d549a19ULL, 0x3b7f93937693ece5ULL, 0x442f222288220daaULL, 0xc86364648d6407e9ULL, 0xff2af1f1e3f1db12ULL, 0xe6cc7373d173bfa2ULL, 0x248212124812905aULL, 0x807a40401d403a5dULL, 0x1048080820084028ULL, 0x9b95c3c32bc356e8ULL, 0xc5dfecec97ec337bULL, 0xab4ddbdb4bdb9690ULL, 0x5fc0a1a1bea1611fULL, 0x07918d8d0e8d1c83ULL, 0x7ac83d3df43df5c9ULL, 0x335b97976697ccf1ULL, 0x0000000000000000ULL, 0x83f9cfcf1bcf36d4ULL, 0x566e2b2bac2b4587ULL, 0xece17676c57697b3ULL, 0x19e68282328264b0ULL, 0xb128d6d67fd6fea9ULL, 0x36c31b1b6c1bd877ULL, 0x7774b5b5eeb5c15bULL, 0x43beafaf86af1129ULL, 0xd41d6a6ab56a77dfULL, 0xa0ea50505d50ba0dULL, 0x8a5745450945124cULL, 0xfb38f3f3ebf3cb18ULL, 0x60ad3030c0309df0ULL, 0xc3c4efef9bef2b74ULL, 0x7eda3f3ffc3fe5c3ULL, 0xaac755554955921cULL, 0x59dba2a2b2a27910ULL, 0xc9e9eaea8fea0365ULL, 0xca6a656589650fecULL, 0x6903babad2bab968ULL, 0x5e4a2f2fbc2f6593ULL, 0x9d8ec0c027c04ee7ULL, 0xa160dede5fdebe81ULL, 0x38fc1c1c701ce06cULL, 0xe746fdfdd3fdbb2eULL, 0x9a1f4d4d294d5264ULL, 0x397692927292e4e0ULL, 0xeafa7575c9758fbcULL, 0x0c3606061806301eULL, 0x09ae8a8a128a2498ULL, 0x794bb2b2f2b2f940ULL, 0xd185e6e6bfe66359ULL, 0x1c7e0e0e380e7036ULL, 0x3ee71f1f7c1ff863ULL, 0xc4556262956237f7ULL, 0xb53ad4d477d4eea3ULL, 0x4d81a8a89aa82932ULL, 0x315296966296c4f4ULL, 0xef62f9f9c3f99b3aULL, 0x97a3c5c533c566f6ULL, 0x4a102525942535b1ULL, 0xb2ab59597959f220ULL, 0x15d084842a8454aeULL, 0xe4c57272d572b7a7ULL, 0x72ec3939e439d5ddULL, 0x98164c4c2d4c5a61ULL, 0xbc945e5e655eca3bULL, 0xf09f7878fd78e785ULL, 0x70e53838e038ddd8ULL, 0x05988c8c0a8c1486ULL, 0xbf17d1d163d1c6b2ULL, 0x57e4a5a5aea5410bULL, 0xd9a1e2e2afe2434dULL, 0xc24e616199612ff8ULL, 0x7b42b3b3f6b3f145ULL, 0x42342121842115a5ULL, 0x25089c9c4a9c94d6ULL, 0x3cee1e1e781ef066ULL, 0x8661434311432252ULL, 0x93b1c7c73bc776fcULL, 0xe54ffcfcd7fcb32bULL, 0x0824040410042014ULL, 0xa2e351515951b208ULL, 0x2f2599995e99bcc7ULL, 0xda226d6da96d4fc4ULL, 0x1a650d0d340d6839ULL, 0xe979fafacffa8335ULL, 0xa369dfdf5bdfb684ULL, 0xfca97e7ee57ed79bULL, 0x4819242490243db4ULL, 0x76fe3b3bec3bc5d7ULL, 0x4b9aabab96ab313dULL, 0x81f0cece1fce3ed1ULL, 0x2299111144118855ULL, 0x03838f8f068f0c89ULL, 0x9c044e4e254e4a6bULL, 0x7366b7b7e6b7d151ULL, 0xcbe0ebeb8beb0b60ULL, 0x78c13c3cf03cfdccULL, 0x1ffd81813e817cbfULL, 0x354094946a94d4feULL, 0xf31cf7f7fbf7eb0cULL, 0x6f18b9b9deb9a167ULL, 0x268b13134c13985fULL, 0x58512c2cb02c7d9cULL, 0xbb05d3d36bd3d6b8ULL, 0xd38ce7e7bbe76b5cULL, 0xdc396e6ea56e57cbULL, 0x95aac4c437c46ef3ULL, 0x061b03030c03180fULL, 0xacdc565645568a13ULL, 0x885e44440d441a49ULL, 0xfea07f7fe17fdf9eULL, 0x4f88a9a99ea92137ULL, 0x54672a2aa82a4d82ULL, 0x6b0abbbbd6bbb16dULL, 0x9f87c1c123c146e2ULL, 0xa6f153535153a202ULL, 0xa572dcdc57dcae8bULL, 0x16530b0b2c0b5827ULL, 0x27019d9d4e9d9cd3ULL, 0xd82b6c6cad6c47c1ULL, 0x62a43131c43195f5ULL, 0xe8f37474cd7487b9ULL, 0xf115f6f6fff6e309ULL, 0x8c4c464605460a43ULL, 0x45a5acac8aac0926ULL, 0x0fb589891e893c97ULL, 0x28b414145014a044ULL, 0xdfbae1e1a3e15b42ULL, 0x2ca616165816b04eULL, 0x74f73a3ae83acdd2ULL, 0xd2066969b9696fd0ULL, 0x124109092409482dULL, 0xe0d77070dd70a7adULL, 0x716fb6b6e2b6d954ULL, 0xbd1ed0d067d0ceb7ULL, 0xc7d6eded93ed3b7eULL, 0x85e2cccc17cc2edbULL, 0x8468424215422a57ULL, 0x2d2c98985a98b4c2ULL, 0x55eda4a4aaa4490eULL, 0x50752828a0285d88ULL, 0xb8865c5c6d5cda31ULL, 0xed6bf8f8c7f8933fULL, 0x11c28686228644a4ULL, }; static const u64 C3[256] = { 0x7830d818186018c0ULL, 0xaf462623238c2305ULL, 0xf991b8c6c63fc67eULL, 0x6fcdfbe8e887e813ULL, 0xa113cb878726874cULL, 0x626d11b8b8dab8a9ULL, 0x0502090101040108ULL, 0x6e9e0d4f4f214f42ULL, 0xee6c9b3636d836adULL, 0x0451ffa6a6a2a659ULL, 0xbdb90cd2d26fd2deULL, 0x06f70ef5f5f3f5fbULL, 0x80f2967979f979efULL, 0xcede306f6fa16f5fULL, 0xef3f6d91917e91fcULL, 0x07a4f852525552aaULL, 0xfdc04760609d6027ULL, 0x766535bcbccabc89ULL, 0xcd2b379b9b569bacULL, 0x8c018a8e8e028e04ULL, 0x155bd2a3a3b6a371ULL, 0x3c186c0c0c300c60ULL, 0x8af6847b7bf17bffULL, 0xe16a803535d435b5ULL, 0x693af51d1d741de8ULL, 0x47ddb3e0e0a7e053ULL, 0xacb321d7d77bd7f6ULL, 0xed999cc2c22fc25eULL, 0x965c432e2eb82e6dULL, 0x7a96294b4b314b62ULL, 0x21e15dfefedffea3ULL, 0x16aed55757415782ULL, 0x412abd15155415a8ULL, 0xb6eee87777c1779fULL, 0xeb6e923737dc37a5ULL, 0x56d79ee5e5b3e57bULL, 0xd923139f9f469f8cULL, 0x17fd23f0f0e7f0d3ULL, 0x7f94204a4a354a6aULL, 0x95a944dada4fda9eULL, 0x25b0a258587d58faULL, 0xca8fcfc9c903c906ULL, 0x8d527c2929a42955ULL, 0x22145a0a0a280a50ULL, 0x4f7f50b1b1feb1e1ULL, 0x1a5dc9a0a0baa069ULL, 0xdad6146b6bb16b7fULL, 0xab17d985852e855cULL, 0x73673cbdbdcebd81ULL, 0x34ba8f5d5d695dd2ULL, 0x5020901010401080ULL, 0x03f507f4f4f7f4f3ULL, 0xc08bddcbcb0bcb16ULL, 0xc67cd33e3ef83eedULL, 0x110a2d0505140528ULL, 0xe6ce78676781671fULL, 0x53d597e4e4b7e473ULL, 0xbb4e0227279c2725ULL, 0x5882734141194132ULL, 0x9d0ba78b8b168b2cULL, 0x0153f6a7a7a6a751ULL, 0x94fab27d7de97dcfULL, 0xfb374995956e95dcULL, 0x9fad56d8d847d88eULL, 0x30eb70fbfbcbfb8bULL, 0x71c1cdeeee9fee23ULL, 0x91f8bb7c7ced7cc7ULL, 0xe3cc716666856617ULL, 0x8ea77bdddd53dda6ULL, 0x4b2eaf17175c17b8ULL, 0x468e454747014702ULL, 0xdc211a9e9e429e84ULL, 0xc589d4caca0fca1eULL, 0x995a582d2db42d75ULL, 0x79632ebfbfc6bf91ULL, 0x1b0e3f07071c0738ULL, 0x2347acadad8ead01ULL, 0x2fb4b05a5a755aeaULL, 0xb51bef838336836cULL, 0xff66b63333cc3385ULL, 0xf2c65c636391633fULL, 0x0a04120202080210ULL, 0x384993aaaa92aa39ULL, 0xa8e2de7171d971afULL, 0xcf8dc6c8c807c80eULL, 0x7d32d119196419c8ULL, 0x70923b4949394972ULL, 0x9aaf5fd9d943d986ULL, 0x1df931f2f2eff2c3ULL, 0x48dba8e3e3abe34bULL, 0x2ab6b95b5b715be2ULL, 0x920dbc88881a8834ULL, 0xc8293e9a9a529aa4ULL, 0xbe4c0b262698262dULL, 0xfa64bf3232c8328dULL, 0x4a7d59b0b0fab0e9ULL, 0x6acff2e9e983e91bULL, 0x331e770f0f3c0f78ULL, 0xa6b733d5d573d5e6ULL, 0xba1df480803a8074ULL, 0x7c6127bebec2be99ULL, 0xde87ebcdcd13cd26ULL, 0xe468893434d034bdULL, 0x75903248483d487aULL, 0x24e354ffffdbffabULL, 0x8ff48d7a7af57af7ULL, 0xea3d6490907a90f4ULL, 0x3ebe9d5f5f615fc2ULL, 0xa0403d202080201dULL, 0xd5d00f6868bd6867ULL, 0x7234ca1a1a681ad0ULL, 0x2c41b7aeae82ae19ULL, 0x5e757db4b4eab4c9ULL, 0x19a8ce54544d549aULL, 0xe53b7f93937693ecULL, 0xaa442f222288220dULL, 0xe9c86364648d6407ULL, 0x12ff2af1f1e3f1dbULL, 0xa2e6cc7373d173bfULL, 0x5a24821212481290ULL, 0x5d807a40401d403aULL, 0x2810480808200840ULL, 0xe89b95c3c32bc356ULL, 0x7bc5dfecec97ec33ULL, 0x90ab4ddbdb4bdb96ULL, 0x1f5fc0a1a1bea161ULL, 0x8307918d8d0e8d1cULL, 0xc97ac83d3df43df5ULL, 0xf1335b97976697ccULL, 0x0000000000000000ULL, 0xd483f9cfcf1bcf36ULL, 0x87566e2b2bac2b45ULL, 0xb3ece17676c57697ULL, 0xb019e68282328264ULL, 0xa9b128d6d67fd6feULL, 0x7736c31b1b6c1bd8ULL, 0x5b7774b5b5eeb5c1ULL, 0x2943beafaf86af11ULL, 0xdfd41d6a6ab56a77ULL, 0x0da0ea50505d50baULL, 0x4c8a574545094512ULL, 0x18fb38f3f3ebf3cbULL, 0xf060ad3030c0309dULL, 0x74c3c4efef9bef2bULL, 0xc37eda3f3ffc3fe5ULL, 0x1caac75555495592ULL, 0x1059dba2a2b2a279ULL, 0x65c9e9eaea8fea03ULL, 0xecca6a656589650fULL, 0x686903babad2bab9ULL, 0x935e4a2f2fbc2f65ULL, 0xe79d8ec0c027c04eULL, 0x81a160dede5fdebeULL, 0x6c38fc1c1c701ce0ULL, 0x2ee746fdfdd3fdbbULL, 0x649a1f4d4d294d52ULL, 0xe0397692927292e4ULL, 0xbceafa7575c9758fULL, 0x1e0c360606180630ULL, 0x9809ae8a8a128a24ULL, 0x40794bb2b2f2b2f9ULL, 0x59d185e6e6bfe663ULL, 0x361c7e0e0e380e70ULL, 0x633ee71f1f7c1ff8ULL, 0xf7c4556262956237ULL, 0xa3b53ad4d477d4eeULL, 0x324d81a8a89aa829ULL, 0xf4315296966296c4ULL, 0x3aef62f9f9c3f99bULL, 0xf697a3c5c533c566ULL, 0xb14a102525942535ULL, 0x20b2ab59597959f2ULL, 0xae15d084842a8454ULL, 0xa7e4c57272d572b7ULL, 0xdd72ec3939e439d5ULL, 0x6198164c4c2d4c5aULL, 0x3bbc945e5e655ecaULL, 0x85f09f7878fd78e7ULL, 0xd870e53838e038ddULL, 0x8605988c8c0a8c14ULL, 0xb2bf17d1d163d1c6ULL, 0x0b57e4a5a5aea541ULL, 0x4dd9a1e2e2afe243ULL, 0xf8c24e616199612fULL, 0x457b42b3b3f6b3f1ULL, 0xa542342121842115ULL, 0xd625089c9c4a9c94ULL, 0x663cee1e1e781ef0ULL, 0x5286614343114322ULL, 0xfc93b1c7c73bc776ULL, 0x2be54ffcfcd7fcb3ULL, 0x1408240404100420ULL, 0x08a2e351515951b2ULL, 0xc72f2599995e99bcULL, 0xc4da226d6da96d4fULL, 0x391a650d0d340d68ULL, 0x35e979fafacffa83ULL, 0x84a369dfdf5bdfb6ULL, 0x9bfca97e7ee57ed7ULL, 0xb44819242490243dULL, 0xd776fe3b3bec3bc5ULL, 0x3d4b9aabab96ab31ULL, 0xd181f0cece1fce3eULL, 0x5522991111441188ULL, 0x8903838f8f068f0cULL, 0x6b9c044e4e254e4aULL, 0x517366b7b7e6b7d1ULL, 0x60cbe0ebeb8beb0bULL, 0xcc78c13c3cf03cfdULL, 0xbf1ffd81813e817cULL, 0xfe354094946a94d4ULL, 0x0cf31cf7f7fbf7ebULL, 0x676f18b9b9deb9a1ULL, 0x5f268b13134c1398ULL, 0x9c58512c2cb02c7dULL, 0xb8bb05d3d36bd3d6ULL, 0x5cd38ce7e7bbe76bULL, 0xcbdc396e6ea56e57ULL, 0xf395aac4c437c46eULL, 0x0f061b03030c0318ULL, 0x13acdc565645568aULL, 0x49885e44440d441aULL, 0x9efea07f7fe17fdfULL, 0x374f88a9a99ea921ULL, 0x8254672a2aa82a4dULL, 0x6d6b0abbbbd6bbb1ULL, 0xe29f87c1c123c146ULL, 0x02a6f153535153a2ULL, 0x8ba572dcdc57dcaeULL, 0x2716530b0b2c0b58ULL, 0xd327019d9d4e9d9cULL, 0xc1d82b6c6cad6c47ULL, 0xf562a43131c43195ULL, 0xb9e8f37474cd7487ULL, 0x09f115f6f6fff6e3ULL, 0x438c4c464605460aULL, 0x2645a5acac8aac09ULL, 0x970fb589891e893cULL, 0x4428b414145014a0ULL, 0x42dfbae1e1a3e15bULL, 0x4e2ca616165816b0ULL, 0xd274f73a3ae83acdULL, 0xd0d2066969b9696fULL, 0x2d12410909240948ULL, 0xade0d77070dd70a7ULL, 0x54716fb6b6e2b6d9ULL, 0xb7bd1ed0d067d0ceULL, 0x7ec7d6eded93ed3bULL, 0xdb85e2cccc17cc2eULL, 0x578468424215422aULL, 0xc22d2c98985a98b4ULL, 0x0e55eda4a4aaa449ULL, 0x8850752828a0285dULL, 0x31b8865c5c6d5cdaULL, 0x3fed6bf8f8c7f893ULL, 0xa411c28686228644ULL, }; static const u64 C4[256] = { 0xc07830d818186018ULL, 0x05af462623238c23ULL, 0x7ef991b8c6c63fc6ULL, 0x136fcdfbe8e887e8ULL, 0x4ca113cb87872687ULL, 0xa9626d11b8b8dab8ULL, 0x0805020901010401ULL, 0x426e9e0d4f4f214fULL, 0xadee6c9b3636d836ULL, 0x590451ffa6a6a2a6ULL, 0xdebdb90cd2d26fd2ULL, 0xfb06f70ef5f5f3f5ULL, 0xef80f2967979f979ULL, 0x5fcede306f6fa16fULL, 0xfcef3f6d91917e91ULL, 0xaa07a4f852525552ULL, 0x27fdc04760609d60ULL, 0x89766535bcbccabcULL, 0xaccd2b379b9b569bULL, 0x048c018a8e8e028eULL, 0x71155bd2a3a3b6a3ULL, 0x603c186c0c0c300cULL, 0xff8af6847b7bf17bULL, 0xb5e16a803535d435ULL, 0xe8693af51d1d741dULL, 0x5347ddb3e0e0a7e0ULL, 0xf6acb321d7d77bd7ULL, 0x5eed999cc2c22fc2ULL, 0x6d965c432e2eb82eULL, 0x627a96294b4b314bULL, 0xa321e15dfefedffeULL, 0x8216aed557574157ULL, 0xa8412abd15155415ULL, 0x9fb6eee87777c177ULL, 0xa5eb6e923737dc37ULL, 0x7b56d79ee5e5b3e5ULL, 0x8cd923139f9f469fULL, 0xd317fd23f0f0e7f0ULL, 0x6a7f94204a4a354aULL, 0x9e95a944dada4fdaULL, 0xfa25b0a258587d58ULL, 0x06ca8fcfc9c903c9ULL, 0x558d527c2929a429ULL, 0x5022145a0a0a280aULL, 0xe14f7f50b1b1feb1ULL, 0x691a5dc9a0a0baa0ULL, 0x7fdad6146b6bb16bULL, 0x5cab17d985852e85ULL, 0x8173673cbdbdcebdULL, 0xd234ba8f5d5d695dULL, 0x8050209010104010ULL, 0xf303f507f4f4f7f4ULL, 0x16c08bddcbcb0bcbULL, 0xedc67cd33e3ef83eULL, 0x28110a2d05051405ULL, 0x1fe6ce7867678167ULL, 0x7353d597e4e4b7e4ULL, 0x25bb4e0227279c27ULL, 0x3258827341411941ULL, 0x2c9d0ba78b8b168bULL, 0x510153f6a7a7a6a7ULL, 0xcf94fab27d7de97dULL, 0xdcfb374995956e95ULL, 0x8e9fad56d8d847d8ULL, 0x8b30eb70fbfbcbfbULL, 0x2371c1cdeeee9feeULL, 0xc791f8bb7c7ced7cULL, 0x17e3cc7166668566ULL, 0xa68ea77bdddd53ddULL, 0xb84b2eaf17175c17ULL, 0x02468e4547470147ULL, 0x84dc211a9e9e429eULL, 0x1ec589d4caca0fcaULL, 0x75995a582d2db42dULL, 0x9179632ebfbfc6bfULL, 0x381b0e3f07071c07ULL, 0x012347acadad8eadULL, 0xea2fb4b05a5a755aULL, 0x6cb51bef83833683ULL, 0x85ff66b63333cc33ULL, 0x3ff2c65c63639163ULL, 0x100a041202020802ULL, 0x39384993aaaa92aaULL, 0xafa8e2de7171d971ULL, 0x0ecf8dc6c8c807c8ULL, 0xc87d32d119196419ULL, 0x7270923b49493949ULL, 0x869aaf5fd9d943d9ULL, 0xc31df931f2f2eff2ULL, 0x4b48dba8e3e3abe3ULL, 0xe22ab6b95b5b715bULL, 0x34920dbc88881a88ULL, 0xa4c8293e9a9a529aULL, 0x2dbe4c0b26269826ULL, 0x8dfa64bf3232c832ULL, 0xe94a7d59b0b0fab0ULL, 0x1b6acff2e9e983e9ULL, 0x78331e770f0f3c0fULL, 0xe6a6b733d5d573d5ULL, 0x74ba1df480803a80ULL, 0x997c6127bebec2beULL, 0x26de87ebcdcd13cdULL, 0xbde468893434d034ULL, 0x7a75903248483d48ULL, 0xab24e354ffffdbffULL, 0xf78ff48d7a7af57aULL, 0xf4ea3d6490907a90ULL, 0xc23ebe9d5f5f615fULL, 0x1da0403d20208020ULL, 0x67d5d00f6868bd68ULL, 0xd07234ca1a1a681aULL, 0x192c41b7aeae82aeULL, 0xc95e757db4b4eab4ULL, 0x9a19a8ce54544d54ULL, 0xece53b7f93937693ULL, 0x0daa442f22228822ULL, 0x07e9c86364648d64ULL, 0xdb12ff2af1f1e3f1ULL, 0xbfa2e6cc7373d173ULL, 0x905a248212124812ULL, 0x3a5d807a40401d40ULL, 0x4028104808082008ULL, 0x56e89b95c3c32bc3ULL, 0x337bc5dfecec97ecULL, 0x9690ab4ddbdb4bdbULL, 0x611f5fc0a1a1bea1ULL, 0x1c8307918d8d0e8dULL, 0xf5c97ac83d3df43dULL, 0xccf1335b97976697ULL, 0x0000000000000000ULL, 0x36d483f9cfcf1bcfULL, 0x4587566e2b2bac2bULL, 0x97b3ece17676c576ULL, 0x64b019e682823282ULL, 0xfea9b128d6d67fd6ULL, 0xd87736c31b1b6c1bULL, 0xc15b7774b5b5eeb5ULL, 0x112943beafaf86afULL, 0x77dfd41d6a6ab56aULL, 0xba0da0ea50505d50ULL, 0x124c8a5745450945ULL, 0xcb18fb38f3f3ebf3ULL, 0x9df060ad3030c030ULL, 0x2b74c3c4efef9befULL, 0xe5c37eda3f3ffc3fULL, 0x921caac755554955ULL, 0x791059dba2a2b2a2ULL, 0x0365c9e9eaea8feaULL, 0x0fecca6a65658965ULL, 0xb9686903babad2baULL, 0x65935e4a2f2fbc2fULL, 0x4ee79d8ec0c027c0ULL, 0xbe81a160dede5fdeULL, 0xe06c38fc1c1c701cULL, 0xbb2ee746fdfdd3fdULL, 0x52649a1f4d4d294dULL, 0xe4e0397692927292ULL, 0x8fbceafa7575c975ULL, 0x301e0c3606061806ULL, 0x249809ae8a8a128aULL, 0xf940794bb2b2f2b2ULL, 0x6359d185e6e6bfe6ULL, 0x70361c7e0e0e380eULL, 0xf8633ee71f1f7c1fULL, 0x37f7c45562629562ULL, 0xeea3b53ad4d477d4ULL, 0x29324d81a8a89aa8ULL, 0xc4f4315296966296ULL, 0x9b3aef62f9f9c3f9ULL, 0x66f697a3c5c533c5ULL, 0x35b14a1025259425ULL, 0xf220b2ab59597959ULL, 0x54ae15d084842a84ULL, 0xb7a7e4c57272d572ULL, 0xd5dd72ec3939e439ULL, 0x5a6198164c4c2d4cULL, 0xca3bbc945e5e655eULL, 0xe785f09f7878fd78ULL, 0xddd870e53838e038ULL, 0x148605988c8c0a8cULL, 0xc6b2bf17d1d163d1ULL, 0x410b57e4a5a5aea5ULL, 0x434dd9a1e2e2afe2ULL, 0x2ff8c24e61619961ULL, 0xf1457b42b3b3f6b3ULL, 0x15a5423421218421ULL, 0x94d625089c9c4a9cULL, 0xf0663cee1e1e781eULL, 0x2252866143431143ULL, 0x76fc93b1c7c73bc7ULL, 0xb32be54ffcfcd7fcULL, 0x2014082404041004ULL, 0xb208a2e351515951ULL, 0xbcc72f2599995e99ULL, 0x4fc4da226d6da96dULL, 0x68391a650d0d340dULL, 0x8335e979fafacffaULL, 0xb684a369dfdf5bdfULL, 0xd79bfca97e7ee57eULL, 0x3db4481924249024ULL, 0xc5d776fe3b3bec3bULL, 0x313d4b9aabab96abULL, 0x3ed181f0cece1fceULL, 0x8855229911114411ULL, 0x0c8903838f8f068fULL, 0x4a6b9c044e4e254eULL, 0xd1517366b7b7e6b7ULL, 0x0b60cbe0ebeb8bebULL, 0xfdcc78c13c3cf03cULL, 0x7cbf1ffd81813e81ULL, 0xd4fe354094946a94ULL, 0xeb0cf31cf7f7fbf7ULL, 0xa1676f18b9b9deb9ULL, 0x985f268b13134c13ULL, 0x7d9c58512c2cb02cULL, 0xd6b8bb05d3d36bd3ULL, 0x6b5cd38ce7e7bbe7ULL, 0x57cbdc396e6ea56eULL, 0x6ef395aac4c437c4ULL, 0x180f061b03030c03ULL, 0x8a13acdc56564556ULL, 0x1a49885e44440d44ULL, 0xdf9efea07f7fe17fULL, 0x21374f88a9a99ea9ULL, 0x4d8254672a2aa82aULL, 0xb16d6b0abbbbd6bbULL, 0x46e29f87c1c123c1ULL, 0xa202a6f153535153ULL, 0xae8ba572dcdc57dcULL, 0x582716530b0b2c0bULL, 0x9cd327019d9d4e9dULL, 0x47c1d82b6c6cad6cULL, 0x95f562a43131c431ULL, 0x87b9e8f37474cd74ULL, 0xe309f115f6f6fff6ULL, 0x0a438c4c46460546ULL, 0x092645a5acac8aacULL, 0x3c970fb589891e89ULL, 0xa04428b414145014ULL, 0x5b42dfbae1e1a3e1ULL, 0xb04e2ca616165816ULL, 0xcdd274f73a3ae83aULL, 0x6fd0d2066969b969ULL, 0x482d124109092409ULL, 0xa7ade0d77070dd70ULL, 0xd954716fb6b6e2b6ULL, 0xceb7bd1ed0d067d0ULL, 0x3b7ec7d6eded93edULL, 0x2edb85e2cccc17ccULL, 0x2a57846842421542ULL, 0xb4c22d2c98985a98ULL, 0x490e55eda4a4aaa4ULL, 0x5d8850752828a028ULL, 0xda31b8865c5c6d5cULL, 0x933fed6bf8f8c7f8ULL, 0x44a411c286862286ULL, }; static const u64 C5[256] = { 0x18c07830d8181860ULL, 0x2305af462623238cULL, 0xc67ef991b8c6c63fULL, 0xe8136fcdfbe8e887ULL, 0x874ca113cb878726ULL, 0xb8a9626d11b8b8daULL, 0x0108050209010104ULL, 0x4f426e9e0d4f4f21ULL, 0x36adee6c9b3636d8ULL, 0xa6590451ffa6a6a2ULL, 0xd2debdb90cd2d26fULL, 0xf5fb06f70ef5f5f3ULL, 0x79ef80f2967979f9ULL, 0x6f5fcede306f6fa1ULL, 0x91fcef3f6d91917eULL, 0x52aa07a4f8525255ULL, 0x6027fdc04760609dULL, 0xbc89766535bcbccaULL, 0x9baccd2b379b9b56ULL, 0x8e048c018a8e8e02ULL, 0xa371155bd2a3a3b6ULL, 0x0c603c186c0c0c30ULL, 0x7bff8af6847b7bf1ULL, 0x35b5e16a803535d4ULL, 0x1de8693af51d1d74ULL, 0xe05347ddb3e0e0a7ULL, 0xd7f6acb321d7d77bULL, 0xc25eed999cc2c22fULL, 0x2e6d965c432e2eb8ULL, 0x4b627a96294b4b31ULL, 0xfea321e15dfefedfULL, 0x578216aed5575741ULL, 0x15a8412abd151554ULL, 0x779fb6eee87777c1ULL, 0x37a5eb6e923737dcULL, 0xe57b56d79ee5e5b3ULL, 0x9f8cd923139f9f46ULL, 0xf0d317fd23f0f0e7ULL, 0x4a6a7f94204a4a35ULL, 0xda9e95a944dada4fULL, 0x58fa25b0a258587dULL, 0xc906ca8fcfc9c903ULL, 0x29558d527c2929a4ULL, 0x0a5022145a0a0a28ULL, 0xb1e14f7f50b1b1feULL, 0xa0691a5dc9a0a0baULL, 0x6b7fdad6146b6bb1ULL, 0x855cab17d985852eULL, 0xbd8173673cbdbdceULL, 0x5dd234ba8f5d5d69ULL, 0x1080502090101040ULL, 0xf4f303f507f4f4f7ULL, 0xcb16c08bddcbcb0bULL, 0x3eedc67cd33e3ef8ULL, 0x0528110a2d050514ULL, 0x671fe6ce78676781ULL, 0xe47353d597e4e4b7ULL, 0x2725bb4e0227279cULL, 0x4132588273414119ULL, 0x8b2c9d0ba78b8b16ULL, 0xa7510153f6a7a7a6ULL, 0x7dcf94fab27d7de9ULL, 0x95dcfb374995956eULL, 0xd88e9fad56d8d847ULL, 0xfb8b30eb70fbfbcbULL, 0xee2371c1cdeeee9fULL, 0x7cc791f8bb7c7cedULL, 0x6617e3cc71666685ULL, 0xdda68ea77bdddd53ULL, 0x17b84b2eaf17175cULL, 0x4702468e45474701ULL, 0x9e84dc211a9e9e42ULL, 0xca1ec589d4caca0fULL, 0x2d75995a582d2db4ULL, 0xbf9179632ebfbfc6ULL, 0x07381b0e3f07071cULL, 0xad012347acadad8eULL, 0x5aea2fb4b05a5a75ULL, 0x836cb51bef838336ULL, 0x3385ff66b63333ccULL, 0x633ff2c65c636391ULL, 0x02100a0412020208ULL, 0xaa39384993aaaa92ULL, 0x71afa8e2de7171d9ULL, 0xc80ecf8dc6c8c807ULL, 0x19c87d32d1191964ULL, 0x497270923b494939ULL, 0xd9869aaf5fd9d943ULL, 0xf2c31df931f2f2efULL, 0xe34b48dba8e3e3abULL, 0x5be22ab6b95b5b71ULL, 0x8834920dbc88881aULL, 0x9aa4c8293e9a9a52ULL, 0x262dbe4c0b262698ULL, 0x328dfa64bf3232c8ULL, 0xb0e94a7d59b0b0faULL, 0xe91b6acff2e9e983ULL, 0x0f78331e770f0f3cULL, 0xd5e6a6b733d5d573ULL, 0x8074ba1df480803aULL, 0xbe997c6127bebec2ULL, 0xcd26de87ebcdcd13ULL, 0x34bde468893434d0ULL, 0x487a75903248483dULL, 0xffab24e354ffffdbULL, 0x7af78ff48d7a7af5ULL, 0x90f4ea3d6490907aULL, 0x5fc23ebe9d5f5f61ULL, 0x201da0403d202080ULL, 0x6867d5d00f6868bdULL, 0x1ad07234ca1a1a68ULL, 0xae192c41b7aeae82ULL, 0xb4c95e757db4b4eaULL, 0x549a19a8ce54544dULL, 0x93ece53b7f939376ULL, 0x220daa442f222288ULL, 0x6407e9c86364648dULL, 0xf1db12ff2af1f1e3ULL, 0x73bfa2e6cc7373d1ULL, 0x12905a2482121248ULL, 0x403a5d807a40401dULL, 0x0840281048080820ULL, 0xc356e89b95c3c32bULL, 0xec337bc5dfecec97ULL, 0xdb9690ab4ddbdb4bULL, 0xa1611f5fc0a1a1beULL, 0x8d1c8307918d8d0eULL, 0x3df5c97ac83d3df4ULL, 0x97ccf1335b979766ULL, 0x0000000000000000ULL, 0xcf36d483f9cfcf1bULL, 0x2b4587566e2b2bacULL, 0x7697b3ece17676c5ULL, 0x8264b019e6828232ULL, 0xd6fea9b128d6d67fULL, 0x1bd87736c31b1b6cULL, 0xb5c15b7774b5b5eeULL, 0xaf112943beafaf86ULL, 0x6a77dfd41d6a6ab5ULL, 0x50ba0da0ea50505dULL, 0x45124c8a57454509ULL, 0xf3cb18fb38f3f3ebULL, 0x309df060ad3030c0ULL, 0xef2b74c3c4efef9bULL, 0x3fe5c37eda3f3ffcULL, 0x55921caac7555549ULL, 0xa2791059dba2a2b2ULL, 0xea0365c9e9eaea8fULL, 0x650fecca6a656589ULL, 0xbab9686903babad2ULL, 0x2f65935e4a2f2fbcULL, 0xc04ee79d8ec0c027ULL, 0xdebe81a160dede5fULL, 0x1ce06c38fc1c1c70ULL, 0xfdbb2ee746fdfdd3ULL, 0x4d52649a1f4d4d29ULL, 0x92e4e03976929272ULL, 0x758fbceafa7575c9ULL, 0x06301e0c36060618ULL, 0x8a249809ae8a8a12ULL, 0xb2f940794bb2b2f2ULL, 0xe66359d185e6e6bfULL, 0x0e70361c7e0e0e38ULL, 0x1ff8633ee71f1f7cULL, 0x6237f7c455626295ULL, 0xd4eea3b53ad4d477ULL, 0xa829324d81a8a89aULL, 0x96c4f43152969662ULL, 0xf99b3aef62f9f9c3ULL, 0xc566f697a3c5c533ULL, 0x2535b14a10252594ULL, 0x59f220b2ab595979ULL, 0x8454ae15d084842aULL, 0x72b7a7e4c57272d5ULL, 0x39d5dd72ec3939e4ULL, 0x4c5a6198164c4c2dULL, 0x5eca3bbc945e5e65ULL, 0x78e785f09f7878fdULL, 0x38ddd870e53838e0ULL, 0x8c148605988c8c0aULL, 0xd1c6b2bf17d1d163ULL, 0xa5410b57e4a5a5aeULL, 0xe2434dd9a1e2e2afULL, 0x612ff8c24e616199ULL, 0xb3f1457b42b3b3f6ULL, 0x2115a54234212184ULL, 0x9c94d625089c9c4aULL, 0x1ef0663cee1e1e78ULL, 0x4322528661434311ULL, 0xc776fc93b1c7c73bULL, 0xfcb32be54ffcfcd7ULL, 0x0420140824040410ULL, 0x51b208a2e3515159ULL, 0x99bcc72f2599995eULL, 0x6d4fc4da226d6da9ULL, 0x0d68391a650d0d34ULL, 0xfa8335e979fafacfULL, 0xdfb684a369dfdf5bULL, 0x7ed79bfca97e7ee5ULL, 0x243db44819242490ULL, 0x3bc5d776fe3b3becULL, 0xab313d4b9aabab96ULL, 0xce3ed181f0cece1fULL, 0x1188552299111144ULL, 0x8f0c8903838f8f06ULL, 0x4e4a6b9c044e4e25ULL, 0xb7d1517366b7b7e6ULL, 0xeb0b60cbe0ebeb8bULL, 0x3cfdcc78c13c3cf0ULL, 0x817cbf1ffd81813eULL, 0x94d4fe354094946aULL, 0xf7eb0cf31cf7f7fbULL, 0xb9a1676f18b9b9deULL, 0x13985f268b13134cULL, 0x2c7d9c58512c2cb0ULL, 0xd3d6b8bb05d3d36bULL, 0xe76b5cd38ce7e7bbULL, 0x6e57cbdc396e6ea5ULL, 0xc46ef395aac4c437ULL, 0x03180f061b03030cULL, 0x568a13acdc565645ULL, 0x441a49885e44440dULL, 0x7fdf9efea07f7fe1ULL, 0xa921374f88a9a99eULL, 0x2a4d8254672a2aa8ULL, 0xbbb16d6b0abbbbd6ULL, 0xc146e29f87c1c123ULL, 0x53a202a6f1535351ULL, 0xdcae8ba572dcdc57ULL, 0x0b582716530b0b2cULL, 0x9d9cd327019d9d4eULL, 0x6c47c1d82b6c6cadULL, 0x3195f562a43131c4ULL, 0x7487b9e8f37474cdULL, 0xf6e309f115f6f6ffULL, 0x460a438c4c464605ULL, 0xac092645a5acac8aULL, 0x893c970fb589891eULL, 0x14a04428b4141450ULL, 0xe15b42dfbae1e1a3ULL, 0x16b04e2ca6161658ULL, 0x3acdd274f73a3ae8ULL, 0x696fd0d2066969b9ULL, 0x09482d1241090924ULL, 0x70a7ade0d77070ddULL, 0xb6d954716fb6b6e2ULL, 0xd0ceb7bd1ed0d067ULL, 0xed3b7ec7d6eded93ULL, 0xcc2edb85e2cccc17ULL, 0x422a578468424215ULL, 0x98b4c22d2c98985aULL, 0xa4490e55eda4a4aaULL, 0x285d8850752828a0ULL, 0x5cda31b8865c5c6dULL, 0xf8933fed6bf8f8c7ULL, 0x8644a411c2868622ULL, }; static const u64 C6[256] = { 0x6018c07830d81818ULL, 0x8c2305af46262323ULL, 0x3fc67ef991b8c6c6ULL, 0x87e8136fcdfbe8e8ULL, 0x26874ca113cb8787ULL, 0xdab8a9626d11b8b8ULL, 0x0401080502090101ULL, 0x214f426e9e0d4f4fULL, 0xd836adee6c9b3636ULL, 0xa2a6590451ffa6a6ULL, 0x6fd2debdb90cd2d2ULL, 0xf3f5fb06f70ef5f5ULL, 0xf979ef80f2967979ULL, 0xa16f5fcede306f6fULL, 0x7e91fcef3f6d9191ULL, 0x5552aa07a4f85252ULL, 0x9d6027fdc0476060ULL, 0xcabc89766535bcbcULL, 0x569baccd2b379b9bULL, 0x028e048c018a8e8eULL, 0xb6a371155bd2a3a3ULL, 0x300c603c186c0c0cULL, 0xf17bff8af6847b7bULL, 0xd435b5e16a803535ULL, 0x741de8693af51d1dULL, 0xa7e05347ddb3e0e0ULL, 0x7bd7f6acb321d7d7ULL, 0x2fc25eed999cc2c2ULL, 0xb82e6d965c432e2eULL, 0x314b627a96294b4bULL, 0xdffea321e15dfefeULL, 0x41578216aed55757ULL, 0x5415a8412abd1515ULL, 0xc1779fb6eee87777ULL, 0xdc37a5eb6e923737ULL, 0xb3e57b56d79ee5e5ULL, 0x469f8cd923139f9fULL, 0xe7f0d317fd23f0f0ULL, 0x354a6a7f94204a4aULL, 0x4fda9e95a944dadaULL, 0x7d58fa25b0a25858ULL, 0x03c906ca8fcfc9c9ULL, 0xa429558d527c2929ULL, 0x280a5022145a0a0aULL, 0xfeb1e14f7f50b1b1ULL, 0xbaa0691a5dc9a0a0ULL, 0xb16b7fdad6146b6bULL, 0x2e855cab17d98585ULL, 0xcebd8173673cbdbdULL, 0x695dd234ba8f5d5dULL, 0x4010805020901010ULL, 0xf7f4f303f507f4f4ULL, 0x0bcb16c08bddcbcbULL, 0xf83eedc67cd33e3eULL, 0x140528110a2d0505ULL, 0x81671fe6ce786767ULL, 0xb7e47353d597e4e4ULL, 0x9c2725bb4e022727ULL, 0x1941325882734141ULL, 0x168b2c9d0ba78b8bULL, 0xa6a7510153f6a7a7ULL, 0xe97dcf94fab27d7dULL, 0x6e95dcfb37499595ULL, 0x47d88e9fad56d8d8ULL, 0xcbfb8b30eb70fbfbULL, 0x9fee2371c1cdeeeeULL, 0xed7cc791f8bb7c7cULL, 0x856617e3cc716666ULL, 0x53dda68ea77bddddULL, 0x5c17b84b2eaf1717ULL, 0x014702468e454747ULL, 0x429e84dc211a9e9eULL, 0x0fca1ec589d4cacaULL, 0xb42d75995a582d2dULL, 0xc6bf9179632ebfbfULL, 0x1c07381b0e3f0707ULL, 0x8ead012347acadadULL, 0x755aea2fb4b05a5aULL, 0x36836cb51bef8383ULL, 0xcc3385ff66b63333ULL, 0x91633ff2c65c6363ULL, 0x0802100a04120202ULL, 0x92aa39384993aaaaULL, 0xd971afa8e2de7171ULL, 0x07c80ecf8dc6c8c8ULL, 0x6419c87d32d11919ULL, 0x39497270923b4949ULL, 0x43d9869aaf5fd9d9ULL, 0xeff2c31df931f2f2ULL, 0xabe34b48dba8e3e3ULL, 0x715be22ab6b95b5bULL, 0x1a8834920dbc8888ULL, 0x529aa4c8293e9a9aULL, 0x98262dbe4c0b2626ULL, 0xc8328dfa64bf3232ULL, 0xfab0e94a7d59b0b0ULL, 0x83e91b6acff2e9e9ULL, 0x3c0f78331e770f0fULL, 0x73d5e6a6b733d5d5ULL, 0x3a8074ba1df48080ULL, 0xc2be997c6127bebeULL, 0x13cd26de87ebcdcdULL, 0xd034bde468893434ULL, 0x3d487a7590324848ULL, 0xdbffab24e354ffffULL, 0xf57af78ff48d7a7aULL, 0x7a90f4ea3d649090ULL, 0x615fc23ebe9d5f5fULL, 0x80201da0403d2020ULL, 0xbd6867d5d00f6868ULL, 0x681ad07234ca1a1aULL, 0x82ae192c41b7aeaeULL, 0xeab4c95e757db4b4ULL, 0x4d549a19a8ce5454ULL, 0x7693ece53b7f9393ULL, 0x88220daa442f2222ULL, 0x8d6407e9c8636464ULL, 0xe3f1db12ff2af1f1ULL, 0xd173bfa2e6cc7373ULL, 0x4812905a24821212ULL, 0x1d403a5d807a4040ULL, 0x2008402810480808ULL, 0x2bc356e89b95c3c3ULL, 0x97ec337bc5dfececULL, 0x4bdb9690ab4ddbdbULL, 0xbea1611f5fc0a1a1ULL, 0x0e8d1c8307918d8dULL, 0xf43df5c97ac83d3dULL, 0x6697ccf1335b9797ULL, 0x0000000000000000ULL, 0x1bcf36d483f9cfcfULL, 0xac2b4587566e2b2bULL, 0xc57697b3ece17676ULL, 0x328264b019e68282ULL, 0x7fd6fea9b128d6d6ULL, 0x6c1bd87736c31b1bULL, 0xeeb5c15b7774b5b5ULL, 0x86af112943beafafULL, 0xb56a77dfd41d6a6aULL, 0x5d50ba0da0ea5050ULL, 0x0945124c8a574545ULL, 0xebf3cb18fb38f3f3ULL, 0xc0309df060ad3030ULL, 0x9bef2b74c3c4efefULL, 0xfc3fe5c37eda3f3fULL, 0x4955921caac75555ULL, 0xb2a2791059dba2a2ULL, 0x8fea0365c9e9eaeaULL, 0x89650fecca6a6565ULL, 0xd2bab9686903babaULL, 0xbc2f65935e4a2f2fULL, 0x27c04ee79d8ec0c0ULL, 0x5fdebe81a160dedeULL, 0x701ce06c38fc1c1cULL, 0xd3fdbb2ee746fdfdULL, 0x294d52649a1f4d4dULL, 0x7292e4e039769292ULL, 0xc9758fbceafa7575ULL, 0x1806301e0c360606ULL, 0x128a249809ae8a8aULL, 0xf2b2f940794bb2b2ULL, 0xbfe66359d185e6e6ULL, 0x380e70361c7e0e0eULL, 0x7c1ff8633ee71f1fULL, 0x956237f7c4556262ULL, 0x77d4eea3b53ad4d4ULL, 0x9aa829324d81a8a8ULL, 0x6296c4f431529696ULL, 0xc3f99b3aef62f9f9ULL, 0x33c566f697a3c5c5ULL, 0x942535b14a102525ULL, 0x7959f220b2ab5959ULL, 0x2a8454ae15d08484ULL, 0xd572b7a7e4c57272ULL, 0xe439d5dd72ec3939ULL, 0x2d4c5a6198164c4cULL, 0x655eca3bbc945e5eULL, 0xfd78e785f09f7878ULL, 0xe038ddd870e53838ULL, 0x0a8c148605988c8cULL, 0x63d1c6b2bf17d1d1ULL, 0xaea5410b57e4a5a5ULL, 0xafe2434dd9a1e2e2ULL, 0x99612ff8c24e6161ULL, 0xf6b3f1457b42b3b3ULL, 0x842115a542342121ULL, 0x4a9c94d625089c9cULL, 0x781ef0663cee1e1eULL, 0x1143225286614343ULL, 0x3bc776fc93b1c7c7ULL, 0xd7fcb32be54ffcfcULL, 0x1004201408240404ULL, 0x5951b208a2e35151ULL, 0x5e99bcc72f259999ULL, 0xa96d4fc4da226d6dULL, 0x340d68391a650d0dULL, 0xcffa8335e979fafaULL, 0x5bdfb684a369dfdfULL, 0xe57ed79bfca97e7eULL, 0x90243db448192424ULL, 0xec3bc5d776fe3b3bULL, 0x96ab313d4b9aababULL, 0x1fce3ed181f0ceceULL, 0x4411885522991111ULL, 0x068f0c8903838f8fULL, 0x254e4a6b9c044e4eULL, 0xe6b7d1517366b7b7ULL, 0x8beb0b60cbe0ebebULL, 0xf03cfdcc78c13c3cULL, 0x3e817cbf1ffd8181ULL, 0x6a94d4fe35409494ULL, 0xfbf7eb0cf31cf7f7ULL, 0xdeb9a1676f18b9b9ULL, 0x4c13985f268b1313ULL, 0xb02c7d9c58512c2cULL, 0x6bd3d6b8bb05d3d3ULL, 0xbbe76b5cd38ce7e7ULL, 0xa56e57cbdc396e6eULL, 0x37c46ef395aac4c4ULL, 0x0c03180f061b0303ULL, 0x45568a13acdc5656ULL, 0x0d441a49885e4444ULL, 0xe17fdf9efea07f7fULL, 0x9ea921374f88a9a9ULL, 0xa82a4d8254672a2aULL, 0xd6bbb16d6b0abbbbULL, 0x23c146e29f87c1c1ULL, 0x5153a202a6f15353ULL, 0x57dcae8ba572dcdcULL, 0x2c0b582716530b0bULL, 0x4e9d9cd327019d9dULL, 0xad6c47c1d82b6c6cULL, 0xc43195f562a43131ULL, 0xcd7487b9e8f37474ULL, 0xfff6e309f115f6f6ULL, 0x05460a438c4c4646ULL, 0x8aac092645a5acacULL, 0x1e893c970fb58989ULL, 0x5014a04428b41414ULL, 0xa3e15b42dfbae1e1ULL, 0x5816b04e2ca61616ULL, 0xe83acdd274f73a3aULL, 0xb9696fd0d2066969ULL, 0x2409482d12410909ULL, 0xdd70a7ade0d77070ULL, 0xe2b6d954716fb6b6ULL, 0x67d0ceb7bd1ed0d0ULL, 0x93ed3b7ec7d6ededULL, 0x17cc2edb85e2ccccULL, 0x15422a5784684242ULL, 0x5a98b4c22d2c9898ULL, 0xaaa4490e55eda4a4ULL, 0xa0285d8850752828ULL, 0x6d5cda31b8865c5cULL, 0xc7f8933fed6bf8f8ULL, 0x228644a411c28686ULL, }; static const u64 C7[256] = { 0x186018c07830d818ULL, 0x238c2305af462623ULL, 0xc63fc67ef991b8c6ULL, 0xe887e8136fcdfbe8ULL, 0x8726874ca113cb87ULL, 0xb8dab8a9626d11b8ULL, 0x0104010805020901ULL, 0x4f214f426e9e0d4fULL, 0x36d836adee6c9b36ULL, 0xa6a2a6590451ffa6ULL, 0xd26fd2debdb90cd2ULL, 0xf5f3f5fb06f70ef5ULL, 0x79f979ef80f29679ULL, 0x6fa16f5fcede306fULL, 0x917e91fcef3f6d91ULL, 0x525552aa07a4f852ULL, 0x609d6027fdc04760ULL, 0xbccabc89766535bcULL, 0x9b569baccd2b379bULL, 0x8e028e048c018a8eULL, 0xa3b6a371155bd2a3ULL, 0x0c300c603c186c0cULL, 0x7bf17bff8af6847bULL, 0x35d435b5e16a8035ULL, 0x1d741de8693af51dULL, 0xe0a7e05347ddb3e0ULL, 0xd77bd7f6acb321d7ULL, 0xc22fc25eed999cc2ULL, 0x2eb82e6d965c432eULL, 0x4b314b627a96294bULL, 0xfedffea321e15dfeULL, 0x5741578216aed557ULL, 0x155415a8412abd15ULL, 0x77c1779fb6eee877ULL, 0x37dc37a5eb6e9237ULL, 0xe5b3e57b56d79ee5ULL, 0x9f469f8cd923139fULL, 0xf0e7f0d317fd23f0ULL, 0x4a354a6a7f94204aULL, 0xda4fda9e95a944daULL, 0x587d58fa25b0a258ULL, 0xc903c906ca8fcfc9ULL, 0x29a429558d527c29ULL, 0x0a280a5022145a0aULL, 0xb1feb1e14f7f50b1ULL, 0xa0baa0691a5dc9a0ULL, 0x6bb16b7fdad6146bULL, 0x852e855cab17d985ULL, 0xbdcebd8173673cbdULL, 0x5d695dd234ba8f5dULL, 0x1040108050209010ULL, 0xf4f7f4f303f507f4ULL, 0xcb0bcb16c08bddcbULL, 0x3ef83eedc67cd33eULL, 0x05140528110a2d05ULL, 0x6781671fe6ce7867ULL, 0xe4b7e47353d597e4ULL, 0x279c2725bb4e0227ULL, 0x4119413258827341ULL, 0x8b168b2c9d0ba78bULL, 0xa7a6a7510153f6a7ULL, 0x7de97dcf94fab27dULL, 0x956e95dcfb374995ULL, 0xd847d88e9fad56d8ULL, 0xfbcbfb8b30eb70fbULL, 0xee9fee2371c1cdeeULL, 0x7ced7cc791f8bb7cULL, 0x66856617e3cc7166ULL, 0xdd53dda68ea77bddULL, 0x175c17b84b2eaf17ULL, 0x47014702468e4547ULL, 0x9e429e84dc211a9eULL, 0xca0fca1ec589d4caULL, 0x2db42d75995a582dULL, 0xbfc6bf9179632ebfULL, 0x071c07381b0e3f07ULL, 0xad8ead012347acadULL, 0x5a755aea2fb4b05aULL, 0x8336836cb51bef83ULL, 0x33cc3385ff66b633ULL, 0x6391633ff2c65c63ULL, 0x020802100a041202ULL, 0xaa92aa39384993aaULL, 0x71d971afa8e2de71ULL, 0xc807c80ecf8dc6c8ULL, 0x196419c87d32d119ULL, 0x4939497270923b49ULL, 0xd943d9869aaf5fd9ULL, 0xf2eff2c31df931f2ULL, 0xe3abe34b48dba8e3ULL, 0x5b715be22ab6b95bULL, 0x881a8834920dbc88ULL, 0x9a529aa4c8293e9aULL, 0x2698262dbe4c0b26ULL, 0x32c8328dfa64bf32ULL, 0xb0fab0e94a7d59b0ULL, 0xe983e91b6acff2e9ULL, 0x0f3c0f78331e770fULL, 0xd573d5e6a6b733d5ULL, 0x803a8074ba1df480ULL, 0xbec2be997c6127beULL, 0xcd13cd26de87ebcdULL, 0x34d034bde4688934ULL, 0x483d487a75903248ULL, 0xffdbffab24e354ffULL, 0x7af57af78ff48d7aULL, 0x907a90f4ea3d6490ULL, 0x5f615fc23ebe9d5fULL, 0x2080201da0403d20ULL, 0x68bd6867d5d00f68ULL, 0x1a681ad07234ca1aULL, 0xae82ae192c41b7aeULL, 0xb4eab4c95e757db4ULL, 0x544d549a19a8ce54ULL, 0x937693ece53b7f93ULL, 0x2288220daa442f22ULL, 0x648d6407e9c86364ULL, 0xf1e3f1db12ff2af1ULL, 0x73d173bfa2e6cc73ULL, 0x124812905a248212ULL, 0x401d403a5d807a40ULL, 0x0820084028104808ULL, 0xc32bc356e89b95c3ULL, 0xec97ec337bc5dfecULL, 0xdb4bdb9690ab4ddbULL, 0xa1bea1611f5fc0a1ULL, 0x8d0e8d1c8307918dULL, 0x3df43df5c97ac83dULL, 0x976697ccf1335b97ULL, 0x0000000000000000ULL, 0xcf1bcf36d483f9cfULL, 0x2bac2b4587566e2bULL, 0x76c57697b3ece176ULL, 0x82328264b019e682ULL, 0xd67fd6fea9b128d6ULL, 0x1b6c1bd87736c31bULL, 0xb5eeb5c15b7774b5ULL, 0xaf86af112943beafULL, 0x6ab56a77dfd41d6aULL, 0x505d50ba0da0ea50ULL, 0x450945124c8a5745ULL, 0xf3ebf3cb18fb38f3ULL, 0x30c0309df060ad30ULL, 0xef9bef2b74c3c4efULL, 0x3ffc3fe5c37eda3fULL, 0x554955921caac755ULL, 0xa2b2a2791059dba2ULL, 0xea8fea0365c9e9eaULL, 0x6589650fecca6a65ULL, 0xbad2bab9686903baULL, 0x2fbc2f65935e4a2fULL, 0xc027c04ee79d8ec0ULL, 0xde5fdebe81a160deULL, 0x1c701ce06c38fc1cULL, 0xfdd3fdbb2ee746fdULL, 0x4d294d52649a1f4dULL, 0x927292e4e0397692ULL, 0x75c9758fbceafa75ULL, 0x061806301e0c3606ULL, 0x8a128a249809ae8aULL, 0xb2f2b2f940794bb2ULL, 0xe6bfe66359d185e6ULL, 0x0e380e70361c7e0eULL, 0x1f7c1ff8633ee71fULL, 0x62956237f7c45562ULL, 0xd477d4eea3b53ad4ULL, 0xa89aa829324d81a8ULL, 0x966296c4f4315296ULL, 0xf9c3f99b3aef62f9ULL, 0xc533c566f697a3c5ULL, 0x25942535b14a1025ULL, 0x597959f220b2ab59ULL, 0x842a8454ae15d084ULL, 0x72d572b7a7e4c572ULL, 0x39e439d5dd72ec39ULL, 0x4c2d4c5a6198164cULL, 0x5e655eca3bbc945eULL, 0x78fd78e785f09f78ULL, 0x38e038ddd870e538ULL, 0x8c0a8c148605988cULL, 0xd163d1c6b2bf17d1ULL, 0xa5aea5410b57e4a5ULL, 0xe2afe2434dd9a1e2ULL, 0x6199612ff8c24e61ULL, 0xb3f6b3f1457b42b3ULL, 0x21842115a5423421ULL, 0x9c4a9c94d625089cULL, 0x1e781ef0663cee1eULL, 0x4311432252866143ULL, 0xc73bc776fc93b1c7ULL, 0xfcd7fcb32be54ffcULL, 0x0410042014082404ULL, 0x515951b208a2e351ULL, 0x995e99bcc72f2599ULL, 0x6da96d4fc4da226dULL, 0x0d340d68391a650dULL, 0xfacffa8335e979faULL, 0xdf5bdfb684a369dfULL, 0x7ee57ed79bfca97eULL, 0x2490243db4481924ULL, 0x3bec3bc5d776fe3bULL, 0xab96ab313d4b9aabULL, 0xce1fce3ed181f0ceULL, 0x1144118855229911ULL, 0x8f068f0c8903838fULL, 0x4e254e4a6b9c044eULL, 0xb7e6b7d1517366b7ULL, 0xeb8beb0b60cbe0ebULL, 0x3cf03cfdcc78c13cULL, 0x813e817cbf1ffd81ULL, 0x946a94d4fe354094ULL, 0xf7fbf7eb0cf31cf7ULL, 0xb9deb9a1676f18b9ULL, 0x134c13985f268b13ULL, 0x2cb02c7d9c58512cULL, 0xd36bd3d6b8bb05d3ULL, 0xe7bbe76b5cd38ce7ULL, 0x6ea56e57cbdc396eULL, 0xc437c46ef395aac4ULL, 0x030c03180f061b03ULL, 0x5645568a13acdc56ULL, 0x440d441a49885e44ULL, 0x7fe17fdf9efea07fULL, 0xa99ea921374f88a9ULL, 0x2aa82a4d8254672aULL, 0xbbd6bbb16d6b0abbULL, 0xc123c146e29f87c1ULL, 0x535153a202a6f153ULL, 0xdc57dcae8ba572dcULL, 0x0b2c0b582716530bULL, 0x9d4e9d9cd327019dULL, 0x6cad6c47c1d82b6cULL, 0x31c43195f562a431ULL, 0x74cd7487b9e8f374ULL, 0xf6fff6e309f115f6ULL, 0x4605460a438c4c46ULL, 0xac8aac092645a5acULL, 0x891e893c970fb589ULL, 0x145014a04428b414ULL, 0xe1a3e15b42dfbae1ULL, 0x165816b04e2ca616ULL, 0x3ae83acdd274f73aULL, 0x69b9696fd0d20669ULL, 0x092409482d124109ULL, 0x70dd70a7ade0d770ULL, 0xb6e2b6d954716fb6ULL, 0xd067d0ceb7bd1ed0ULL, 0xed93ed3b7ec7d6edULL, 0xcc17cc2edb85e2ccULL, 0x4215422a57846842ULL, 0x985a98b4c22d2c98ULL, 0xa4aaa4490e55eda4ULL, 0x28a0285d88507528ULL, 0x5c6d5cda31b8865cULL, 0xf8c7f8933fed6bf8ULL, 0x86228644a411c286ULL, }; static const u64 rc[WHIRLPOOL_ROUNDS] = { 0x1823c6e887b8014fULL, 0x36a6d2f5796f9152ULL, 0x60bc9b8ea30c7b35ULL, 0x1de0d7c22e4bfe57ULL, 0x157737e59ff04adaULL, 0x58c9290ab1a06b85ULL, 0xbd5d10f4cb3e0567ULL, 0xe427418ba77d95d8ULL, 0xfbee7c66dd17479eULL, 0xca2dbf07ad5a8333ULL, }; /* * The core Whirlpool transform. */ static __no_kmsan_checks void wp512_process_buffer(struct wp512_ctx *wctx) { int i, r; u64 K[8]; /* the round key */ u64 block[8]; /* mu(buffer) */ u64 state[8]; /* the cipher state */ u64 L[8]; const __be64 *buffer = (const __be64 *)wctx->buffer; for (i = 0; i < 8; i++) block[i] = be64_to_cpu(buffer[i]); state[0] = block[0] ^ (K[0] = wctx->hash[0]); state[1] = block[1] ^ (K[1] = wctx->hash[1]); state[2] = block[2] ^ (K[2] = wctx->hash[2]); state[3] = block[3] ^ (K[3] = wctx->hash[3]); state[4] = block[4] ^ (K[4] = wctx->hash[4]); state[5] = block[5] ^ (K[5] = wctx->hash[5]); state[6] = block[6] ^ (K[6] = wctx->hash[6]); state[7] = block[7] ^ (K[7] = wctx->hash[7]); for (r = 0; r < WHIRLPOOL_ROUNDS; r++) { L[0] = C0[(int)(K[0] >> 56) ] ^ C1[(int)(K[7] >> 48) & 0xff] ^ C2[(int)(K[6] >> 40) & 0xff] ^ C3[(int)(K[5] >> 32) & 0xff] ^ C4[(int)(K[4] >> 24) & 0xff] ^ C5[(int)(K[3] >> 16) & 0xff] ^ C6[(int)(K[2] >> 8) & 0xff] ^ C7[(int)(K[1] ) & 0xff] ^ rc[r]; L[1] = C0[(int)(K[1] >> 56) ] ^ C1[(int)(K[0] >> 48) & 0xff] ^ C2[(int)(K[7] >> 40) & 0xff] ^ C3[(int)(K[6] >> 32) & 0xff] ^ C4[(int)(K[5] >> 24) & 0xff] ^ C5[(int)(K[4] >> 16) & 0xff] ^ C6[(int)(K[3] >> 8) & 0xff] ^ C7[(int)(K[2] ) & 0xff]; L[2] = C0[(int)(K[2] >> 56) ] ^ C1[(int)(K[1] >> 48) & 0xff] ^ C2[(int)(K[0] >> 40) & 0xff] ^ C3[(int)(K[7] >> 32) & 0xff] ^ C4[(int)(K[6] >> 24) & 0xff] ^ C5[(int)(K[5] >> 16) & 0xff] ^ C6[(int)(K[4] >> 8) & 0xff] ^ C7[(int)(K[3] ) & 0xff]; L[3] = C0[(int)(K[3] >> 56) ] ^ C1[(int)(K[2] >> 48) & 0xff] ^ C2[(int)(K[1] >> 40) & 0xff] ^ C3[(int)(K[0] >> 32) & 0xff] ^ C4[(int)(K[7] >> 24) & 0xff] ^ C5[(int)(K[6] >> 16) & 0xff] ^ C6[(int)(K[5] >> 8) & 0xff] ^ C7[(int)(K[4] ) & 0xff]; L[4] = C0[(int)(K[4] >> 56) ] ^ C1[(int)(K[3] >> 48) & 0xff] ^ C2[(int)(K[2] >> 40) & 0xff] ^ C3[(int)(K[1] >> 32) & 0xff] ^ C4[(int)(K[0] >> 24) & 0xff] ^ C5[(int)(K[7] >> 16) & 0xff] ^ C6[(int)(K[6] >> 8) & 0xff] ^ C7[(int)(K[5] ) & 0xff]; L[5] = C0[(int)(K[5] >> 56) ] ^ C1[(int)(K[4] >> 48) & 0xff] ^ C2[(int)(K[3] >> 40) & 0xff] ^ C3[(int)(K[2] >> 32) & 0xff] ^ C4[(int)(K[1] >> 24) & 0xff] ^ C5[(int)(K[0] >> 16) & 0xff] ^ C6[(int)(K[7] >> 8) & 0xff] ^ C7[(int)(K[6] ) & 0xff]; L[6] = C0[(int)(K[6] >> 56) ] ^ C1[(int)(K[5] >> 48) & 0xff] ^ C2[(int)(K[4] >> 40) & 0xff] ^ C3[(int)(K[3] >> 32) & 0xff] ^ C4[(int)(K[2] >> 24) & 0xff] ^ C5[(int)(K[1] >> 16) & 0xff] ^ C6[(int)(K[0] >> 8) & 0xff] ^ C7[(int)(K[7] ) & 0xff]; L[7] = C0[(int)(K[7] >> 56) ] ^ C1[(int)(K[6] >> 48) & 0xff] ^ C2[(int)(K[5] >> 40) & 0xff] ^ C3[(int)(K[4] >> 32) & 0xff] ^ C4[(int)(K[3] >> 24) & 0xff] ^ C5[(int)(K[2] >> 16) & 0xff] ^ C6[(int)(K[1] >> 8) & 0xff] ^ C7[(int)(K[0] ) & 0xff]; K[0] = L[0]; K[1] = L[1]; K[2] = L[2]; K[3] = L[3]; K[4] = L[4]; K[5] = L[5]; K[6] = L[6]; K[7] = L[7]; L[0] = C0[(int)(state[0] >> 56) ] ^ C1[(int)(state[7] >> 48) & 0xff] ^ C2[(int)(state[6] >> 40) & 0xff] ^ C3[(int)(state[5] >> 32) & 0xff] ^ C4[(int)(state[4] >> 24) & 0xff] ^ C5[(int)(state[3] >> 16) & 0xff] ^ C6[(int)(state[2] >> 8) & 0xff] ^ C7[(int)(state[1] ) & 0xff] ^ K[0]; L[1] = C0[(int)(state[1] >> 56) ] ^ C1[(int)(state[0] >> 48) & 0xff] ^ C2[(int)(state[7] >> 40) & 0xff] ^ C3[(int)(state[6] >> 32) & 0xff] ^ C4[(int)(state[5] >> 24) & 0xff] ^ C5[(int)(state[4] >> 16) & 0xff] ^ C6[(int)(state[3] >> 8) & 0xff] ^ C7[(int)(state[2] ) & 0xff] ^ K[1]; L[2] = C0[(int)(state[2] >> 56) ] ^ C1[(int)(state[1] >> 48) & 0xff] ^ C2[(int)(state[0] >> 40) & 0xff] ^ C3[(int)(state[7] >> 32) & 0xff] ^ C4[(int)(state[6] >> 24) & 0xff] ^ C5[(int)(state[5] >> 16) & 0xff] ^ C6[(int)(state[4] >> 8) & 0xff] ^ C7[(int)(state[3] ) & 0xff] ^ K[2]; L[3] = C0[(int)(state[3] >> 56) ] ^ C1[(int)(state[2] >> 48) & 0xff] ^ C2[(int)(state[1] >> 40) & 0xff] ^ C3[(int)(state[0] >> 32) & 0xff] ^ C4[(int)(state[7] >> 24) & 0xff] ^ C5[(int)(state[6] >> 16) & 0xff] ^ C6[(int)(state[5] >> 8) & 0xff] ^ C7[(int)(state[4] ) & 0xff] ^ K[3]; L[4] = C0[(int)(state[4] >> 56) ] ^ C1[(int)(state[3] >> 48) & 0xff] ^ C2[(int)(state[2] >> 40) & 0xff] ^ C3[(int)(state[1] >> 32) & 0xff] ^ C4[(int)(state[0] >> 24) & 0xff] ^ C5[(int)(state[7] >> 16) & 0xff] ^ C6[(int)(state[6] >> 8) & 0xff] ^ C7[(int)(state[5] ) & 0xff] ^ K[4]; L[5] = C0[(int)(state[5] >> 56) ] ^ C1[(int)(state[4] >> 48) & 0xff] ^ C2[(int)(state[3] >> 40) & 0xff] ^ C3[(int)(state[2] >> 32) & 0xff] ^ C4[(int)(state[1] >> 24) & 0xff] ^ C5[(int)(state[0] >> 16) & 0xff] ^ C6[(int)(state[7] >> 8) & 0xff] ^ C7[(int)(state[6] ) & 0xff] ^ K[5]; L[6] = C0[(int)(state[6] >> 56) ] ^ C1[(int)(state[5] >> 48) & 0xff] ^ C2[(int)(state[4] >> 40) & 0xff] ^ C3[(int)(state[3] >> 32) & 0xff] ^ C4[(int)(state[2] >> 24) & 0xff] ^ C5[(int)(state[1] >> 16) & 0xff] ^ C6[(int)(state[0] >> 8) & 0xff] ^ C7[(int)(state[7] ) & 0xff] ^ K[6]; L[7] = C0[(int)(state[7] >> 56) ] ^ C1[(int)(state[6] >> 48) & 0xff] ^ C2[(int)(state[5] >> 40) & 0xff] ^ C3[(int)(state[4] >> 32) & 0xff] ^ C4[(int)(state[3] >> 24) & 0xff] ^ C5[(int)(state[2] >> 16) & 0xff] ^ C6[(int)(state[1] >> 8) & 0xff] ^ C7[(int)(state[0] ) & 0xff] ^ K[7]; state[0] = L[0]; state[1] = L[1]; state[2] = L[2]; state[3] = L[3]; state[4] = L[4]; state[5] = L[5]; state[6] = L[6]; state[7] = L[7]; } /* * apply the Miyaguchi-Preneel compression function: */ wctx->hash[0] ^= state[0] ^ block[0]; wctx->hash[1] ^= state[1] ^ block[1]; wctx->hash[2] ^= state[2] ^ block[2]; wctx->hash[3] ^= state[3] ^ block[3]; wctx->hash[4] ^= state[4] ^ block[4]; wctx->hash[5] ^= state[5] ^ block[5]; wctx->hash[6] ^= state[6] ^ block[6]; wctx->hash[7] ^= state[7] ^ block[7]; } static int wp512_init(struct shash_desc *desc) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; memset(wctx->bitLength, 0, 32); wctx->bufferBits = wctx->bufferPos = 0; wctx->buffer[0] = 0; for (i = 0; i < 8; i++) { wctx->hash[i] = 0L; } return 0; } static int wp512_update(struct shash_desc *desc, const u8 *source, unsigned int len) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int sourcePos = 0; unsigned int bits_len = len * 8; // convert to number of bits int sourceGap = (8 - ((int)bits_len & 7)) & 7; int bufferRem = wctx->bufferBits & 7; int i; u32 b, carry; u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; int bufferBits = wctx->bufferBits; int bufferPos = wctx->bufferPos; u64 value = bits_len; for (i = 31, carry = 0; i >= 0 && (carry != 0 || value != 0ULL); i--) { carry += bitLength[i] + ((u32)value & 0xff); bitLength[i] = (u8)carry; carry >>= 8; value >>= 8; } while (bits_len > 8) { b = ((source[sourcePos] << sourceGap) & 0xff) | ((source[sourcePos + 1] & 0xff) >> (8 - sourceGap)); buffer[bufferPos++] |= (u8)(b >> bufferRem); bufferBits += 8 - bufferRem; if (bufferBits == WP512_BLOCK_SIZE * 8) { wp512_process_buffer(wctx); bufferBits = bufferPos = 0; } buffer[bufferPos] = b << (8 - bufferRem); bufferBits += bufferRem; bits_len -= 8; sourcePos++; } if (bits_len > 0) { b = (source[sourcePos] << sourceGap) & 0xff; buffer[bufferPos] |= b >> bufferRem; } else { b = 0; } if (bufferRem + bits_len < 8) { bufferBits += bits_len; } else { bufferPos++; bufferBits += 8 - bufferRem; bits_len -= 8 - bufferRem; if (bufferBits == WP512_BLOCK_SIZE * 8) { wp512_process_buffer(wctx); bufferBits = bufferPos = 0; } buffer[bufferPos] = b << (8 - bufferRem); bufferBits += (int)bits_len; } wctx->bufferBits = bufferBits; wctx->bufferPos = bufferPos; return 0; } static int wp512_final(struct shash_desc *desc, u8 *out) { struct wp512_ctx *wctx = shash_desc_ctx(desc); int i; u8 *buffer = wctx->buffer; u8 *bitLength = wctx->bitLength; int bufferBits = wctx->bufferBits; int bufferPos = wctx->bufferPos; __be64 *digest = (__be64 *)out; buffer[bufferPos] |= 0x80U >> (bufferBits & 7); bufferPos++; if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) { if (bufferPos < WP512_BLOCK_SIZE) memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos); wp512_process_buffer(wctx); bufferPos = 0; } if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) memset(&buffer[bufferPos], 0, (WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos); bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES; memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES], bitLength, WP512_LENGTHBYTES); wp512_process_buffer(wctx); for (i = 0; i < WP512_DIGEST_SIZE/8; i++) digest[i] = cpu_to_be64(wctx->hash[i]); wctx->bufferBits = bufferBits; wctx->bufferPos = bufferPos; return 0; } static int wp384_final(struct shash_desc *desc, u8 *out) { u8 D[64]; wp512_final(desc, D); memcpy(out, D, WP384_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } static int wp256_final(struct shash_desc *desc, u8 *out) { u8 D[64]; wp512_final(desc, D); memcpy(out, D, WP256_DIGEST_SIZE); memzero_explicit(D, WP512_DIGEST_SIZE); return 0; } static struct shash_alg wp_algs[3] = { { .digestsize = WP512_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, .final = wp512_final, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp512", .cra_driver_name = "wp512-generic", .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = WP384_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, .final = wp384_final, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp384", .cra_driver_name = "wp384-generic", .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } }, { .digestsize = WP256_DIGEST_SIZE, .init = wp512_init, .update = wp512_update, .final = wp256_final, .descsize = sizeof(struct wp512_ctx), .base = { .cra_name = "wp256", .cra_driver_name = "wp256-generic", .cra_blocksize = WP512_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static int __init wp512_mod_init(void) { return crypto_register_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } static void __exit wp512_mod_fini(void) { crypto_unregister_shashes(wp_algs, ARRAY_SIZE(wp_algs)); } MODULE_ALIAS_CRYPTO("wp512"); MODULE_ALIAS_CRYPTO("wp384"); MODULE_ALIAS_CRYPTO("wp256"); subsys_initcall(wp512_mod_init); module_exit(wp512_mod_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Whirlpool Message Digest Algorithm");
17812 786 118 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_PREEMPT_H #define __LINUX_PREEMPT_H /* * include/linux/preempt.h - macros for accessing and manipulating * preempt_count (used for kernel preemption, interrupt count, etc.) */ #include <linux/linkage.h> #include <linux/cleanup.h> #include <linux/types.h> /* * We put the hardirq and softirq counter into the preemption * counter. The bitmask has the following meaning: * * - bits 0-7 are the preemption count (max preemption depth: 256) * - bits 8-15 are the softirq count (max # of softirqs: 256) * * The hardirq count could in theory be the same as the number of * interrupts in the system, but we run all interrupt handlers with * interrupts disabled, so we cannot have nesting interrupts. Though * there are a few palaeontologic drivers which reenable interrupts in * the handler, so we need more than one bit here. * * PREEMPT_MASK: 0x000000ff * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x000f0000 * NMI_MASK: 0x00f00000 * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 #define SOFTIRQ_BITS 8 #define HARDIRQ_BITS 4 #define NMI_BITS 4 #define PREEMPT_SHIFT 0 #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) #define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) #define __IRQ_MASK(x) ((1UL << (x))-1) #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) #define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) #define NMI_OFFSET (1UL << NMI_SHIFT) #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) #define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* * Disable preemption until the scheduler is running -- use an unconditional * value so that it also works on !PREEMPT_COUNT kernels. * * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). */ #define INIT_PREEMPT_COUNT PREEMPT_OFFSET /* * Initial preempt_count value; reflects the preempt_count schedule invariant * which states that during context switches: * * preempt_count() == 2*PREEMPT_DISABLE_OFFSET * * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. * Note: See finish_task_switch(). */ #define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* preempt_count() and related functions, depends on PREEMPT_NEED_RESCHED */ #include <asm/preempt.h> /** * interrupt_context_level - return interrupt context level * * Returns the current interrupt context level. * 0 - normal context * 1 - softirq context * 2 - hardirq context * 3 - NMI context */ static __always_inline unsigned char interrupt_context_level(void) { unsigned long pc = preempt_count(); unsigned char level = 0; level += !!(pc & (NMI_MASK)); level += !!(pc & (NMI_MASK | HARDIRQ_MASK)); level += !!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)); return level; } /* * These macro definitions avoid redundant invocations of preempt_count() * because such invocations would result in redundant loads given that * preempt_count() is commonly implemented with READ_ONCE(). */ #define nmi_count() (preempt_count() & NMI_MASK) #define hardirq_count() (preempt_count() & HARDIRQ_MASK) #ifdef CONFIG_PREEMPT_RT # define softirq_count() (current->softirq_disable_cnt & SOFTIRQ_MASK) # define irq_count() ((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | softirq_count()) #else # define softirq_count() (preempt_count() & SOFTIRQ_MASK) # define irq_count() (preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK)) #endif /* * Macros to retrieve the current execution context: * * in_nmi() - We're in NMI context * in_hardirq() - We're in hard IRQ context * in_serving_softirq() - We're in softirq context * in_task() - We're in task context */ #define in_nmi() (nmi_count()) #define in_hardirq() (hardirq_count()) #define in_serving_softirq() (softirq_count() & SOFTIRQ_OFFSET) #ifdef CONFIG_PREEMPT_RT # define in_task() (!((preempt_count() & (NMI_MASK | HARDIRQ_MASK)) | in_serving_softirq())) #else # define in_task() (!(preempt_count() & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET))) #endif /* * The following macros are deprecated and should not be used in new code: * in_irq() - Obsolete version of in_hardirq() * in_softirq() - We have BH disabled, or are processing softirqs * in_interrupt() - We're in NMI,IRQ,SoftIRQ context or have BH disabled */ #define in_irq() (hardirq_count()) #define in_softirq() (softirq_count()) #define in_interrupt() (irq_count()) /* * The preempt_count offset after preempt_disable(); */ #if defined(CONFIG_PREEMPT_COUNT) # define PREEMPT_DISABLE_OFFSET PREEMPT_OFFSET #else # define PREEMPT_DISABLE_OFFSET 0 #endif /* * The preempt_count offset after spin_lock() */ #if !defined(CONFIG_PREEMPT_RT) #define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET #else /* Locks on RT do not disable preemption */ #define PREEMPT_LOCK_OFFSET 0 #endif /* * The preempt_count offset needed for things like: * * spin_lock_bh() * * Which need to disable both preemption (CONFIG_PREEMPT_COUNT) and * softirqs, such that unlock sequences of: * * spin_unlock(); * local_bh_enable(); * * Work as expected. */ #define SOFTIRQ_LOCK_OFFSET (SOFTIRQ_DISABLE_OFFSET + PREEMPT_LOCK_OFFSET) /* * Are we running in atomic context? WARNING: this macro cannot * always detect atomic context; in particular, it cannot know about * held spinlocks in non-preemptible kernels. Thus it should not be * used in the general case to determine whether sleeping is possible. * Do not use in_atomic() in driver code. */ #define in_atomic() (preempt_count() != 0) /* * Check whether we were atomic before we did preempt_disable(): * (used by the scheduler) */ #define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET) #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE) extern void preempt_count_add(int val); extern void preempt_count_sub(int val); #define preempt_count_dec_and_test() \ ({ preempt_count_sub(1); should_resched(0); }) #else #define preempt_count_add(val) __preempt_count_add(val) #define preempt_count_sub(val) __preempt_count_sub(val) #define preempt_count_dec_and_test() __preempt_count_dec_and_test() #endif #define __preempt_count_inc() __preempt_count_add(1) #define __preempt_count_dec() __preempt_count_sub(1) #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ do { \ preempt_count_inc(); \ barrier(); \ } while (0) #define sched_preempt_enable_no_resched() \ do { \ barrier(); \ preempt_count_dec(); \ } while (0) #define preempt_enable_no_resched() sched_preempt_enable_no_resched() #define preemptible() (preempt_count() == 0 && !irqs_disabled()) #ifdef CONFIG_PREEMPTION #define preempt_enable() \ do { \ barrier(); \ if (unlikely(preempt_count_dec_and_test())) \ __preempt_schedule(); \ } while (0) #define preempt_enable_notrace() \ do { \ barrier(); \ if (unlikely(__preempt_count_dec_and_test())) \ __preempt_schedule_notrace(); \ } while (0) #define preempt_check_resched() \ do { \ if (should_resched(0)) \ __preempt_schedule(); \ } while (0) #else /* !CONFIG_PREEMPTION */ #define preempt_enable() \ do { \ barrier(); \ preempt_count_dec(); \ } while (0) #define preempt_enable_notrace() \ do { \ barrier(); \ __preempt_count_dec(); \ } while (0) #define preempt_check_resched() do { } while (0) #endif /* CONFIG_PREEMPTION */ #define preempt_disable_notrace() \ do { \ __preempt_count_inc(); \ barrier(); \ } while (0) #define preempt_enable_no_resched_notrace() \ do { \ barrier(); \ __preempt_count_dec(); \ } while (0) #else /* !CONFIG_PREEMPT_COUNT */ /* * Even if we don't have any preemption, we need preempt disable/enable * to be barriers, so that we don't have things like get_user/put_user * that can cause faults and scheduling migrate into our preempt-protected * region. */ #define preempt_disable() barrier() #define sched_preempt_enable_no_resched() barrier() #define preempt_enable_no_resched() barrier() #define preempt_enable() barrier() #define preempt_check_resched() do { } while (0) #define preempt_disable_notrace() barrier() #define preempt_enable_no_resched_notrace() barrier() #define preempt_enable_notrace() barrier() #define preemptible() 0 #endif /* CONFIG_PREEMPT_COUNT */ #ifdef MODULE /* * Modules have no business playing preemption tricks. */ #undef sched_preempt_enable_no_resched #undef preempt_enable_no_resched #undef preempt_enable_no_resched_notrace #undef preempt_check_resched #endif #define preempt_set_need_resched() \ do { \ set_preempt_need_resched(); \ } while (0) #define preempt_fold_need_resched() \ do { \ if (tif_need_resched()) \ set_preempt_need_resched(); \ } while (0) #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier; struct task_struct; /** * preempt_ops - notifiers called when a task is preempted and rescheduled * @sched_in: we're about to be rescheduled: * notifier: struct preempt_notifier for the task being scheduled * cpu: cpu we're scheduled on * @sched_out: we've just been preempted * notifier: struct preempt_notifier for the task being preempted * next: the task that's kicking us out * * Please note that sched_in and out are called under different * contexts. sched_out is called with rq lock held and irq disabled * while sched_in is called without rq lock and irq enabled. This * difference is intentional and depended upon by its users. */ struct preempt_ops { void (*sched_in)(struct preempt_notifier *notifier, int cpu); void (*sched_out)(struct preempt_notifier *notifier, struct task_struct *next); }; /** * preempt_notifier - key for installing preemption notifiers * @link: internal use * @ops: defines the notifier functions to be called * * Usually used in conjunction with container_of(). */ struct preempt_notifier { struct hlist_node link; struct preempt_ops *ops; }; void preempt_notifier_inc(void); void preempt_notifier_dec(void); void preempt_notifier_register(struct preempt_notifier *notifier); void preempt_notifier_unregister(struct preempt_notifier *notifier); static inline void preempt_notifier_init(struct preempt_notifier *notifier, struct preempt_ops *ops) { /* INIT_HLIST_NODE() open coded, to avoid dependency on list.h */ notifier->link.next = NULL; notifier->link.pprev = NULL; notifier->ops = ops; } #endif #ifdef CONFIG_SMP /* * Migrate-Disable and why it is undesired. * * When a preempted task becomes elegible to run under the ideal model (IOW it * becomes one of the M highest priority tasks), it might still have to wait * for the preemptee's migrate_disable() section to complete. Thereby suffering * a reduction in bandwidth in the exact duration of the migrate_disable() * section. * * Per this argument, the change from preempt_disable() to migrate_disable() * gets us: * * - a higher priority tasks gains reduced wake-up latency; with preempt_disable() * it would have had to wait for the lower priority task. * * - a lower priority tasks; which under preempt_disable() could've instantly * migrated away when another CPU becomes available, is now constrained * by the ability to push the higher priority task away, which might itself be * in a migrate_disable() section, reducing it's available bandwidth. * * IOW it trades latency / moves the interference term, but it stays in the * system, and as long as it remains unbounded, the system is not fully * deterministic. * * * The reason we have it anyway. * * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a * number of primitives into becoming preemptible, they would also allow * migration. This turns out to break a bunch of per-cpu usage. To this end, * all these primitives employ migirate_disable() to restore this implicit * assumption. * * This is a 'temporary' work-around at best. The correct solution is getting * rid of the above assumptions and reworking the code to employ explicit * per-cpu locking or short preempt-disable regions. * * The end goal must be to get rid of migrate_disable(), alternatively we need * a schedulability theory that does not depend on abritrary migration. * * * Notes on the implementation. * * The implementation is particularly tricky since existing code patterns * dictate neither migrate_disable() nor migrate_enable() is allowed to block. * This means that it cannot use cpus_read_lock() to serialize against hotplug, * nor can it easily migrate itself into a pending affinity mask change on * migrate_enable(). * * * Note: even non-work-conserving schedulers like semi-partitioned depends on * migration, so migrate_disable() is not only a problem for * work-conserving schedulers. * */ extern void migrate_disable(void); extern void migrate_enable(void); #else static inline void migrate_disable(void) { } static inline void migrate_enable(void) { } #endif /* CONFIG_SMP */ /** * preempt_disable_nested - Disable preemption inside a normally preempt disabled section * * Use for code which requires preemption protection inside a critical * section which has preemption disabled implicitly on non-PREEMPT_RT * enabled kernels, by e.g.: * - holding a spinlock/rwlock * - soft interrupt context * - regular interrupt handlers * * On PREEMPT_RT enabled kernels spinlock/rwlock held sections, soft * interrupt context and regular interrupt handlers are preemptible and * only prevent migration. preempt_disable_nested() ensures that preemption * is disabled for cases which require CPU local serialization even on * PREEMPT_RT. For non-PREEMPT_RT kernels this is a NOP. * * The use cases are code sequences which are not serialized by a * particular lock instance, e.g.: * - seqcount write side critical sections where the seqcount is not * associated to a particular lock and therefore the automatic * protection mechanism does not work. This prevents a live lock * against a preempting high priority reader. * - RMW per CPU variable updates like vmstat. */ /* Macro to avoid header recursion hell vs. lockdep */ #define preempt_disable_nested() \ do { \ if (IS_ENABLED(CONFIG_PREEMPT_RT)) \ preempt_disable(); \ else \ lockdep_assert_preemption_disabled(); \ } while (0) /** * preempt_enable_nested - Undo the effect of preempt_disable_nested() */ static __always_inline void preempt_enable_nested(void) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) preempt_enable(); } DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable()) DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace()) DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable()) #ifdef CONFIG_PREEMPT_DYNAMIC extern bool preempt_model_none(void); extern bool preempt_model_voluntary(void); extern bool preempt_model_full(void); extern bool preempt_model_lazy(void); #else static inline bool preempt_model_none(void) { return IS_ENABLED(CONFIG_PREEMPT_NONE); } static inline bool preempt_model_voluntary(void) { return IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY); } static inline bool preempt_model_full(void) { return IS_ENABLED(CONFIG_PREEMPT); } static inline bool preempt_model_lazy(void) { return IS_ENABLED(CONFIG_PREEMPT_LAZY); } #endif static inline bool preempt_model_rt(void) { return IS_ENABLED(CONFIG_PREEMPT_RT); } extern const char *preempt_model_str(void); /* * Does the preemption model allow non-cooperative preemption? * * For !CONFIG_PREEMPT_DYNAMIC kernels this is an exact match with * CONFIG_PREEMPTION; for CONFIG_PREEMPT_DYNAMIC this doesn't work as the * kernel is *built* with CONFIG_PREEMPTION=y but may run with e.g. the * PREEMPT_NONE model. */ static inline bool preempt_model_preemptible(void) { return preempt_model_full() || preempt_model_lazy() || preempt_model_rt(); } #endif /* __LINUX_PREEMPT_H */
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 // SPDX-License-Identifier: GPL-2.0+ /* * drivers/of/property.c - Procedures for accessing and interpreting * Devicetree properties and graphs. * * Initially created by copying procedures from drivers/of/base.c. This * file contains the OF property as well as the OF graph interface * functions. * * Paul Mackerras August 1996. * Copyright (C) 1996-2005 Paul Mackerras. * * Adapted for 64bit PowerPC by Dave Engebretsen and Peter Bergner. * {engebret|bergner}@us.ibm.com * * Adapted for sparc and sparc64 by David S. Miller davem@davemloft.net * * Reconsolidated from arch/x/kernel/prom.c by Stephen Rothwell and * Grant Likely. */ #define pr_fmt(fmt) "OF: " fmt #include <linux/of.h> #include <linux/of_address.h> #include <linux/of_device.h> #include <linux/of_graph.h> #include <linux/of_irq.h> #include <linux/string.h> #include <linux/moduleparam.h> #include "of_private.h" /** * of_property_read_bool - Find a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * * Search for a boolean property in a device node. Usage on non-boolean * property types is deprecated. * * Return: true if the property exists false otherwise. */ bool of_property_read_bool(const struct device_node *np, const char *propname) { struct property *prop = of_find_property(np, propname, NULL); /* * Boolean properties should not have a value. Testing for property * presence should either use of_property_present() or just read the * property value and check the returned error code. */ if (prop && prop->length) pr_warn("%pOF: Read of boolean property '%s' with a value.\n", np, propname); return prop ? true : false; } EXPORT_SYMBOL(of_property_read_bool); /** * of_graph_is_present() - check graph's presence * @node: pointer to device_node containing graph port * * Return: True if @node has a port or ports (with a port) sub-node, * false otherwise. */ bool of_graph_is_present(const struct device_node *node) { struct device_node *ports __free(device_node) = of_get_child_by_name(node, "ports"); if (ports) node = ports; struct device_node *port __free(device_node) = of_get_child_by_name(node, "port"); return !!port; } EXPORT_SYMBOL(of_graph_is_present); /** * of_property_count_elems_of_size - Count the number of elements in a property * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @elem_size: size of the individual element * * Search for a property in a device node and count the number of elements of * size elem_size in it. * * Return: The number of elements on sucess, -EINVAL if the property does not * exist or its length does not match a multiple of elem_size and -ENODATA if * the property does not have a value. */ int of_property_count_elems_of_size(const struct device_node *np, const char *propname, int elem_size) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; if (prop->length % elem_size != 0) { pr_err("size of %s in node %pOF is not a multiple of %d\n", propname, np, elem_size); return -EINVAL; } return prop->length / elem_size; } EXPORT_SYMBOL_GPL(of_property_count_elems_of_size); /** * of_find_property_value_of_size * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @min: minimum allowed length of property value * @max: maximum allowed length of property value (0 means unlimited) * @len: if !=NULL, actual length is written to here * * Search for a property in a device node and valid the requested size. * * Return: The property value on success, -EINVAL if the property does not * exist, -ENODATA if property does not have a value, and -EOVERFLOW if the * property data is too small or too large. * */ static void *of_find_property_value_of_size(const struct device_node *np, const char *propname, u32 min, u32 max, size_t *len) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return ERR_PTR(-EINVAL); if (!prop->value) return ERR_PTR(-ENODATA); if (prop->length < min) return ERR_PTR(-EOVERFLOW); if (max && prop->length > max) return ERR_PTR(-EOVERFLOW); if (len) *len = prop->length; return prop->value; } /** * of_property_read_u16_index - Find and read a u16 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u16 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 16-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u16 value can be decoded. */ int of_property_read_u16_index(const struct device_node *np, const char *propname, u32 index, u16 *out_value) { const u16 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be16_to_cpup(((__be16 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u16_index); /** * of_property_read_u32_index - Find and read a u32 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u32 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 32-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u32 value can be decoded. */ int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value) { const u32 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be32_to_cpup(((__be32 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u32_index); /** * of_property_read_u64_index - Find and read a u64 from a multi-value property. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @index: index of the u64 in the list of values * @out_value: pointer to return value, modified only if no error. * * Search for a property in a device node and read nth 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64_index(const struct device_node *np, const char *propname, u32 index, u64 *out_value) { const u64 *val = of_find_property_value_of_size(np, propname, ((index + 1) * sizeof(*out_value)), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = be64_to_cpup(((__be64 *)val) + index); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64_index); /** * of_property_read_variable_u8_array - Find and read an array of u8 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 8-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 8 <0x50 0x60 0x70>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u8 value can be decoded. */ int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const u8 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = *val++; return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u8_array); /** * of_property_read_variable_u16_array - Find and read an array of u16 from a * property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 16-bit value(s) from * it. * * dts entry of array should be like: * ``property = /bits/ 16 <0x5000 0x6000 0x7000>;`` * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u16 value can be decoded. */ int of_property_read_variable_u16_array(const struct device_node *np, const char *propname, u16 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be16 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be16_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u16_array); /** * of_property_read_variable_u32_array - Find and read an array of 32 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to return found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 32-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u32 value can be decoded. */ int of_property_read_variable_u32_array(const struct device_node *np, const char *propname, u32 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) *out_values++ = be32_to_cpup(val++); return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u32_array); /** * of_property_read_u64 - Find and read a 64 bit integer from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_value: pointer to return value, modified only if return value is 0. * * Search for a property in a device node and read a 64-bit value from * it. * * Return: 0 on success, -EINVAL if the property does not exist, * -ENODATA if property does not have a value, and -EOVERFLOW if the * property data isn't large enough. * * The out_value is modified only if a valid u64 value can be decoded. */ int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { const __be32 *val = of_find_property_value_of_size(np, propname, sizeof(*out_value), 0, NULL); if (IS_ERR(val)) return PTR_ERR(val); *out_value = of_read_number(val, 2); return 0; } EXPORT_SYMBOL_GPL(of_property_read_u64); /** * of_property_read_variable_u64_array - Find and read an array of 64 bit * integers from a property, with bounds on the minimum and maximum array size. * * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_values: pointer to found values. * @sz_min: minimum number of array elements to read * @sz_max: maximum number of array elements to read, if zero there is no * upper limit on the number of elements in the dts entry but only * sz_min will be read. * * Search for a property in a device node and read 64-bit value(s) from * it. * * Return: The number of elements read on success, -EINVAL if the property * does not exist, -ENODATA if property does not have a value, and -EOVERFLOW * if the property data is smaller than sz_min or longer than sz_max. * * The out_values is modified only if a valid u64 value can be decoded. */ int of_property_read_variable_u64_array(const struct device_node *np, const char *propname, u64 *out_values, size_t sz_min, size_t sz_max) { size_t sz, count; const __be32 *val = of_find_property_value_of_size(np, propname, (sz_min * sizeof(*out_values)), (sz_max * sizeof(*out_values)), &sz); if (IS_ERR(val)) return PTR_ERR(val); if (!sz_max) sz = sz_min; else sz /= sizeof(*out_values); count = sz; while (count--) { *out_values++ = of_read_number(val, 2); val += 2; } return sz; } EXPORT_SYMBOL_GPL(of_property_read_variable_u64_array); /** * of_property_read_string - Find and read a string from a property * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_string: pointer to null terminated return string, modified only if * return value is 0. * * Search for a property in a device tree node and retrieve a null * terminated string value (pointer to data, not a copy). * * Return: 0 on success, -EINVAL if the property does not exist, -ENODATA if * property does not have a value, and -EILSEQ if the string is not * null-terminated within the length of the property data. * * Note that the empty string "" has length of 1, thus -ENODATA cannot * be interpreted as an empty string. * * The out_string pointer is modified only if a valid string can be decoded. */ int of_property_read_string(const struct device_node *np, const char *propname, const char **out_string) { const struct property *prop = of_find_property(np, propname, NULL); if (!prop) return -EINVAL; if (!prop->length) return -ENODATA; if (strnlen(prop->value, prop->length) >= prop->length) return -EILSEQ; *out_string = prop->value; return 0; } EXPORT_SYMBOL_GPL(of_property_read_string); /** * of_property_match_string() - Find string in a list and return index * @np: pointer to the node containing the string list property * @propname: string list property name * @string: pointer to the string to search for in the string list * * Search for an exact match of string in a device node property which is a * string of lists. * * Return: the index of the first occurrence of the string on success, -EINVAL * if the property does not exist, -ENODATA if the property does not have a * value, and -EILSEQ if the string is not null-terminated within the length of * the property data. */ int of_property_match_string(const struct device_node *np, const char *propname, const char *string) { const struct property *prop = of_find_property(np, propname, NULL); size_t l; int i; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end; i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; pr_debug("comparing %s with %s\n", string, p); if (strcmp(string, p) == 0) return i; /* Found it; return index */ } return -ENODATA; } EXPORT_SYMBOL_GPL(of_property_match_string); /** * of_property_read_string_helper() - Utility helper for parsing string properties * @np: device node from which the property value is to be read. * @propname: name of the property to be searched. * @out_strs: output array of string pointers. * @sz: number of array elements to read. * @skip: Number of strings to skip over at beginning of list. * * Don't call this function directly. It is a utility helper for the * of_property_read_string*() family of functions. */ int of_property_read_string_helper(const struct device_node *np, const char *propname, const char **out_strs, size_t sz, int skip) { const struct property *prop = of_find_property(np, propname, NULL); int l = 0, i = 0; const char *p, *end; if (!prop) return -EINVAL; if (!prop->value) return -ENODATA; p = prop->value; end = p + prop->length; for (i = 0; p < end && (!out_strs || i < skip + sz); i++, p += l) { l = strnlen(p, end - p) + 1; if (p + l > end) return -EILSEQ; if (out_strs && i >= skip) *out_strs++ = p; } i -= skip; return i <= 0 ? -ENODATA : i; } EXPORT_SYMBOL_GPL(of_property_read_string_helper); const __be32 *of_prop_next_u32(const struct property *prop, const __be32 *cur, u32 *pu) { const void *curv = cur; if (!prop) return NULL; if (!cur) { curv = prop->value; goto out_val; } curv += sizeof(*cur); if (curv >= prop->value + prop->length) return NULL; out_val: *pu = be32_to_cpup(curv); return curv; } EXPORT_SYMBOL_GPL(of_prop_next_u32); const char *of_prop_next_string(const struct property *prop, const char *cur) { const void *curv = cur; if (!prop) return NULL; if (!cur) return prop->value; curv += strlen(cur) + 1; if (curv >= prop->value + prop->length) return NULL; return curv; } EXPORT_SYMBOL_GPL(of_prop_next_string); /** * of_graph_parse_endpoint() - parse common endpoint node properties * @node: pointer to endpoint device_node * @endpoint: pointer to the OF endpoint data structure * * The caller should hold a reference to @node. */ int of_graph_parse_endpoint(const struct device_node *node, struct of_endpoint *endpoint) { struct device_node *port_node __free(device_node) = of_get_parent(node); WARN_ONCE(!port_node, "%s(): endpoint %pOF has no parent node\n", __func__, node); memset(endpoint, 0, sizeof(*endpoint)); endpoint->local_node = node; /* * It doesn't matter whether the two calls below succeed. * If they don't then the default value 0 is used. */ of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); return 0; } EXPORT_SYMBOL(of_graph_parse_endpoint); /** * of_graph_get_port_by_id() - get the port matching a given id * @parent: pointer to the parent device node * @id: id of the port * * Return: A 'port' node pointer with refcount incremented. The caller * has to use of_node_put() on it when done. */ struct device_node *of_graph_get_port_by_id(struct device_node *parent, u32 id) { struct device_node *node __free(device_node) = of_get_child_by_name(parent, "ports"); if (node) parent = node; for_each_child_of_node_scoped(parent, port) { u32 port_id = 0; if (!of_node_name_eq(port, "port")) continue; of_property_read_u32(port, "reg", &port_id); if (id == port_id) return_ptr(port); } return NULL; } EXPORT_SYMBOL(of_graph_get_port_by_id); /** * of_graph_get_next_port() - get next port node. * @parent: pointer to the parent device node, or parent ports node * @prev: previous port node, or NULL to get first * * Parent device node can be used as @parent whether device node has ports node * or not. It will work same as ports@0 node. * * Return: A 'port' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_port(const struct device_node *parent, struct device_node *prev) { if (!parent) return NULL; if (!prev) { struct device_node *node __free(device_node) = of_get_child_by_name(parent, "ports"); if (node) parent = node; return of_get_child_by_name(parent, "port"); } do { prev = of_get_next_child(parent, prev); if (!prev) break; } while (!of_node_name_eq(prev, "port")); return prev; } EXPORT_SYMBOL(of_graph_get_next_port); /** * of_graph_get_next_port_endpoint() - get next endpoint node in port. * If it reached to end of the port, it will return NULL. * @port: pointer to the target port node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_port_endpoint(const struct device_node *port, struct device_node *prev) { while (1) { prev = of_get_next_child(port, prev); if (!prev) break; if (WARN(!of_node_name_eq(prev, "endpoint"), "non endpoint node is used (%pOF)", prev)) continue; break; } return prev; } EXPORT_SYMBOL(of_graph_get_next_port_endpoint); /** * of_graph_get_next_endpoint() - get next endpoint node * @parent: pointer to the parent device node * @prev: previous endpoint node, or NULL to get first * * Return: An 'endpoint' node pointer with refcount incremented. Refcount * of the passed @prev node is decremented. */ struct device_node *of_graph_get_next_endpoint(const struct device_node *parent, struct device_node *prev) { struct device_node *endpoint; struct device_node *port; if (!parent) return NULL; /* * Start by locating the port node. If no previous endpoint is specified * search for the first port node, otherwise get the previous endpoint * parent port node. */ if (!prev) { port = of_graph_get_next_port(parent, NULL); if (!port) { pr_debug("graph: no port node found in %pOF\n", parent); return NULL; } } else { port = of_get_parent(prev); if (WARN_ONCE(!port, "%s(): endpoint %pOF has no parent node\n", __func__, prev)) return NULL; } while (1) { /* * Now that we have a port node, get the next endpoint by * getting the next child. If the previous endpoint is NULL this * will return the first child. */ endpoint = of_graph_get_next_port_endpoint(port, prev); if (endpoint) { of_node_put(port); return endpoint; } /* No more endpoints under this port, try the next one. */ prev = NULL; port = of_graph_get_next_port(parent, port); if (!port) return NULL; } } EXPORT_SYMBOL(of_graph_get_next_endpoint); /** * of_graph_get_endpoint_by_regs() - get endpoint node of specific identifiers * @parent: pointer to the parent device node * @port_reg: identifier (value of reg property) of the parent port node * @reg: identifier (value of reg property) of the endpoint node * * Return: An 'endpoint' node pointer which is identified by reg and at the same * is the child of a port node identified by port_reg. reg and port_reg are * ignored when they are -1. Use of_node_put() on the pointer when done. */ struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { struct of_endpoint endpoint; struct device_node *node = NULL; for_each_endpoint_of_node(parent, node) { of_graph_parse_endpoint(node, &endpoint); if (((port_reg == -1) || (endpoint.port == port_reg)) && ((reg == -1) || (endpoint.id == reg))) return node; } return NULL; } EXPORT_SYMBOL(of_graph_get_endpoint_by_regs); /** * of_graph_get_remote_endpoint() - get remote endpoint node * @node: pointer to a local endpoint device_node * * Return: Remote endpoint node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_endpoint(const struct device_node *node) { /* Get remote endpoint node. */ return of_parse_phandle(node, "remote-endpoint", 0); } EXPORT_SYMBOL(of_graph_get_remote_endpoint); /** * of_graph_get_port_parent() - get port's parent node * @node: pointer to a local endpoint device_node * * Return: device node associated with endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_port_parent(struct device_node *node) { unsigned int depth; if (!node) return NULL; /* * Preserve usecount for passed in node as of_get_next_parent() * will do of_node_put() on it. */ of_node_get(node); /* Walk 3 levels up only if there is 'ports' node. */ for (depth = 3; depth && node; depth--) { node = of_get_next_parent(node); if (depth == 2 && !of_node_name_eq(node, "ports") && !of_node_name_eq(node, "in-ports") && !of_node_name_eq(node, "out-ports")) break; } return node; } EXPORT_SYMBOL(of_graph_get_port_parent); /** * of_graph_get_remote_port_parent() - get remote port's parent node * @node: pointer to a local endpoint device_node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port_parent( const struct device_node *node) { /* Get remote endpoint node. */ struct device_node *np __free(device_node) = of_graph_get_remote_endpoint(node); return of_graph_get_port_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port_parent); /** * of_graph_get_remote_port() - get remote port node * @node: pointer to a local endpoint device_node * * Return: Remote port node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_port(const struct device_node *node) { struct device_node *np; /* Get remote endpoint node. */ np = of_graph_get_remote_endpoint(node); if (!np) return NULL; return of_get_next_parent(np); } EXPORT_SYMBOL(of_graph_get_remote_port); /** * of_graph_get_endpoint_count() - get the number of endpoints in a device node * @np: parent device node containing ports and endpoints * * Return: count of endpoint of this device node */ unsigned int of_graph_get_endpoint_count(const struct device_node *np) { struct device_node *endpoint; unsigned int num = 0; for_each_endpoint_of_node(np, endpoint) num++; return num; } EXPORT_SYMBOL(of_graph_get_endpoint_count); /** * of_graph_get_port_count() - get the number of port in a device or ports node * @np: pointer to the device or ports node * * Return: count of port of this device or ports node */ unsigned int of_graph_get_port_count(struct device_node *np) { unsigned int num = 0; for_each_of_graph_port(np, port) num++; return num; } EXPORT_SYMBOL(of_graph_get_port_count); /** * of_graph_get_remote_node() - get remote parent device_node for given port/endpoint * @node: pointer to parent device_node containing graph port/endpoint * @port: identifier (value of reg property) of the parent port node * @endpoint: identifier (value of reg property) of the endpoint node * * Return: Remote device node associated with remote endpoint node linked * to @node. Use of_node_put() on it when done. */ struct device_node *of_graph_get_remote_node(const struct device_node *node, u32 port, u32 endpoint) { struct device_node *endpoint_node, *remote; endpoint_node = of_graph_get_endpoint_by_regs(node, port, endpoint); if (!endpoint_node) { pr_debug("no valid endpoint (%d, %d) for node %pOF\n", port, endpoint, node); return NULL; } remote = of_graph_get_remote_port_parent(endpoint_node); of_node_put(endpoint_node); if (!remote) { pr_debug("no valid remote node\n"); return NULL; } if (!of_device_is_available(remote)) { pr_debug("not available for remote node\n"); of_node_put(remote); return NULL; } return remote; } EXPORT_SYMBOL(of_graph_get_remote_node); static struct fwnode_handle *of_fwnode_get(struct fwnode_handle *fwnode) { return of_fwnode_handle(of_node_get(to_of_node(fwnode))); } static void of_fwnode_put(struct fwnode_handle *fwnode) { of_node_put(to_of_node(fwnode)); } static bool of_fwnode_device_is_available(const struct fwnode_handle *fwnode) { return of_device_is_available(to_of_node(fwnode)); } static bool of_fwnode_device_dma_supported(const struct fwnode_handle *fwnode) { return true; } static enum dev_dma_attr of_fwnode_device_get_dma_attr(const struct fwnode_handle *fwnode) { if (of_dma_is_coherent(to_of_node(fwnode))) return DEV_DMA_COHERENT; else return DEV_DMA_NON_COHERENT; } static bool of_fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { return of_property_present(to_of_node(fwnode), propname); } static bool of_fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { return of_property_read_bool(to_of_node(fwnode), propname); } static int of_fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { const struct device_node *node = to_of_node(fwnode); if (!val) return of_property_count_elems_of_size(node, propname, elem_size); switch (elem_size) { case sizeof(u8): return of_property_read_u8_array(node, propname, val, nval); case sizeof(u16): return of_property_read_u16_array(node, propname, val, nval); case sizeof(u32): return of_property_read_u32_array(node, propname, val, nval); case sizeof(u64): return of_property_read_u64_array(node, propname, val, nval); } return -ENXIO; } static int of_fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { const struct device_node *node = to_of_node(fwnode); return val ? of_property_read_string_array(node, propname, val, nval) : of_property_count_strings(node, propname); } static const char *of_fwnode_get_name(const struct fwnode_handle *fwnode) { return kbasename(to_of_node(fwnode)->full_name); } static const char *of_fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { /* Root needs no prefix here (its name is "/"). */ if (!to_of_node(fwnode)->parent) return ""; return "/"; } static struct fwnode_handle * of_fwnode_get_parent(const struct fwnode_handle *fwnode) { return of_fwnode_handle(of_get_parent(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_get_next_child_node(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { return of_fwnode_handle(of_get_next_available_child(to_of_node(fwnode), to_of_node(child))); } static struct fwnode_handle * of_fwnode_get_named_child_node(const struct fwnode_handle *fwnode, const char *childname) { const struct device_node *node = to_of_node(fwnode); struct device_node *child; for_each_available_child_of_node(node, child) if (of_node_name_eq(child, childname)) return of_fwnode_handle(child); return NULL; } static int of_fwnode_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { struct of_phandle_args of_args; unsigned int i; int ret; if (nargs_prop) ret = of_parse_phandle_with_args(to_of_node(fwnode), prop, nargs_prop, index, &of_args); else ret = of_parse_phandle_with_fixed_args(to_of_node(fwnode), prop, nargs, index, &of_args); if (ret < 0) return ret; if (!args) { of_node_put(of_args.np); return 0; } args->nargs = of_args.args_count; args->fwnode = of_fwnode_handle(of_args.np); for (i = 0; i < NR_FWNODE_REFERENCE_ARGS; i++) args->args[i] = i < of_args.args_count ? of_args.args[i] : 0; return 0; } static struct fwnode_handle * of_fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode, struct fwnode_handle *prev) { return of_fwnode_handle(of_graph_get_next_endpoint(to_of_node(fwnode), to_of_node(prev))); } static struct fwnode_handle * of_fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode) { return of_fwnode_handle( of_graph_get_remote_endpoint(to_of_node(fwnode))); } static struct fwnode_handle * of_fwnode_graph_get_port_parent(struct fwnode_handle *fwnode) { struct device_node *np; /* Get the parent of the port */ np = of_get_parent(to_of_node(fwnode)); if (!np) return NULL; /* Is this the "ports" node? If not, it's the port parent. */ if (!of_node_name_eq(np, "ports")) return of_fwnode_handle(np); return of_fwnode_handle(of_get_next_parent(np)); } static int of_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint) { const struct device_node *node = to_of_node(fwnode); struct device_node *port_node __free(device_node) = of_get_parent(node); endpoint->local_fwnode = fwnode; of_property_read_u32(port_node, "reg", &endpoint->port); of_property_read_u32(node, "reg", &endpoint->id); return 0; } static const void * of_fwnode_device_get_match_data(const struct fwnode_handle *fwnode, const struct device *dev) { return of_device_get_match_data(dev); } static void of_link_to_phandle(struct device_node *con_np, struct device_node *sup_np, u8 flags) { struct device_node *tmp_np __free(device_node) = of_node_get(sup_np); /* Check that sup_np and its ancestors are available. */ while (tmp_np) { if (of_fwnode_handle(tmp_np)->dev) break; if (!of_device_is_available(tmp_np)) return; tmp_np = of_get_next_parent(tmp_np); } fwnode_link_add(of_fwnode_handle(con_np), of_fwnode_handle(sup_np), flags); } /** * parse_prop_cells - Property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @list_name: Property name that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed name * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_prop_cells(struct device_node *np, const char *prop_name, int index, const char *list_name, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp(prop_name, list_name)) return NULL; if (__of_parse_phandle_with_args(np, list_name, cells_name, 0, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SIMPLE_PROP(fname, name, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_prop_cells(np, prop_name, index, name, cells); \ } static int strcmp_suffix(const char *str, const char *suffix) { unsigned int len, suffix_len; len = strlen(str); suffix_len = strlen(suffix); if (len <= suffix_len) return -1; return strcmp(str + len - suffix_len, suffix); } /** * parse_suffix_prop_cells - Suffix property parsing function for suppliers * * @np: Pointer to device tree node containing a list * @prop_name: Name of property to be parsed. Expected to hold phandle values * @index: For properties holding a list of phandles, this is the index * into the list. * @suffix: Property suffix that is known to contain list of phandle(s) to * supplier(s) * @cells_name: property name that specifies phandles' arguments count * * This is a helper function to parse properties that have a known fixed suffix * and are a list of phandles and phandle arguments. * * Returns: * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ static struct device_node *parse_suffix_prop_cells(struct device_node *np, const char *prop_name, int index, const char *suffix, const char *cells_name) { struct of_phandle_args sup_args; if (strcmp_suffix(prop_name, suffix)) return NULL; if (of_parse_phandle_with_args(np, prop_name, cells_name, index, &sup_args)) return NULL; return sup_args.np; } #define DEFINE_SUFFIX_PROP(fname, suffix, cells) \ static struct device_node *parse_##fname(struct device_node *np, \ const char *prop_name, int index) \ { \ return parse_suffix_prop_cells(np, prop_name, index, suffix, cells); \ } /** * struct supplier_bindings - Property parsing functions for suppliers * * @parse_prop: function name * parse_prop() finds the node corresponding to a supplier phandle * parse_prop.np: Pointer to device node holding supplier phandle property * parse_prop.prop_name: Name of property holding a phandle value * parse_prop.index: For properties holding a list of phandles, this is the * index into the list * @get_con_dev: If the consumer node containing the property is never converted * to a struct device, implement this ops so fw_devlink can use it * to find the true consumer. * @optional: Describes whether a supplier is mandatory or not * @fwlink_flags: Optional fwnode link flags to use when creating a fwnode link * for this property. * * Returns: * parse_prop() return values are * - phandle node pointer with refcount incremented. Caller must of_node_put() * on it when done. * - NULL if no phandle found at index */ struct supplier_bindings { struct device_node *(*parse_prop)(struct device_node *np, const char *prop_name, int index); struct device_node *(*get_con_dev)(struct device_node *np); bool optional; u8 fwlink_flags; }; DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells") DEFINE_SIMPLE_PROP(interconnects, "interconnects", "#interconnect-cells") DEFINE_SIMPLE_PROP(iommus, "iommus", "#iommu-cells") DEFINE_SIMPLE_PROP(mboxes, "mboxes", "#mbox-cells") DEFINE_SIMPLE_PROP(io_channels, "io-channels", "#io-channel-cells") DEFINE_SIMPLE_PROP(io_backends, "io-backends", "#io-backend-cells") DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells") DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells") DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells") DEFINE_SIMPLE_PROP(extcon, "extcon", NULL) DEFINE_SIMPLE_PROP(nvmem_cells, "nvmem-cells", "#nvmem-cell-cells") DEFINE_SIMPLE_PROP(phys, "phys", "#phy-cells") DEFINE_SIMPLE_PROP(wakeup_parent, "wakeup-parent", NULL) DEFINE_SIMPLE_PROP(pinctrl0, "pinctrl-0", NULL) DEFINE_SIMPLE_PROP(pinctrl1, "pinctrl-1", NULL) DEFINE_SIMPLE_PROP(pinctrl2, "pinctrl-2", NULL) DEFINE_SIMPLE_PROP(pinctrl3, "pinctrl-3", NULL) DEFINE_SIMPLE_PROP(pinctrl4, "pinctrl-4", NULL) DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) DEFINE_SIMPLE_PROP(pwms, "pwms", "#pwm-cells") DEFINE_SIMPLE_PROP(resets, "resets", "#reset-cells") DEFINE_SIMPLE_PROP(leds, "leds", NULL) DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SIMPLE_PROP(panel, "panel", NULL) DEFINE_SIMPLE_PROP(msi_parent, "msi-parent", "#msi-cells") DEFINE_SIMPLE_PROP(post_init_providers, "post-init-providers", NULL) DEFINE_SIMPLE_PROP(access_controllers, "access-controllers", "#access-controller-cells") DEFINE_SIMPLE_PROP(pses, "pses", "#pse-cells") DEFINE_SIMPLE_PROP(power_supplies, "power-supplies", NULL) DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") static struct device_node *parse_gpios(struct device_node *np, const char *prop_name, int index) { if (!strcmp_suffix(prop_name, ",nr-gpios")) return NULL; return parse_suffix_prop_cells(np, prop_name, index, "-gpios", "#gpio-cells"); } static struct device_node *parse_iommu_maps(struct device_node *np, const char *prop_name, int index) { if (strcmp(prop_name, "iommu-map")) return NULL; return of_parse_phandle(np, prop_name, (index * 4) + 1); } static struct device_node *parse_gpio_compat(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (strcmp(prop_name, "gpio") && strcmp(prop_name, "gpios")) return NULL; /* * Ignore node with gpio-hog property since its gpios are all provided * by its parent. */ if (of_property_read_bool(np, "gpio-hog")) return NULL; if (of_parse_phandle_with_args(np, prop_name, "#gpio-cells", index, &sup_args)) return NULL; return sup_args.np; } static struct device_node *parse_interrupts(struct device_node *np, const char *prop_name, int index) { struct of_phandle_args sup_args; if (!IS_ENABLED(CONFIG_OF_IRQ) || IS_ENABLED(CONFIG_PPC)) return NULL; if (strcmp(prop_name, "interrupts") && strcmp(prop_name, "interrupts-extended")) return NULL; return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np; } static struct device_node *parse_interrupt_map(struct device_node *np, const char *prop_name, int index) { const __be32 *imap, *imap_end; struct of_phandle_args sup_args; u32 addrcells, intcells; int imaplen; if (!IS_ENABLED(CONFIG_OF_IRQ)) return NULL; if (strcmp(prop_name, "interrupt-map")) return NULL; if (of_property_read_u32(np, "#interrupt-cells", &intcells)) return NULL; addrcells = of_bus_n_addr_cells(np); imap = of_get_property(np, "interrupt-map", &imaplen); if (!imap) return NULL; imaplen /= sizeof(*imap); imap_end = imap + imaplen; for (int i = 0; imap + addrcells + intcells + 1 < imap_end; i++) { imap += addrcells + intcells; imap = of_irq_parse_imap_parent(imap, imap_end - imap, &sup_args); if (!imap) return NULL; if (i == index) return sup_args.np; of_node_put(sup_args.np); } return NULL; } static struct device_node *parse_remote_endpoint(struct device_node *np, const char *prop_name, int index) { /* Return NULL for index > 0 to signify end of remote-endpoints. */ if (index > 0 || strcmp(prop_name, "remote-endpoint")) return NULL; return of_graph_get_remote_port_parent(np); } static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_clocks, }, { .parse_prop = parse_interconnects, }, { .parse_prop = parse_iommus, .optional = true, }, { .parse_prop = parse_iommu_maps, .optional = true, }, { .parse_prop = parse_mboxes, }, { .parse_prop = parse_io_channels, }, { .parse_prop = parse_io_backends, }, { .parse_prop = parse_dmas, .optional = true, }, { .parse_prop = parse_power_domains, }, { .parse_prop = parse_hwlocks, }, { .parse_prop = parse_extcon, }, { .parse_prop = parse_nvmem_cells, }, { .parse_prop = parse_phys, }, { .parse_prop = parse_wakeup_parent, }, { .parse_prop = parse_pinctrl0, }, { .parse_prop = parse_pinctrl1, }, { .parse_prop = parse_pinctrl2, }, { .parse_prop = parse_pinctrl3, }, { .parse_prop = parse_pinctrl4, }, { .parse_prop = parse_pinctrl5, }, { .parse_prop = parse_pinctrl6, }, { .parse_prop = parse_pinctrl7, }, { .parse_prop = parse_pinctrl8, }, { .parse_prop = parse_remote_endpoint, .get_con_dev = of_graph_get_port_parent, }, { .parse_prop = parse_pwms, }, { .parse_prop = parse_resets, }, { .parse_prop = parse_leds, }, { .parse_prop = parse_backlight, }, { .parse_prop = parse_panel, }, { .parse_prop = parse_msi_parent, }, { .parse_prop = parse_pses, }, { .parse_prop = parse_power_supplies, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_interrupt_map, }, { .parse_prop = parse_access_controllers, }, { .parse_prop = parse_regulators, }, { .parse_prop = parse_gpio, }, { .parse_prop = parse_gpios, }, { .parse_prop = parse_post_init_providers, .fwlink_flags = FWLINK_FLAG_IGNORE, }, {} }; /** * of_link_property - Create device links to suppliers listed in a property * @con_np: The consumer device tree node which contains the property * @prop_name: Name of property to be parsed * * This function checks if the property @prop_name that is present in the * @con_np device tree node is one of the known common device tree bindings * that list phandles to suppliers. If @prop_name isn't one, this function * doesn't do anything. * * If @prop_name is one, this function attempts to create fwnode links from the * consumer device tree node @con_np to all the suppliers device tree nodes * listed in @prop_name. * * Any failed attempt to create a fwnode link will NOT result in an immediate * return. of_link_property() must create links to all the available supplier * device tree nodes even when attempts to create a link to one or more * suppliers fail. */ static int of_link_property(struct device_node *con_np, const char *prop_name) { struct device_node *phandle; const struct supplier_bindings *s = of_supplier_bindings; unsigned int i = 0; bool matched = false; /* Do not stop at first failed link, link all available suppliers. */ while (!matched && s->parse_prop) { if (s->optional && !fw_devlink_is_strict()) { s++; continue; } while ((phandle = s->parse_prop(con_np, prop_name, i))) { struct device_node *con_dev_np __free(device_node) = s->get_con_dev ? s->get_con_dev(con_np) : of_node_get(con_np); matched = true; i++; of_link_to_phandle(con_dev_np, phandle, s->fwlink_flags); of_node_put(phandle); } s++; } return 0; } static void __iomem *of_fwnode_iomap(struct fwnode_handle *fwnode, int index) { #ifdef CONFIG_OF_ADDRESS return of_iomap(to_of_node(fwnode), index); #else return NULL; #endif } static int of_fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { return of_irq_get(to_of_node(fwnode), index); } static int of_fwnode_add_links(struct fwnode_handle *fwnode) { const struct property *p; struct device_node *con_np = to_of_node(fwnode); if (IS_ENABLED(CONFIG_X86)) return 0; if (!con_np) return -EINVAL; for_each_property_of_node(con_np, p) of_link_property(con_np, p->name); return 0; } const struct fwnode_operations of_fwnode_ops = { .get = of_fwnode_get, .put = of_fwnode_put, .device_is_available = of_fwnode_device_is_available, .device_get_match_data = of_fwnode_device_get_match_data, .device_dma_supported = of_fwnode_device_dma_supported, .device_get_dma_attr = of_fwnode_device_get_dma_attr, .property_present = of_fwnode_property_present, .property_read_bool = of_fwnode_property_read_bool, .property_read_int_array = of_fwnode_property_read_int_array, .property_read_string_array = of_fwnode_property_read_string_array, .get_name = of_fwnode_get_name, .get_name_prefix = of_fwnode_get_name_prefix, .get_parent = of_fwnode_get_parent, .get_next_child_node = of_fwnode_get_next_child_node, .get_named_child_node = of_fwnode_get_named_child_node, .get_reference_args = of_fwnode_get_reference_args, .graph_get_next_endpoint = of_fwnode_graph_get_next_endpoint, .graph_get_remote_endpoint = of_fwnode_graph_get_remote_endpoint, .graph_get_port_parent = of_fwnode_graph_get_port_parent, .graph_parse_endpoint = of_fwnode_graph_parse_endpoint, .iomap = of_fwnode_iomap, .irq_get = of_fwnode_irq_get, .add_links = of_fwnode_add_links, }; EXPORT_SYMBOL_GPL(of_fwnode_ops);
4 4 1 3 1 3 2 2 1 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 // SPDX-License-Identifier: GPL-2.0-only /* * iptables module to match inet_addr_type() of an ip. * * Copyright (c) 2004 Patrick McHardy <kaber@trash.net> * (C) 2007 Laszlo Attila Toth <panther@balabit.hu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/ip.h> #include <net/route.h> #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/ip6_fib.h> #endif #include <linux/netfilter_ipv6.h> #include <linux/netfilter/xt_addrtype.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Xtables: address type match"); MODULE_ALIAS("ipt_addrtype"); MODULE_ALIAS("ip6t_addrtype"); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; int route_err; memset(&flow, 0, sizeof(flow)); flow.daddr = *addr; if (dev) flow.flowi6_oif = dev->ifindex; if (dev && (mask & XT_ADDRTYPE_LOCAL)) { if (nf_ipv6_chk_addr(net, addr, dev, true)) ret = XT_ADDRTYPE_LOCAL; } route_err = nf_ip6_route(net, (struct dst_entry **)&rt, flowi6_to_flowi(&flow), false); if (route_err) return XT_ADDRTYPE_UNREACHABLE; if (rt->rt6i_flags & RTF_REJECT) ret = XT_ADDRTYPE_UNREACHABLE; if (dev == NULL && rt->rt6i_flags & RTF_LOCAL) ret |= XT_ADDRTYPE_LOCAL; if (ipv6_anycast_destination((struct dst_entry *)rt, addr)) ret |= XT_ADDRTYPE_ANYCAST; dst_release(&rt->dst); return ret; } static bool match_type6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { int addr_type = ipv6_addr_type(addr); if ((mask & XT_ADDRTYPE_MULTICAST) && !(addr_type & IPV6_ADDR_MULTICAST)) return false; if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST)) return false; if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY) return false; if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST | XT_ADDRTYPE_UNREACHABLE) & mask) return !!(mask & match_lookup_rt6(net, dev, addr, mask)); return true; } static bool addrtype_mt6(struct net *net, const struct net_device *dev, const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info) { const struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = true; if (info->source) ret &= match_type6(net, dev, &iph->saddr, info->source) ^ (info->flags & XT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type6(net, dev, &iph->daddr, info->dest) ^ !!(info->flags & XT_ADDRTYPE_INVERT_DEST); return ret; } #endif static inline bool match_type(struct net *net, const struct net_device *dev, __be32 addr, u_int16_t mask) { return !!(mask & (1 << inet_dev_addr_type(net, dev, addr))); } static bool addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); const struct xt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; if (info->source) ret &= match_type(net, NULL, iph->saddr, info->source) ^ info->invert_source; if (info->dest) ret &= match_type(net, NULL, iph->daddr, info->dest) ^ info->invert_dest; return ret; } static bool addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); const struct xt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph; const struct net_device *dev = NULL; bool ret = true; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) dev = xt_in(par); else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) dev = xt_out(par); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (xt_family(par) == NFPROTO_IPV6) return addrtype_mt6(net, dev, skb, info); #endif iph = ip_hdr(skb); if (info->source) ret &= match_type(net, dev, iph->saddr, info->source) ^ (info->flags & XT_ADDRTYPE_INVERT_SOURCE); if (ret && info->dest) ret &= match_type(net, dev, iph->daddr, info->dest) ^ !!(info->flags & XT_ADDRTYPE_INVERT_DEST); return ret; } static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) goto err; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { errmsg = "output interface limitation not valid in PREROUTING and INPUT"; goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT"; goto err; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { errmsg = "ipv6 BLACKHOLE matching not supported"; goto err; } if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported"; goto err; } if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { errmsg = "ipv6 does not support BROADCAST matching"; goto err; } } #endif return 0; err: pr_info_ratelimited("%s\n", errmsg); return -EINVAL; } static struct xt_match addrtype_mt_reg[] __read_mostly = { { .name = "addrtype", .family = NFPROTO_IPV4, .match = addrtype_mt_v0, .matchsize = sizeof(struct xt_addrtype_info), .me = THIS_MODULE }, { .name = "addrtype", .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE }, #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { .name = "addrtype", .family = NFPROTO_IPV6, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, .matchsize = sizeof(struct xt_addrtype_info_v1), .me = THIS_MODULE }, #endif }; static int __init addrtype_mt_init(void) { return xt_register_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); } static void __exit addrtype_mt_exit(void) { xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg)); } module_init(addrtype_mt_init); module_exit(addrtype_mt_exit);
2 46 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the RAW-IP module. * * Version: @(#)raw.h 1.0.2 05/07/93 * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _RAW_H #define _RAW_H #include <net/inet_sock.h> #include <net/protocol.h> #include <net/netns/hash.h> #include <linux/hash.h> #include <linux/icmp.h> extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; bool raw_v4_match(struct net *net, const struct sock *sk, unsigned short num, __be32 raddr, __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); int raw_local_deliver(struct sk_buff *, int); int raw_rcv(struct sock *, struct sk_buff *); #define RAW_HTABLE_LOG 8 #define RAW_HTABLE_SIZE (1U << RAW_HTABLE_LOG) struct raw_hashinfo { spinlock_t lock; struct hlist_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned; }; static inline u32 raw_hashfunc(const struct net *net, u32 proto) { return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG); } static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) { int i; spin_lock_init(&hashinfo->lock); for (i = 0; i < RAW_HTABLE_SIZE; i++) INIT_HLIST_HEAD(&hashinfo->ht[i]); } #ifdef CONFIG_PROC_FS int raw_proc_init(void); void raw_proc_exit(void); struct raw_iter_state { struct seq_net_private p; int bucket; }; static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) { return seq->private; } void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); #endif int raw_hash_sk(struct sock *sk); void raw_unhash_sk(struct sock *sk); void raw_init(void); struct raw_sock { /* inet_sock has to be the first member */ struct inet_sock inet; struct icmp_filter filter; u32 ipmr_table; }; #define raw_sk(ptr) container_of_const(ptr, struct raw_sock, inet.sk) static inline bool raw_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif) { #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept), bound_dev_if, dif, sdif); #else return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); #endif } #endif /* _RAW_H */
28 28 28 28 28 28 27 27 28 27 27 28 28 28 28 76 15 16 16 12 12 4 71 73 75 48 75 40 43 43 41 42 40 43 6 6 2 2 2 2 2 28 28 28 28 28 27 28 28 28 28 28 28 27 27 28 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 28 27 28 28 27 28 27 28 28 28 28 28 28 28 28 28 28 28 29 29 29 28 28 29 28 27 2 2 28 29 29 29 28 29 1 27 28 27 28 28 28 27 27 28 28 27 28 2 2 1 1 1 1 1 1 14 14 14 14 3 31 31 31 2 31 26 26 24 26 26 26 31 26 26 26 26 26 31 31 31 31 31 30 30 30 30 30 29 30 30 30 29 30 30 30 30 30 30 30 31 31 31 31 31 31 31 30 30 30 31 29 31 31 31 31 31 31 30 31 31 31 31 31 31 30 30 31 31 31 31 31 30 30 30 31 30 30 31 30 31 31 31 1 30 1 30 31 30 30 31 31 31 3 31 31 31 31 31 3 30 3 31 30 31 31 31 30 34 34 32 34 33 29 3 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright (C) 2015 - 2017 Intel Deutschland GmbH * Copyright (C) 2018-2024 Intel Corporation */ #include <linux/module.h> #include <linux/init.h> #include <linux/etherdevice.h> #include <linux/netdevice.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/skbuff.h> #include <linux/if_arp.h> #include <linux/timer.h> #include <linux/rtnetlink.h> #include <net/codel.h> #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" #include "rate.h" #include "sta_info.h" #include "debugfs_sta.h" #include "mesh.h" #include "wme.h" /** * DOC: STA information lifetime rules * * STA info structures (&struct sta_info) are managed in a hash table * for faster lookup and a list for iteration. They are managed using * RCU, i.e. access to the list and hash table is protected by RCU. * * Upon allocating a STA info structure with sta_info_alloc(), the caller * owns that structure. It must then insert it into the hash table using * either sta_info_insert() or sta_info_insert_rcu(); only in the latter * case (which acquires an rcu read section but must not be called from * within one) will the pointer still be valid after the call. Note that * the caller may not do much with the STA info before inserting it; in * particular, it may not start any mesh peer link management or add * encryption keys. * * When the insertion fails (sta_info_insert()) returns non-zero), the * structure will have been freed by sta_info_insert()! * * Station entries are added by mac80211 when you establish a link with a * peer. This means different things for the different type of interfaces * we support. For a regular station this mean we add the AP sta when we * receive an association response from the AP. For IBSS this occurs when * get to know about a peer on the same IBSS. For WDS we add the sta for * the peer immediately upon device open. When using AP mode we add stations * for each respective station upon request from userspace through nl80211. * * In order to remove a STA info structure, various sta_info_destroy_*() * calls are available. * * There is no concept of ownership on a STA entry; each structure is * owned by the global hash table/list until it is removed. All users of * the structure need to be RCU protected so that the structure won't be * freed before they are done using it. */ struct sta_link_alloc { struct link_sta_info info; struct ieee80211_link_sta sta; struct rcu_head rcu_head; }; static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; static const struct rhashtable_params link_sta_rht_params = { .nelem_hint = 3, /* start small */ .automatic_shrinking = true, .head_offset = offsetof(struct link_sta_info, link_hash_node), .key_offset = offsetof(struct link_sta_info, addr), .key_len = ETH_ALEN, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { return rhltable_remove(&local->sta_hash, &sta->hash_node, sta_rht_params); } static int link_sta_info_hash_add(struct ieee80211_local *local, struct link_sta_info *link_sta) { lockdep_assert_wiphy(local->hw.wiphy); return rhltable_insert(&local->link_sta_hash, &link_sta->link_hash_node, link_sta_rht_params); } static int link_sta_info_hash_del(struct ieee80211_local *local, struct link_sta_info *link_sta) { lockdep_assert_wiphy(local->hw.wiphy); return rhltable_remove(&local->link_sta_hash, &link_sta->link_hash_node, link_sta_rht_params); } void ieee80211_purge_sta_txqs(struct sta_info *sta) { struct ieee80211_local *local = sta->sdata->local; int i; for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct txq_info *txqi; if (!sta->sta.txq[i]) continue; txqi = to_txq_info(sta->sta.txq[i]); ieee80211_txq_purge(local, txqi); } } static void __cleanup_single_sta(struct sta_info *sta) { int ac, i; struct tid_ampdu_tx *tid_tx; struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ps_data *ps; if (test_sta_flag(sta, WLAN_STA_PS_STA) || test_sta_flag(sta, WLAN_STA_PS_DRIVER) || test_sta_flag(sta, WLAN_STA_PS_DELIVER)) { if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; else return; clear_sta_flag(sta, WLAN_STA_PS_STA); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); clear_sta_flag(sta, WLAN_STA_PS_DELIVER); atomic_dec(&ps->num_sta_ps); } ieee80211_purge_sta_txqs(sta); for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->tx_filtered[ac]); } if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_sta_cleanup(sta); cancel_work_sync(&sta->drv_deliver_wk); /* * Destroy aggregation state here. It would be nice to wait for the * driver to finish aggregation stop and then clean up, but for now * drivers have to handle aggregation stop being requested, followed * directly by station destruction. */ for (i = 0; i < IEEE80211_NUM_TIDS; i++) { kfree(sta->ampdu_mlme.tid_start_tx[i]); tid_tx = rcu_dereference_raw(sta->ampdu_mlme.tid_tx[i]); if (!tid_tx) continue; ieee80211_purge_tx_queue(&local->hw, &tid_tx->pending); kfree(tid_tx); } } static void cleanup_single_sta(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; __cleanup_single_sta(sta); sta_info_free(local, sta); } struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr) { return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); } /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct sta_info *sta; rcu_read_lock(); for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return sta; } } rcu_read_unlock(); return NULL; } /* * Get sta info either from the specified interface * or from one of its vlans */ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct sta_info *sta; rcu_read_lock(); for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return sta; } } rcu_read_unlock(); return NULL; } struct rhlist_head *link_sta_info_hash_lookup(struct ieee80211_local *local, const u8 *addr) { return rhltable_lookup(&local->link_sta_hash, addr, link_sta_rht_params); } struct link_sta_info * link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; struct rhlist_head *tmp; struct link_sta_info *link_sta; rcu_read_lock(); for_each_link_sta_info(local, addr, link_sta, tmp) { struct sta_info *sta = link_sta->sta; if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); /* this is safe as the caller must already hold * another rcu read section or the mutex */ return link_sta; } } rcu_read_unlock(); return NULL; } struct ieee80211_sta * ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr, unsigned int *link_id) { struct ieee80211_local *local = hw_to_local(hw); struct link_sta_info *link_sta; struct rhlist_head *tmp; for_each_link_sta_info(local, addr, link_sta, tmp) { struct sta_info *sta = link_sta->sta; struct ieee80211_link_data *link; u8 _link_id = link_sta->link_id; if (!localaddr) { if (link_id) *link_id = _link_id; return &sta->sta; } link = rcu_dereference(sta->sdata->link[_link_id]); if (!link) continue; if (memcmp(link->conf->addr, localaddr, ETH_ALEN)) continue; if (link_id) *link_id = _link_id; return &sta->sta; } return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_link_addrs); struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr) { struct rhlist_head *tmp; struct sta_info *sta; for_each_sta_info(local, sta_addr, sta, tmp) { if (ether_addr_equal(vif_addr, sta->sdata->vif.addr)) return sta; } return NULL; } struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, int idx) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; int i = 0; list_for_each_entry_rcu(sta, &local->sta_list, list, lockdep_is_held(&local->hw.wiphy->mtx)) { if (sdata != sta->sdata) continue; if (i < idx) { ++i; continue; } return sta; } return NULL; } static void sta_info_free_link(struct link_sta_info *link_sta) { free_percpu(link_sta->pcpu_rx_stats); } static void sta_remove_link(struct sta_info *sta, unsigned int link_id, bool unhash) { struct sta_link_alloc *alloc = NULL; struct link_sta_info *link_sta; lockdep_assert_wiphy(sta->local->hw.wiphy); link_sta = rcu_access_pointer(sta->link[link_id]); if (WARN_ON(!link_sta)) return; if (unhash) link_sta_info_hash_del(sta->local, link_sta); if (test_sta_flag(sta, WLAN_STA_INSERTED)) ieee80211_link_sta_debugfs_remove(link_sta); if (link_sta != &sta->deflink) alloc = container_of(link_sta, typeof(*alloc), info); sta->sta.valid_links &= ~BIT(link_id); RCU_INIT_POINTER(sta->link[link_id], NULL); RCU_INIT_POINTER(sta->sta.link[link_id], NULL); if (alloc) { sta_info_free_link(&alloc->info); kfree_rcu(alloc, rcu_head); } ieee80211_sta_recalc_aggregates(&sta->sta); } /** * sta_info_free - free STA * * @local: pointer to the global information * @sta: STA info to free * * This function must undo everything done by sta_info_alloc() * that may happen before sta_info_insert(). It may only be * called when sta_info_insert() has not been attempted (and * if that fails, the station is freed anyway.) */ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) { int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; link_sta = rcu_access_pointer(sta->link[i]); if (!link_sta) continue; sta_remove_link(sta, i, false); } /* * If we had used sta_info_pre_move_state() then we might not * have gone through the state transitions down again, so do * it here now (and warn if it's inserted). * * This will clear state such as fast TX/RX that may have been * allocated during state transitions. */ while (sta->sta_state > IEEE80211_STA_NONE) { int ret; WARN_ON_ONCE(test_sta_flag(sta, WLAN_STA_INSERTED)); ret = sta_info_move_state(sta, sta->sta_state - 1); if (WARN_ONCE(ret, "sta_info_move_state() returned %d\n", ret)) break; } if (sta->rate_ctrl) rate_control_free_sta(sta); sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); kfree(to_txq_info(sta->sta.txq[0])); kfree(rcu_dereference_raw(sta->sta.rates)); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif sta_info_free_link(&sta->deflink); kfree(sta); } static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { return rhltable_insert(&local->sta_hash, &sta->hash_node, sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) { struct sta_info *sta; sta = container_of(wk, struct sta_info, drv_deliver_wk); if (sta->dead) return; local_bh_disable(); if (!test_sta_flag(sta, WLAN_STA_PS_STA)) ieee80211_sta_ps_deliver_wakeup(sta); else if (test_and_clear_sta_flag(sta, WLAN_STA_PSPOLL)) ieee80211_sta_ps_deliver_poll_response(sta); else if (test_and_clear_sta_flag(sta, WLAN_STA_UAPSD)) ieee80211_sta_ps_deliver_uapsd(sta); local_bh_enable(); } static int sta_prepare_rate_control(struct ieee80211_local *local, struct sta_info *sta, gfp_t gfp) { if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) return 0; sta->rate_ctrl = local->rate_ctrl; sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, sta, gfp); if (!sta->rate_ctrl_priv) return -ENOMEM; return 0; } static int sta_info_alloc_link(struct ieee80211_local *local, struct link_sta_info *link_info, gfp_t gfp) { struct ieee80211_hw *hw = &local->hw; int i; if (ieee80211_hw_check(hw, USES_RSS)) { link_info->pcpu_rx_stats = alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); if (!link_info->pcpu_rx_stats) return -ENOMEM; } link_info->rx_stats.last_rx = jiffies; u64_stats_init(&link_info->rx_stats.syncp); ewma_signal_init(&link_info->rx_stats_avg.signal); ewma_avg_signal_init(&link_info->status_stats.avg_ack_signal); for (i = 0; i < ARRAY_SIZE(link_info->rx_stats_avg.chain_signal); i++) ewma_signal_init(&link_info->rx_stats_avg.chain_signal[i]); link_info->rx_omi_bw_rx = IEEE80211_STA_RX_BW_MAX; link_info->rx_omi_bw_tx = IEEE80211_STA_RX_BW_MAX; link_info->rx_omi_bw_staging = IEEE80211_STA_RX_BW_MAX; /* * Cause (a) warning(s) if IEEE80211_STA_RX_BW_MAX != 320 * or if new values are added to the enum. */ switch (link_info->cur_max_bandwidth) { case IEEE80211_STA_RX_BW_20: case IEEE80211_STA_RX_BW_40: case IEEE80211_STA_RX_BW_80: case IEEE80211_STA_RX_BW_160: case IEEE80211_STA_RX_BW_MAX: /* intentionally nothing */ break; } return 0; } static void sta_info_add_link(struct sta_info *sta, unsigned int link_id, struct link_sta_info *link_info, struct ieee80211_link_sta *link_sta) { link_info->sta = sta; link_info->link_id = link_id; link_info->pub = link_sta; link_info->pub->sta = &sta->sta; link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); link_sta->smps_mode = IEEE80211_SMPS_OFF; link_sta->agg.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; } static struct sta_info * __sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, int link_id, const u8 *link_addr, gfp_t gfp) { struct ieee80211_local *local = sdata->local; struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; void *txq_data; int size; int i; sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); if (!sta) return NULL; sta->local = local; sta->sdata = sdata; if (sta_info_alloc_link(local, &sta->deflink, gfp)) goto free; if (link_id >= 0) { sta_info_add_link(sta, link_id, &sta->deflink, &sta->sta.deflink); sta->sta.valid_links = BIT(link_id); } else { sta_info_add_link(sta, 0, &sta->deflink, &sta->sta.deflink); } sta->sta.cur = &sta->sta.deflink.agg; spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); wiphy_work_init(&sta->ampdu_mlme.work, ieee80211_ba_session_work); #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) { sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); if (!sta->mesh) goto free; sta->mesh->plink_sta = sta; spin_lock_init(&sta->mesh->plink_lock); if (!sdata->u.mesh.user_mpm) timer_setup(&sta->mesh->plink_timer, mesh_plink_timer, 0); sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; } #endif memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); memcpy(sta->deflink.addr, link_addr, ETH_ALEN); memcpy(sta->sta.deflink.addr, link_addr, ETH_ALEN); sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; /* TODO link specific alloc and assignments for MLO Link STA */ /* Extended Key ID needs to install keys for keyid 0 and 1 Rx-only. * The Tx path starts to use a key as soon as the key slot ptk_idx * references to is not NULL. To not use the initial Rx-only key * prematurely for Tx initialize ptk_idx to an impossible PTK keyid * which always will refer to a NULL key. */ BUILD_BUG_ON(ARRAY_SIZE(sta->ptk) <= INVALID_PTK_KEYIDX); sta->ptk_idx = INVALID_PTK_KEYIDX; ieee80211_init_frag_cache(&sta->frags); sta->sta_state = IEEE80211_STA_NONE; if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) sta->amsdu_mesh_control = -1; /* Mark TID as unreserved */ sta->reserved_tid = IEEE80211_TID_UNRESERVED; sta->last_connected = ktime_get_seconds(); size = sizeof(struct txq_info) + ALIGN(hw->txq_data_size, sizeof(void *)); txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); if (!txq_data) goto free; for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct txq_info *txq = txq_data + i * size; /* might not do anything for the (bufferable) MMPDU TXQ */ ieee80211_txq_init(sdata, sta, txq, i); } if (sta_prepare_rate_control(local, sta, gfp)) goto free_txq; sta->airtime_weight = IEEE80211_DEFAULT_AIRTIME_WEIGHT; for (i = 0; i < IEEE80211_NUM_ACS; i++) { skb_queue_head_init(&sta->ps_tx_buf[i]); skb_queue_head_init(&sta->tx_filtered[i]); sta->airtime[i].deficit = sta->airtime_weight; atomic_set(&sta->airtime[i].aql_tx_pending, 0); sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i]; sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i]; } for (i = 0; i < IEEE80211_NUM_TIDS; i++) sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); for (i = 0; i < NUM_NL80211_BANDS; i++) { u32 mandatory = 0; int r; if (!hw->wiphy->bands[i]) continue; switch (i) { case NL80211_BAND_2GHZ: case NL80211_BAND_LC: /* * We use both here, even if we cannot really know for * sure the station will support both, but the only use * for this is when we don't know anything yet and send * management frames, and then we'll pick the lowest * possible rate anyway. * If we don't include _G here, we cannot find a rate * in P2P, and thus trigger the WARN_ONCE() in rate.c */ mandatory = IEEE80211_RATE_MANDATORY_B | IEEE80211_RATE_MANDATORY_G; break; case NL80211_BAND_5GHZ: mandatory = IEEE80211_RATE_MANDATORY_A; break; case NL80211_BAND_60GHZ: WARN_ON(1); mandatory = 0; break; } for (r = 0; r < hw->wiphy->bands[i]->n_bitrates; r++) { struct ieee80211_rate *rate; rate = &hw->wiphy->bands[i]->bitrates[r]; if (!(rate->flags & mandatory)) continue; sta->sta.deflink.supp_rates[i] |= BIT(r); } } sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; sta->cparams.ce_threshold_selector = 0; sta->cparams.ce_threshold_mask = 0; sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; free_txq: kfree(to_txq_info(sta->sta.txq[0])); free: sta_info_free_link(&sta->deflink); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif kfree(sta); return NULL; } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, gfp_t gfp) { return __sta_info_alloc(sdata, addr, -1, addr, gfp); } struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata, const u8 *mld_addr, unsigned int link_id, const u8 *link_addr, gfp_t gfp) { return __sta_info_alloc(sdata, mld_addr, link_id, link_addr, gfp); } static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; lockdep_assert_wiphy(sdata->local->hw.wiphy); /* * Can't be a WARN_ON because it can be triggered through a race: * something inserts a STA (on one CPU) without holding the RTNL * and another CPU turns off the net device. */ if (unlikely(!ieee80211_sdata_running(sdata))) return -ENETDOWN; if (WARN_ON(ether_addr_equal(sta->sta.addr, sdata->vif.addr) || !is_valid_ether_addr(sta->sta.addr))) return -EINVAL; /* The RCU read lock is required by rhashtable due to * asynchronous resize/rehash. We also require the mutex * for correctness. */ rcu_read_lock(); if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { rcu_read_unlock(); return -ENOTUNIQ; } rcu_read_unlock(); return 0; } static int sta_info_insert_drv_state(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) { enum ieee80211_sta_state state; int err = 0; for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; state++) { err = drv_sta_state(local, sdata, sta, state, state + 1); if (err) break; } if (!err) { /* * Drivers using legacy sta_add/sta_remove callbacks only * get uploaded set to true after sta_add is called. */ if (!local->ops->sta_add) sta->uploaded = true; return 0; } if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { sdata_info(sdata, "failed to move IBSS STA %pM to state %d (%d) - keeping it anyway\n", sta->sta.addr, state + 1, err); err = 0; } /* unwind on error */ for (; state > IEEE80211_STA_NOTEXIST; state--) WARN_ON(drv_sta_state(local, sdata, sta, state, state - 1)); return err; } static void ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) { struct ieee80211_local *local = sdata->local; bool allow_p2p_go_ps = sdata->vif.p2p; struct sta_info *sta; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sdata != sta->sdata || !test_sta_flag(sta, WLAN_STA_ASSOC)) continue; if (!sta->sta.support_p2p_ps) { allow_p2p_go_ps = false; break; } } rcu_read_unlock(); if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_P2P_PS); } } static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct station_info *sinfo = NULL; int err = 0; lockdep_assert_wiphy(local->hw.wiphy); /* check if STA exists already */ if (sta_info_get_bss(sdata, sta->sta.addr)) { err = -EEXIST; goto out_cleanup; } sinfo = kzalloc(sizeof(struct station_info), GFP_KERNEL); if (!sinfo) { err = -ENOMEM; goto out_cleanup; } local->num_sta++; local->sta_generation++; smp_mb(); /* simplify things and don't accept BA sessions yet */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); /* make the station visible */ err = sta_info_hash_add(local, sta); if (err) goto out_drop_sta; if (sta->sta.valid_links) { err = link_sta_info_hash_add(local, &sta->deflink); if (err) { sta_info_hash_del(local, sta); goto out_drop_sta; } } list_add_tail_rcu(&sta->list, &local->sta_list); /* update channel context before notifying the driver about state * change, this enables driver using the updated channel context right away. */ if (sta->sta_state >= IEEE80211_STA_ASSOC) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } /* notify driver */ err = sta_info_insert_drv_state(local, sdata, sta); if (err) goto out_remove; set_sta_flag(sta, WLAN_STA_INSERTED); /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_debugfs_add(sta); rate_control_add_sta_debugfs(sta); if (sta->sta.valid_links) { int i; for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; link_sta = rcu_dereference_protected(sta->link[i], lockdep_is_held(&local->hw.wiphy->mtx)); if (!link_sta) continue; ieee80211_link_sta_debugfs_add(link_sta); if (sdata->vif.active_links & BIT(i)) ieee80211_link_sta_debugfs_drv_add(link_sta); } } else { ieee80211_link_sta_debugfs_add(&sta->deflink); ieee80211_link_sta_debugfs_drv_add(&sta->deflink); } sinfo->generation = local->sta_generation; cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); /* move reference to rcu-protected */ rcu_read_lock(); if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_accept_plinks_update(sdata); ieee80211_check_fast_xmit(sta); return 0; out_remove: if (sta->sta.valid_links) link_sta_info_hash_del(local, &sta->deflink); sta_info_hash_del(local, sta); list_del_rcu(&sta->list); out_drop_sta: local->num_sta--; synchronize_net(); out_cleanup: cleanup_single_sta(sta); kfree(sinfo); rcu_read_lock(); return err; } int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU) { struct ieee80211_local *local = sta->local; int err; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); err = sta_info_insert_check(sta); if (err) { sta_info_free(local, sta); rcu_read_lock(); return err; } return sta_info_insert_finish(sta); } int sta_info_insert(struct sta_info *sta) { int err = sta_info_insert_rcu(sta); rcu_read_unlock(); return err; } static inline void __bss_tim_set(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __set_bit() format. */ tim[id / 8] |= (1 << (id % 8)); } static inline void __bss_tim_clear(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the __clear_bit() format. */ tim[id / 8] &= ~(1 << (id % 8)); } static inline bool __bss_tim_get(u8 *tim, u16 id) { /* * This format has been mandated by the IEEE specifications, * so this line may not be changed to use the test_bit() format. */ return tim[id / 8] & (1 << (id % 8)); } static unsigned long ieee80211_tids_for_ac(int ac) { /* If we ever support TIDs > 7, this obviously needs to be adjusted */ switch (ac) { case IEEE80211_AC_VO: return BIT(6) | BIT(7); case IEEE80211_AC_VI: return BIT(4) | BIT(5); case IEEE80211_AC_BE: return BIT(0) | BIT(3); case IEEE80211_AC_BK: return BIT(1) | BIT(2); default: WARN_ON(1); return 0; } } static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) { struct ieee80211_local *local = sta->local; struct ps_data *ps; bool indicate_tim = false; u8 ignore_for_tim = sta->sta.uapsd_queues; int ac; u16 id = sta->sta.aid; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { if (WARN_ON_ONCE(!sta->sdata->bss)) return; ps = &sta->sdata->bss->ps; #ifdef CONFIG_MAC80211_MESH } else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) { ps = &sta->sdata->u.mesh.ps; #endif } else { return; } /* No need to do anything if the driver does all */ if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim) return; if (sta->dead) goto done; /* * If all ACs are delivery-enabled then we should build * the TIM bit for all ACs anyway; if only some are then * we ignore those and build the TIM bit using only the * non-enabled ones. */ if (ignore_for_tim == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_tim = 0; if (ignore_pending) ignore_for_tim = BIT(IEEE80211_NUM_ACS) - 1; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; if (ignore_for_tim & ieee80211_ac_to_qos_mask[ac]) continue; indicate_tim |= !skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac]); if (indicate_tim) break; tids = ieee80211_tids_for_ac(ac); indicate_tim |= sta->driver_buffered_tids & tids; indicate_tim |= sta->txq_buffered_tids & tids; } done: spin_lock_bh(&local->tim_lock); if (indicate_tim == __bss_tim_get(ps->tim, id)) goto out_unlock; if (indicate_tim) __bss_tim_set(ps->tim, id); else __bss_tim_clear(ps->tim, id); if (local->ops->set_tim && !WARN_ON(sta->dead)) { local->tim_in_locked_section = true; drv_set_tim(local, &sta->sta, indicate_tim); local->tim_in_locked_section = false; } out_unlock: spin_unlock_bh(&local->tim_lock); } void sta_info_recalc_tim(struct sta_info *sta) { __sta_info_recalc_tim(sta, false); } static bool sta_info_buffer_expired(struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_tx_info *info; int timeout; if (!skb) return false; info = IEEE80211_SKB_CB(skb); /* Timeout: (2 * listen_interval * beacon_int * 1024 / 1000000) sec */ timeout = (sta->listen_interval * sta->sdata->vif.bss_conf.beacon_int * 32 / 15625) * HZ; if (timeout < STA_TX_BUFFER_EXPIRE) timeout = STA_TX_BUFFER_EXPIRE; return time_after(jiffies, info->control.jiffies + timeout); } static bool sta_info_cleanup_expire_buffered_ac(struct ieee80211_local *local, struct sta_info *sta, int ac) { unsigned long flags; struct sk_buff *skb; /* * First check for frames that should expire on the filtered * queue. Frames here were rejected by the driver and are on * a separate queue to avoid reordering with normal PS-buffered * frames. They also aren't accounted for right now in the * total_ps_buffered counter. */ for (;;) { spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); skb = skb_peek(&sta->tx_filtered[ac]); if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->tx_filtered[ac]); else skb = NULL; spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); /* * Frames are queued in order, so if this one * hasn't expired yet we can stop testing. If * we actually reached the end of the queue we * also need to stop, of course. */ if (!skb) break; ieee80211_free_txskb(&local->hw, skb); } /* * Now also check the normal PS-buffered queue, this will * only find something if the filtered queue was emptied * since the filtered frames are all before the normal PS * buffered frames. */ for (;;) { spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); skb = skb_peek(&sta->ps_tx_buf[ac]); if (sta_info_buffer_expired(sta, skb)) skb = __skb_dequeue(&sta->ps_tx_buf[ac]); else skb = NULL; spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); /* * frames are queued in order, so if this one * hasn't expired yet (or we reached the end of * the queue) we can stop testing */ if (!skb) break; local->total_ps_buffered--; ps_dbg(sta->sdata, "Buffered frame expired (STA %pM)\n", sta->sta.addr); ieee80211_free_txskb(&local->hw, skb); } /* * Finally, recalculate the TIM bit for this station -- it might * now be clear because the station was too slow to retrieve its * frames. */ sta_info_recalc_tim(sta); /* * Return whether there are any frames still buffered, this is * used to check whether the cleanup timer still needs to run, * if there are no frames we don't need to rearm the timer. */ return !(skb_queue_empty(&sta->ps_tx_buf[ac]) && skb_queue_empty(&sta->tx_filtered[ac])); } static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local, struct sta_info *sta) { bool have_buffered = false; int ac; /* This is only necessary for stations on BSS/MBSS interfaces */ if (!sta->sdata->bss && !ieee80211_vif_is_mesh(&sta->sdata->vif)) return false; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) have_buffered |= sta_info_cleanup_expire_buffered_ac(local, sta, ac); return have_buffered; } static int __must_check __sta_info_destroy_part1(struct sta_info *sta) { struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; int ret, i; might_sleep(); if (!sta) return -ENOENT; local = sta->local; sdata = sta->sdata; lockdep_assert_wiphy(local->hw.wiphy); /* * Before removing the station from the driver and * rate control, it might still start new aggregation * sessions -- block that to make sure the tear-down * will be sufficient. */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); /* * Before removing the station from the driver there might be pending * rx frames on RSS queues sent prior to the disassociation - wait for * all such frames to be processed. */ drv_sync_rx_queues(local, sta); for (i = 0; i < ARRAY_SIZE(sta->link); i++) { struct link_sta_info *link_sta; if (!(sta->sta.valid_links & BIT(i))) continue; link_sta = rcu_dereference_protected(sta->link[i], lockdep_is_held(&local->hw.wiphy->mtx)); link_sta_info_hash_del(local, link_sta); } ret = sta_info_hash_del(local, sta); if (WARN_ON(ret)) return ret; /* * for TDLS peers, make sure to return to the base channel before * removal. */ if (test_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL)) { drv_tdls_cancel_channel_switch(local, sdata, &sta->sta); clear_sta_flag(sta, WLAN_STA_TDLS_OFF_CHANNEL); } list_del_rcu(&sta->list); sta->removed = true; if (sta->uploaded) drv_sta_pre_rcu_remove(local, sta->sdata, sta); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && rcu_access_pointer(sdata->u.vlan.sta) == sta) RCU_INIT_POINTER(sdata->u.vlan.sta, NULL); return 0; } static int _sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state, bool recalc) { struct ieee80211_local *local = sta->local; might_sleep(); if (sta->sta_state == new_state) return 0; /* check allowed transitions first */ switch (new_state) { case IEEE80211_STA_NONE: if (sta->sta_state != IEEE80211_STA_AUTH) return -EINVAL; break; case IEEE80211_STA_AUTH: if (sta->sta_state != IEEE80211_STA_NONE && sta->sta_state != IEEE80211_STA_ASSOC) return -EINVAL; break; case IEEE80211_STA_ASSOC: if (sta->sta_state != IEEE80211_STA_AUTH && sta->sta_state != IEEE80211_STA_AUTHORIZED) return -EINVAL; break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state != IEEE80211_STA_ASSOC) return -EINVAL; break; default: WARN(1, "invalid state %d", new_state); return -EINVAL; } sta_dbg(sta->sdata, "moving STA %pM to state %d\n", sta->sta.addr, new_state); /* notify the driver before the actual changes so it can * fail the transition if the state is increasing. * The driver is required not to fail when the transition * is decreasing the state, so first, do all the preparation * work and only then, notify the driver. */ if (new_state > sta->sta_state && test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); if (err) return err; } /* reflect the change in all state variables */ switch (new_state) { case IEEE80211_STA_NONE: if (sta->sta_state == IEEE80211_STA_AUTH) clear_bit(WLAN_STA_AUTH, &sta->_flags); break; case IEEE80211_STA_AUTH: if (sta->sta_state == IEEE80211_STA_NONE) { set_bit(WLAN_STA_AUTH, &sta->_flags); } else if (sta->sta_state == IEEE80211_STA_ASSOC) { clear_bit(WLAN_STA_ASSOC, &sta->_flags); if (recalc) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } } break; case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); sta->assoc_at = ktime_get_boottime_ns(); if (recalc) { ieee80211_recalc_min_chandef(sta->sdata, -1); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); } } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); /* * If we have encryption offload, flush (station) queues * (after ensuring concurrent TX completed) so we won't * transmit anything later unencrypted if/when keys are * also removed, which might otherwise happen depending * on how the hardware offload works. */ if (local->ops->set_key) { synchronize_net(); if (local->ops->flush_sta) drv_flush_sta(local, sta->sdata, sta); else ieee80211_flush_queues(local, sta->sdata, false); } ieee80211_clear_fast_xmit(sta); ieee80211_clear_fast_rx(sta); } break; case IEEE80211_STA_AUTHORIZED: if (sta->sta_state == IEEE80211_STA_ASSOC) { ieee80211_vif_inc_num_mcast(sta->sdata); set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); ieee80211_check_fast_rx(sta); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sta->sdata->vif.type == NL80211_IFTYPE_AP) cfg80211_send_layer2_update(sta->sdata->dev, sta->sta.addr); break; default: break; } if (new_state < sta->sta_state && test_sta_flag(sta, WLAN_STA_INSERTED)) { int err = drv_sta_state(sta->local, sta->sdata, sta, sta->sta_state, new_state); WARN_ONCE(err, "Driver is not allowed to fail if the sta_state is transitioning down the list: %d\n", err); } sta->sta_state = new_state; return 0; } int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { return _sta_info_move_state(sta, new_state, true); } static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) { struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; struct station_info *sinfo; int ret; /* * NOTE: This assumes at least synchronize_net() was done * after _part1 and before _part2! */ /* * There's a potential race in _part1 where we set WLAN_STA_BLOCK_BA * but someone might have just gotten past a check, and not yet into * queuing the work/creating the data/etc. * * Do another round of destruction so that the worker is certainly * canceled before we later free the station. * * Since this is after synchronize_rcu()/synchronize_net() we're now * certain that nobody can actually hold a reference to the STA and * be calling e.g. ieee80211_start_tx_ba_session(). */ ieee80211_sta_tear_down_BA_sessions(sta, AGG_STOP_DESTROY_STA); might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc); WARN_ON_ONCE(ret); } /* now keys can no longer be reached */ ieee80211_free_sta_keys(local, sta); /* disable TIM bit - last chance to tell driver */ __sta_info_recalc_tim(sta, true); sta->dead = true; local->num_sta--; local->sta_generation++; while (sta->sta_state > IEEE80211_STA_NONE) { ret = _sta_info_move_state(sta, sta->sta_state - 1, recalc); if (ret) { WARN_ON_ONCE(1); break; } } if (sta->uploaded) { ret = drv_sta_state(local, sdata, sta, IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); WARN_ON_ONCE(ret != 0); } sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); sinfo = kzalloc(sizeof(*sinfo), GFP_KERNEL); if (sinfo) sta_set_sinfo(sta, sinfo, true); cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); ieee80211_sta_debugfs_remove(sta); ieee80211_destroy_frag_cache(&sta->frags); cleanup_single_sta(sta); } int __must_check __sta_info_destroy(struct sta_info *sta) { int err = __sta_info_destroy_part1(sta); if (err) return err; synchronize_net(); __sta_info_destroy_part2(sta, true); return 0; } int sta_info_destroy_addr(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get(sdata, addr); return __sta_info_destroy(sta); } int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct sta_info *sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta = sta_info_get_bss(sdata, addr); return __sta_info_destroy(sta); } static void sta_info_cleanup(struct timer_list *t) { struct ieee80211_local *local = from_timer(local, t, sta_cleanup); struct sta_info *sta; bool timer_needed = false; rcu_read_lock(); list_for_each_entry_rcu(sta, &local->sta_list, list) if (sta_info_cleanup_expire_buffered(local, sta)) timer_needed = true; rcu_read_unlock(); if (local->quiescing) return; if (!timer_needed) return; mod_timer(&local->sta_cleanup, round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } int sta_info_init(struct ieee80211_local *local) { int err; err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; err = rhltable_init(&local->link_sta_hash, &link_sta_rht_params); if (err) { rhltable_destroy(&local->sta_hash); return err; } spin_lock_init(&local->tim_lock); INIT_LIST_HEAD(&local->sta_list); timer_setup(&local->sta_cleanup, sta_info_cleanup, 0); return 0; } void sta_info_stop(struct ieee80211_local *local) { timer_delete_sync(&local->sta_cleanup); rhltable_destroy(&local->sta_hash); rhltable_destroy(&local->link_sta_hash); } int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans, int link_id, struct sta_info *do_not_flush_sta) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; LIST_HEAD(free_list); int ret = 0; might_sleep(); lockdep_assert_wiphy(local->hw.wiphy); WARN_ON(vlans && sdata->vif.type != NL80211_IFTYPE_AP); WARN_ON(vlans && !sdata->bss); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { if (sdata != sta->sdata && (!vlans || sdata->bss != sta->sdata->bss)) continue; if (sta == do_not_flush_sta) continue; if (link_id >= 0 && sta->sta.valid_links && !(sta->sta.valid_links & BIT(link_id))) continue; if (!WARN_ON(__sta_info_destroy_part1(sta))) list_add(&sta->free_list, &free_list); ret++; } if (!list_empty(&free_list)) { bool support_p2p_ps = true; synchronize_net(); list_for_each_entry_safe(sta, tmp, &free_list, free_list) { if (!sta->sta.support_p2p_ps) support_p2p_ps = false; __sta_info_destroy_part2(sta, false); } ieee80211_recalc_min_chandef(sdata, -1); if (!support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sdata); } return ret; } void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time) { struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; lockdep_assert_wiphy(local->hw.wiphy); list_for_each_entry_safe(sta, tmp, &local->sta_list, list) { unsigned long last_active = ieee80211_sta_last_active(sta); if (sdata != sta->sdata) continue; if (time_is_before_jiffies(last_active + exp_time)) { sta_dbg(sta->sdata, "expiring inactive STA %pM\n", sta->sta.addr); if (ieee80211_vif_is_mesh(&sdata->vif) && test_sta_flag(sta, WLAN_STA_PS_STA)) atomic_dec(&sdata->u.mesh.ps.num_sta_ps); WARN_ON(__sta_info_destroy(sta)); } } } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *addr, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); struct rhlist_head *tmp; struct sta_info *sta; /* * Just return a random station if localaddr is NULL * ... first in list. */ for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; if (!sta->uploaded) return NULL; return &sta->sta; } return NULL; } EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr); struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr) { struct sta_info *sta; if (!vif) return NULL; sta = sta_info_get_bss(vif_to_sdata(vif), addr); if (!sta) return NULL; if (!sta->uploaded) return NULL; return &sta->sta; } EXPORT_SYMBOL(ieee80211_find_sta); /* powersave support code */ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff_head pending; int filtered = 0, buffered = 0, ac, i; unsigned long flags; struct ps_data *ps; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; else if (ieee80211_vif_is_mesh(&sdata->vif)) ps = &sdata->u.mesh.ps; else return; clear_sta_flag(sta, WLAN_STA_SP); BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); sta->driver_buffered_tids = 0; sta->txq_buffered_tids = 0; if (!ieee80211_hw_check(&local->hw, AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { if (!sta->sta.txq[i] || !txq_has_queue(sta->sta.txq[i])) continue; schedule_and_wake_txq(local, to_txq_info(sta->sta.txq[i])); } skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ spin_lock_bh(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; spin_lock_irqsave(&sta->tx_filtered[ac].lock, flags); skb_queue_splice_tail_init(&sta->tx_filtered[ac], &pending); spin_unlock_irqrestore(&sta->tx_filtered[ac].lock, flags); tmp = skb_queue_len(&pending); filtered += tmp - count; count = tmp; spin_lock_irqsave(&sta->ps_tx_buf[ac].lock, flags); skb_queue_splice_tail_init(&sta->ps_tx_buf[ac], &pending); spin_unlock_irqrestore(&sta->ps_tx_buf[ac].lock, flags); tmp = skb_queue_len(&pending); buffered += tmp - count; } ieee80211_add_pending_skbs(local, &pending); /* now we're no longer in the deliver code */ clear_sta_flag(sta, WLAN_STA_PS_DELIVER); /* The station might have polled and then woken up before we responded, * so clear these flags now to avoid them sticking around. */ clear_sta_flag(sta, WLAN_STA_PSPOLL); clear_sta_flag(sta, WLAN_STA_UAPSD); spin_unlock_bh(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); local->total_ps_buffered -= buffered; sta_info_recalc_tim(sta); ps_dbg(sdata, "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n", sta->sta.addr, sta->sta.aid, filtered, buffered); ieee80211_check_fast_xmit(sta); } static void ieee80211_send_null_response(struct sta_info *sta, int tid, enum ieee80211_frame_release_type reason, bool call_driver, bool more_data) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ieee80211_qos_hdr *nullfunc; struct sk_buff *skb; int size = sizeof(*nullfunc); __le16 fc; bool qos = sta->sta.wme; struct ieee80211_tx_info *info; struct ieee80211_chanctx_conf *chanctx_conf; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC | IEEE80211_FCTL_FROMDS); } else { size -= 2; fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS); } skb = dev_alloc_skb(local->hw.extra_tx_headroom + size); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); nullfunc = skb_put(skb, size); nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); nullfunc->seq_ctrl = 0; skb->priority = tid; skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]); if (qos) { nullfunc->qos_ctrl = cpu_to_le16(tid); if (reason == IEEE80211_FRAME_RELEASE_UAPSD) { nullfunc->qos_ctrl |= cpu_to_le16(IEEE80211_QOS_CTL_EOSP); if (more_data) nullfunc->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); } } info = IEEE80211_SKB_CB(skb); /* * Tell TX path to send this frame even though the * STA may still remain is PS mode after this frame * exchange. Also set EOSP to indicate this packet * ends the poll/service period. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER | IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; if (call_driver) drv_allow_buffered_frames(local, sta, BIT(tid), 1, reason, false); skb->dev = sdata->dev; rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); kfree_skb(skb); return; } info->band = chanctx_conf->def.chan->band; ieee80211_xmit(sdata, sta, skb); rcu_read_unlock(); } static int find_highest_prio_tid(unsigned long tids) { /* lower 3 TIDs aren't ordered perfectly */ if (tids & 0xF8) return fls(tids) - 1; /* TID 0 is BE just like TID 3 */ if (tids & BIT(0)) return 0; return fls(tids) - 1; } /* Indicates if the MORE_DATA bit should be set in the last * frame obtained by ieee80211_sta_ps_get_frames. * Note that driver_release_tids is relevant only if * reason = IEEE80211_FRAME_RELEASE_PSPOLL */ static bool ieee80211_sta_ps_more_data(struct sta_info *sta, u8 ignored_acs, enum ieee80211_frame_release_type reason, unsigned long driver_release_tids) { int ac; /* If the driver has data on more than one TID then * certainly there's more data if we release just a * single frame now (from a single TID). This will * only happen for PS-Poll. */ if (reason == IEEE80211_FRAME_RELEASE_PSPOLL && hweight16(driver_release_tids) > 1) return true; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) return true; } return false; } static void ieee80211_sta_ps_get_frames(struct sta_info *sta, int n_frames, u8 ignored_acs, enum ieee80211_frame_release_type reason, struct sk_buff_head *frames, unsigned long *driver_release_tids) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; int ac; /* Get response frame(s) and more data bit for the last one. */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { unsigned long tids; if (ignored_acs & ieee80211_ac_to_qos_mask[ac]) continue; tids = ieee80211_tids_for_ac(ac); /* if we already have frames from software, then we can't also * release from hardware queues */ if (skb_queue_empty(frames)) { *driver_release_tids |= sta->driver_buffered_tids & tids; *driver_release_tids |= sta->txq_buffered_tids & tids; } if (!*driver_release_tids) { struct sk_buff *skb; while (n_frames > 0) { skb = skb_dequeue(&sta->tx_filtered[ac]); if (!skb) { skb = skb_dequeue( &sta->ps_tx_buf[ac]); if (skb) local->total_ps_buffered--; } if (!skb) break; n_frames--; __skb_queue_tail(frames, skb); } } /* If we have more frames buffered on this AC, then abort the * loop since we can't send more data from other ACs before * the buffered frames from this. */ if (!skb_queue_empty(&sta->tx_filtered[ac]) || !skb_queue_empty(&sta->ps_tx_buf[ac])) break; } } static void ieee80211_sta_ps_deliver_response(struct sta_info *sta, int n_frames, u8 ignored_acs, enum ieee80211_frame_release_type reason) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; unsigned long driver_release_tids = 0; struct sk_buff_head frames; bool more_data; /* Service or PS-Poll period starts */ set_sta_flag(sta, WLAN_STA_SP); __skb_queue_head_init(&frames); ieee80211_sta_ps_get_frames(sta, n_frames, ignored_acs, reason, &frames, &driver_release_tids); more_data = ieee80211_sta_ps_more_data(sta, ignored_acs, reason, driver_release_tids); if (driver_release_tids && reason == IEEE80211_FRAME_RELEASE_PSPOLL) driver_release_tids = BIT(find_highest_prio_tid(driver_release_tids)); if (skb_queue_empty(&frames) && !driver_release_tids) { int tid, ac; /* * For PS-Poll, this can only happen due to a race condition * when we set the TIM bit and the station notices it, but * before it can poll for the frame we expire it. * * For uAPSD, this is said in the standard (11.2.1.5 h): * At each unscheduled SP for a non-AP STA, the AP shall * attempt to transmit at least one MSDU or MMPDU, but no * more than the value specified in the Max SP Length field * in the QoS Capability element from delivery-enabled ACs, * that are destined for the non-AP STA. * * Since we have no other MSDU/MMPDU, transmit a QoS null frame. */ /* This will evaluate to 1, 3, 5 or 7. */ for (ac = IEEE80211_AC_VO; ac < IEEE80211_NUM_ACS; ac++) if (!(ignored_acs & ieee80211_ac_to_qos_mask[ac])) break; tid = 7 - 2 * ac; ieee80211_send_null_response(sta, tid, reason, true, false); } else if (!driver_release_tids) { struct sk_buff_head pending; struct sk_buff *skb; int num = 0; u16 tids = 0; bool need_null = false; skb_queue_head_init(&pending); while ((skb = __skb_dequeue(&frames))) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (void *) skb->data; u8 *qoshdr = NULL; num++; /* * Tell TX path to send this frame even though the * STA may still remain is PS mode after this frame * exchange. */ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; info->control.flags |= IEEE80211_TX_CTRL_PS_RESPONSE; /* * Use MoreData flag to indicate whether there are * more buffered frames for this STA */ if (more_data || !skb_queue_empty(&frames)) hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); else hdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREDATA); if (ieee80211_is_data_qos(hdr->frame_control) || ieee80211_is_qos_nullfunc(hdr->frame_control)) qoshdr = ieee80211_get_qos_ctl(hdr); tids |= BIT(skb->priority); __skb_queue_tail(&pending, skb); /* end service period after last frame or add one */ if (!skb_queue_empty(&frames)) continue; if (reason != IEEE80211_FRAME_RELEASE_UAPSD) { /* for PS-Poll, there's only one frame */ info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; break; } /* For uAPSD, things are a bit more complicated. If the * last frame has a QoS header (i.e. is a QoS-data or * QoS-nulldata frame) then just set the EOSP bit there * and be done. * If the frame doesn't have a QoS header (which means * it should be a bufferable MMPDU) then we can't set * the EOSP bit in the QoS header; add a QoS-nulldata * frame to the list to send it after the MMPDU. * * Note that this code is only in the mac80211-release * code path, we assume that the driver will not buffer * anything but QoS-data frames, or if it does, will * create the QoS-nulldata frame by itself if needed. * * Cf. 802.11-2012 10.2.1.10 (c). */ if (qoshdr) { *qoshdr |= IEEE80211_QOS_CTL_EOSP; info->flags |= IEEE80211_TX_STATUS_EOSP | IEEE80211_TX_CTL_REQ_TX_STATUS; } else { /* The standard isn't completely clear on this * as it says the more-data bit should be set * if there are more BUs. The QoS-Null frame * we're about to send isn't buffered yet, we * only create it below, but let's pretend it * was buffered just in case some clients only * expect more-data=0 when eosp=1. */ hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); need_null = true; num++; } break; } drv_allow_buffered_frames(local, sta, tids, num, reason, more_data); ieee80211_add_pending_skbs(local, &pending); if (need_null) ieee80211_send_null_response( sta, find_highest_prio_tid(tids), reason, false, false); sta_info_recalc_tim(sta); } else { int tid; /* * We need to release a frame that is buffered somewhere in the * driver ... it'll have to handle that. * Note that the driver also has to check the number of frames * on the TIDs we're releasing from - if there are more than * n_frames it has to set the more-data bit (if we didn't ask * it to set it anyway due to other buffered frames); if there * are fewer than n_frames it has to make sure to adjust that * to allow the service period to end properly. */ drv_release_buffered_frames(local, sta, driver_release_tids, n_frames, reason, more_data); /* * Note that we don't recalculate the TIM bit here as it would * most likely have no effect at all unless the driver told us * that the TID(s) became empty before returning here from the * release function. * Either way, however, when the driver tells us that the TID(s) * became empty or we find that a txq became empty, we'll do the * TIM recalculation. */ for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { if (!sta->sta.txq[tid] || !(driver_release_tids & BIT(tid)) || txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); break; } } } void ieee80211_sta_ps_deliver_poll_response(struct sta_info *sta) { u8 ignore_for_response = sta->sta.uapsd_queues; /* * If all ACs are delivery-enabled then we should reply * from any of them, if only some are enabled we reply * only from the non-enabled ones. */ if (ignore_for_response == BIT(IEEE80211_NUM_ACS) - 1) ignore_for_response = 0; ieee80211_sta_ps_deliver_response(sta, 1, ignore_for_response, IEEE80211_FRAME_RELEASE_PSPOLL); } void ieee80211_sta_ps_deliver_uapsd(struct sta_info *sta) { int n_frames = sta->sta.max_sp; u8 delivery_enabled = sta->sta.uapsd_queues; /* * If we ever grow support for TSPEC this might happen if * the TSPEC update from hostapd comes in between a trigger * frame setting WLAN_STA_UAPSD in the RX path and this * actually getting called. */ if (!delivery_enabled) return; switch (sta->sta.max_sp) { case 1: n_frames = 2; break; case 2: n_frames = 4; break; case 3: n_frames = 6; break; case 0: /* XXX: what is a good value? */ n_frames = 128; break; } ieee80211_sta_ps_deliver_response(sta, n_frames, ~delivery_enabled, IEEE80211_FRAME_RELEASE_UAPSD); } void ieee80211_sta_block_awake(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, bool block) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); trace_api_sta_block_awake(sta->local, pubsta, block); if (block) { set_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_clear_fast_xmit(sta); return; } if (!test_sta_flag(sta, WLAN_STA_PS_DRIVER)) return; if (!test_sta_flag(sta, WLAN_STA_PS_STA)) { set_sta_flag(sta, WLAN_STA_PS_DELIVER); clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else if (test_sta_flag(sta, WLAN_STA_PSPOLL) || test_sta_flag(sta, WLAN_STA_UAPSD)) { /* must be asleep in this case */ clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_queue_work(hw, &sta->drv_deliver_wk); } else { clear_sta_flag(sta, WLAN_STA_PS_DRIVER); ieee80211_check_fast_xmit(sta); } } EXPORT_SYMBOL(ieee80211_sta_block_awake); void ieee80211_sta_eosp(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->local; trace_api_eosp(local, pubsta); clear_sta_flag(sta, WLAN_STA_SP); } EXPORT_SYMBOL(ieee80211_sta_eosp); void ieee80211_send_eosp_nullfunc(struct ieee80211_sta *pubsta, int tid) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); enum ieee80211_frame_release_type reason; bool more_data; trace_api_send_eosp_nullfunc(sta->local, pubsta, tid); reason = IEEE80211_FRAME_RELEASE_UAPSD; more_data = ieee80211_sta_ps_more_data(sta, ~sta->sta.uapsd_queues, reason, 0); ieee80211_send_null_response(sta, tid, reason, false, more_data); } EXPORT_SYMBOL(ieee80211_send_eosp_nullfunc); void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, u8 tid, bool buffered) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); if (WARN_ON(tid >= IEEE80211_NUM_TIDS)) return; trace_api_sta_set_buffered(sta->local, pubsta, tid, buffered); if (buffered) set_bit(tid, &sta->driver_buffered_tids); else clear_bit(tid, &sta->driver_buffered_tids); sta_info_recalc_tim(sta); } EXPORT_SYMBOL(ieee80211_sta_set_buffered); void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, u32 tx_airtime, u32 rx_airtime) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); struct ieee80211_local *local = sta->sdata->local; u8 ac = ieee80211_ac_from_tid(tid); u32 airtime = 0; if (sta->local->airtime_flags & AIRTIME_USE_TX) airtime += tx_airtime; if (sta->local->airtime_flags & AIRTIME_USE_RX) airtime += rx_airtime; spin_lock_bh(&local->active_txq_lock[ac]); sta->airtime[ac].tx_airtime += tx_airtime; sta->airtime[ac].rx_airtime += rx_airtime; if (ieee80211_sta_keep_active(sta, ac)) sta->airtime[ac].deficit -= airtime; spin_unlock_bh(&local->active_txq_lock[ac]); } EXPORT_SYMBOL(ieee80211_sta_register_airtime); void __ieee80211_sta_recalc_aggregates(struct sta_info *sta, u16 active_links) { bool first = true; int link_id; if (!sta->sta.valid_links || !sta->sta.mlo) { sta->sta.cur = &sta->sta.deflink.agg; return; } rcu_read_lock(); for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) { struct ieee80211_link_sta *link_sta; int i; if (!(active_links & BIT(link_id))) continue; link_sta = rcu_dereference(sta->sta.link[link_id]); if (!link_sta) continue; if (first) { sta->cur = sta->sta.deflink.agg; first = false; continue; } sta->cur.max_amsdu_len = min(sta->cur.max_amsdu_len, link_sta->agg.max_amsdu_len); sta->cur.max_rc_amsdu_len = min(sta->cur.max_rc_amsdu_len, link_sta->agg.max_rc_amsdu_len); for (i = 0; i < ARRAY_SIZE(sta->cur.max_tid_amsdu_len); i++) sta->cur.max_tid_amsdu_len[i] = min(sta->cur.max_tid_amsdu_len[i], link_sta->agg.max_tid_amsdu_len[i]); } rcu_read_unlock(); sta->sta.cur = &sta->cur; } void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); __ieee80211_sta_recalc_aggregates(sta, sta->sdata->vif.active_links); } EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed) { int tx_pending; if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) return; if (!tx_completed) { if (sta) atomic_add(tx_airtime, &sta->airtime[ac].aql_tx_pending); atomic_add(tx_airtime, &local->aql_total_pending_airtime); atomic_add(tx_airtime, &local->aql_ac_pending_airtime[ac]); return; } if (sta) { tx_pending = atomic_sub_return(tx_airtime, &sta->airtime[ac].aql_tx_pending); if (tx_pending < 0) atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, tx_pending, 0); } atomic_sub(tx_airtime, &local->aql_total_pending_airtime); tx_pending = atomic_sub_return(tx_airtime, &local->aql_ac_pending_airtime[ac]); if (WARN_ONCE(tx_pending < 0, "Device %s AC %d pending airtime underflow: %u, %u", wiphy_name(local->hw.wiphy), ac, tx_pending, tx_airtime)) { atomic_cmpxchg(&local->aql_ac_pending_airtime[ac], tx_pending, 0); atomic_sub(tx_pending, &local->aql_total_pending_airtime); } } static struct ieee80211_sta_rx_stats * sta_get_last_rx_stats(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = &sta->deflink.rx_stats; int cpu; if (!sta->deflink.pcpu_rx_stats) return stats; for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpustats; cpustats = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); if (time_after(cpustats->last_rx, stats->last_rx)) stats = cpustats; } return stats; } static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate, struct rate_info *rinfo) { rinfo->bw = STA_STATS_GET(BW, rate); switch (STA_STATS_GET(TYPE, rate)) { case STA_STATS_RATE_TYPE_VHT: rinfo->flags = RATE_INFO_FLAGS_VHT_MCS; rinfo->mcs = STA_STATS_GET(VHT_MCS, rate); rinfo->nss = STA_STATS_GET(VHT_NSS, rate); if (STA_STATS_GET(SGI, rate)) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; break; case STA_STATS_RATE_TYPE_HT: rinfo->flags = RATE_INFO_FLAGS_MCS; rinfo->mcs = STA_STATS_GET(HT_MCS, rate); if (STA_STATS_GET(SGI, rate)) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; break; case STA_STATS_RATE_TYPE_LEGACY: { struct ieee80211_supported_band *sband; u16 brate; unsigned int shift; int band = STA_STATS_GET(LEGACY_BAND, rate); int rate_idx = STA_STATS_GET(LEGACY_IDX, rate); sband = local->hw.wiphy->bands[band]; if (WARN_ON_ONCE(!sband->bitrates)) break; brate = sband->bitrates[rate_idx].bitrate; if (rinfo->bw == RATE_INFO_BW_5) shift = 2; else if (rinfo->bw == RATE_INFO_BW_10) shift = 1; else shift = 0; rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); break; } case STA_STATS_RATE_TYPE_HE: rinfo->flags = RATE_INFO_FLAGS_HE_MCS; rinfo->mcs = STA_STATS_GET(HE_MCS, rate); rinfo->nss = STA_STATS_GET(HE_NSS, rate); rinfo->he_gi = STA_STATS_GET(HE_GI, rate); rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate); rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate); break; case STA_STATS_RATE_TYPE_EHT: rinfo->flags = RATE_INFO_FLAGS_EHT_MCS; rinfo->mcs = STA_STATS_GET(EHT_MCS, rate); rinfo->nss = STA_STATS_GET(EHT_NSS, rate); rinfo->eht_gi = STA_STATS_GET(EHT_GI, rate); rinfo->eht_ru_alloc = STA_STATS_GET(EHT_RU, rate); break; } } static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; sta_stats_decode_rate(sta->local, rate, rinfo); return 0; } static inline u64 sta_get_tidstats_msdu(struct ieee80211_sta_rx_stats *rxstats, int tid) { unsigned int start; u64 value; do { start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->msdu[tid]; } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } static void sta_set_tidstats(struct sta_info *sta, struct cfg80211_tid_stats *tidstats, int tid) { struct ieee80211_local *local = sta->local; int cpu; if (!(tidstats->filled & BIT(NL80211_TID_STATS_RX_MSDU))) { tidstats->rx_msdu += sta_get_tidstats_msdu(&sta->deflink.rx_stats, tid); if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); tidstats->rx_msdu += sta_get_tidstats_msdu(cpurxs, tid); } } tidstats->filled |= BIT(NL80211_TID_STATS_RX_MSDU); } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU))) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU); tidstats->tx_msdu = sta->deflink.tx_stats.msdu[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_RETRIES)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_RETRIES); tidstats->tx_msdu_retries = sta->deflink.status_stats.msdu_retries[tid]; } if (!(tidstats->filled & BIT(NL80211_TID_STATS_TX_MSDU_FAILED)) && ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) { tidstats->filled |= BIT(NL80211_TID_STATS_TX_MSDU_FAILED); tidstats->tx_msdu_failed = sta->deflink.status_stats.msdu_failed[tid]; } if (tid < IEEE80211_NUM_TIDS) { spin_lock_bh(&local->fq.lock); rcu_read_lock(); tidstats->filled |= BIT(NL80211_TID_STATS_TXQ_STATS); ieee80211_fill_txq_stats(&tidstats->txq_stats, to_txq_info(sta->sta.txq[tid])); rcu_read_unlock(); spin_unlock_bh(&local->fq.lock); } } static inline u64 sta_get_stats_bytes(struct ieee80211_sta_rx_stats *rxstats) { unsigned int start; u64 value; do { start = u64_stats_fetch_begin(&rxstats->syncp); value = rxstats->bytes; } while (u64_stats_fetch_retry(&rxstats->syncp, start)); return value; } #ifdef CONFIG_MAC80211_MESH static void sta_set_mesh_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_local *local = sta->sdata->local; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_LLID) | BIT_ULL(NL80211_STA_INFO_PLID) | BIT_ULL(NL80211_STA_INFO_PLINK_STATE) | BIT_ULL(NL80211_STA_INFO_LOCAL_PM) | BIT_ULL(NL80211_STA_INFO_PEER_PM) | BIT_ULL(NL80211_STA_INFO_NONPEER_PM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_GATE) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TO_AS); sinfo->llid = sta->mesh->llid; sinfo->plid = sta->mesh->plid; sinfo->plink_state = sta->mesh->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->mesh->t_offset; } sinfo->local_pm = sta->mesh->local_pm; sinfo->peer_pm = sta->mesh->peer_pm; sinfo->nonpeer_pm = sta->mesh->nonpeer_pm; sinfo->connected_to_gate = sta->mesh->connected_to_gate; sinfo->connected_to_as = sta->mesh->connected_to_as; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_LINK_METRIC); sinfo->airtime_link_metric = airtime_link_metric_get(local, sta); } #endif void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, bool tidstats) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; u32 thr = 0; int i, ac, cpu; struct ieee80211_sta_rx_stats *last_rxstats; last_rxstats = sta_get_last_rx_stats(sta); sinfo->generation = sdata->local->sta_generation; /* do before driver, so beacon filtering drivers have a * chance to e.g. just add the number of filtered beacons * (or just modify the value entirely, of course) */ if (sdata->vif.type == NL80211_IFTYPE_STATION) sinfo->rx_beacon = sdata->deflink.u.mgd.count_beacon_signal; drv_sta_statistics(local, sdata, &sta->sta, sinfo); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_INACTIVE_TIME) | BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) | BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) | BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); if (sdata->vif.type == NL80211_IFTYPE_STATION) { sinfo->beacon_loss_count = sdata->deflink.u.mgd.beacon_loss_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_LOSS); } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_TX_BYTES64) | BIT_ULL(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_bytes += sta->deflink.tx_stats.bytes[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_packets += sta->deflink.tx_stats.packets[ac]; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_PACKETS); } if (!(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES64) | BIT_ULL(NL80211_STA_INFO_RX_BYTES)))) { sinfo->rx_bytes += sta_get_stats_bytes(&sta->deflink.rx_stats); if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_bytes += sta_get_stats_bytes(cpurxs); } } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BYTES64); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->deflink.rx_stats.packets; if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_packets += cpurxs->packets; } } sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_PACKETS); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_RETRIES))) { sinfo->tx_retries = sta->deflink.status_stats.retry_count; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_FAILED))) { sinfo->tx_failed = sta->deflink.status_stats.retry_failed; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_DURATION))) { for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->rx_duration += sta->airtime[ac].rx_airtime; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_DURATION))) { for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_duration += sta->airtime[ac].tx_airtime; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_DURATION); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT))) { sinfo->airtime_weight = sta->airtime_weight; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_AIRTIME_WEIGHT); } sinfo->rx_dropped_misc = sta->deflink.rx_stats.dropped; if (sta->deflink.pcpu_rx_stats) { for_each_possible_cpu(cpu) { struct ieee80211_sta_rx_stats *cpurxs; cpurxs = per_cpu_ptr(sta->deflink.pcpu_rx_stats, cpu); sinfo->rx_dropped_misc += cpurxs->dropped; } } if (sdata->vif.type == NL80211_IFTYPE_STATION && !(sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER)) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_BEACON_RX) | BIT_ULL(NL80211_STA_INFO_BEACON_SIGNAL_AVG); sinfo->rx_beacon_signal_avg = ieee80211_ave_rssi(&sdata->vif); } if (ieee80211_hw_check(&sta->local->hw, SIGNAL_DBM) || ieee80211_hw_check(&sta->local->hw, SIGNAL_UNSPEC)) { if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)last_rxstats->last_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL); } if (!sta->deflink.pcpu_rx_stats && !(sinfo->filled & BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = -ewma_signal_read(&sta->deflink.rx_stats_avg.signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_SIGNAL_AVG); } } /* for the average - if pcpu_rx_stats isn't set - rxstats must point to * the sta->rx_stats struct, so the check here is fine with and without * pcpu statistics */ if (last_rxstats->chains && !(sinfo->filled & (BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL) | BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL); if (!sta->deflink.pcpu_rx_stats) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = last_rxstats->chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { sinfo->chain_signal[i] = last_rxstats->chain_signal_last[i]; sinfo->chain_signal_avg[i] = -ewma_signal_read(&sta->deflink.rx_stats_avg.chain_signal[i]); } } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_TX_BITRATE)) && !sta->sta.valid_links && ieee80211_rate_valid(&sta->deflink.tx_stats.last_rate)) { sta_set_rate_info_tx(sta, &sta->deflink.tx_stats.last_rate, &sinfo->txrate); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_RX_BITRATE)) && !sta->sta.valid_links) { if (sta_set_rate_info_rx(sta, &sinfo->rxrate) == 0) sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE); } if (tidstats && !cfg80211_sinfo_alloc_tid_stats(sinfo, GFP_KERNEL)) { for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) sta_set_tidstats(sta, &sinfo->pertid[i], i); } #ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) sta_set_mesh_sinfo(sta, sinfo); #endif sinfo->bss_param.flags = 0; if (sdata->vif.bss_conf.use_cts_prot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_CTS_PROT; if (sdata->vif.bss_conf.use_short_preamble) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (sdata->vif.bss_conf.use_short_slot) sinfo->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; sinfo->bss_param.dtim_period = sdata->vif.bss_conf.dtim_period; sinfo->bss_param.beacon_interval = sdata->vif.bss_conf.beacon_int; sinfo->sta_flags.set = 0; sinfo->sta_flags.mask = BIT(NL80211_STA_FLAG_AUTHORIZED) | BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) | BIT(NL80211_STA_FLAG_WME) | BIT(NL80211_STA_FLAG_MFP) | BIT(NL80211_STA_FLAG_AUTHENTICATED) | BIT(NL80211_STA_FLAG_ASSOCIATED) | BIT(NL80211_STA_FLAG_TDLS_PEER); if (test_sta_flag(sta, WLAN_STA_AUTHORIZED)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHORIZED); if (test_sta_flag(sta, WLAN_STA_SHORT_PREAMBLE)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_SHORT_PREAMBLE); if (sta->sta.wme) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_WME); if (test_sta_flag(sta, WLAN_STA_MFP)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_MFP); if (test_sta_flag(sta, WLAN_STA_AUTH)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_AUTHENTICATED); if (test_sta_flag(sta, WLAN_STA_ASSOC)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_ASSOCIATED); if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); thr = sta_get_expected_throughput(sta); if (thr != 0) { sinfo->filled |= BIT_ULL(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL)) && sta->deflink.status_stats.ack_signal_filled) { sinfo->ack_signal = sta->deflink.status_stats.last_ack_signal; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL); } if (!(sinfo->filled & BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG)) && sta->deflink.status_stats.ack_signal_filled) { sinfo->avg_ack_signal = -(s8)ewma_avg_signal_read( &sta->deflink.status_stats.avg_ack_signal); sinfo->filled |= BIT_ULL(NL80211_STA_INFO_ACK_SIGNAL_AVG); } } u32 sta_get_expected_throughput(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; u32 thr = 0; if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) ref = local->rate_ctrl; /* check if the driver has a SW RC implementation */ if (ref && ref->ops->get_expected_throughput) thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); else thr = drv_get_expected_throughput(local, sta); return thr; } unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); if (!sta->deflink.status_stats.last_ack || time_after(stats->last_rx, sta->deflink.status_stats.last_ack)) return stats->last_rx; return sta->deflink.status_stats.last_ack; } static void sta_update_codel_params(struct sta_info *sta, u32 thr) { if (thr && thr < STA_SLOW_THRESHOLD * sta->local->num_sta) { sta->cparams.target = MS2TIME(50); sta->cparams.interval = MS2TIME(300); sta->cparams.ecn = false; } else { sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); sta->cparams.ecn = true; } } void ieee80211_sta_set_expected_throughput(struct ieee80211_sta *pubsta, u32 thr) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); sta_update_codel_params(sta, thr); } int ieee80211_sta_allocate_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct sta_link_alloc *alloc; int ret; lockdep_assert_wiphy(sdata->local->hw.wiphy); WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); /* must represent an MLD from the start */ if (WARN_ON(!sta->sta.valid_links)) return -EINVAL; if (WARN_ON(sta->sta.valid_links & BIT(link_id) || sta->link[link_id])) return -EBUSY; alloc = kzalloc(sizeof(*alloc), GFP_KERNEL); if (!alloc) return -ENOMEM; ret = sta_info_alloc_link(sdata->local, &alloc->info, GFP_KERNEL); if (ret) { kfree(alloc); return ret; } sta_info_add_link(sta, link_id, &alloc->info, &alloc->sta); ieee80211_link_sta_debugfs_add(&alloc->info); return 0; } void ieee80211_sta_free_link(struct sta_info *sta, unsigned int link_id) { lockdep_assert_wiphy(sta->sdata->local->hw.wiphy); WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED)); sta_remove_link(sta, link_id, false); } int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct link_sta_info *link_sta; u16 old_links = sta->sta.valid_links; u16 new_links = old_links | BIT(link_id); int ret; link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&sdata->local->hw.wiphy->mtx)); if (WARN_ON(old_links == new_links || !link_sta)) return -EINVAL; rcu_read_lock(); if (link_sta_info_hash_lookup(sdata->local, link_sta->addr)) { rcu_read_unlock(); return -EALREADY; } /* we only modify under the mutex so this is fine */ rcu_read_unlock(); sta->sta.valid_links = new_links; if (WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) goto hash; ieee80211_recalc_min_chandef(sdata, link_id); /* Ensure the values are updated for the driver, * redone by sta_remove_link on failure. */ ieee80211_sta_recalc_aggregates(&sta->sta); ret = drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, new_links); if (ret) { sta->sta.valid_links = old_links; sta_remove_link(sta, link_id, false); return ret; } hash: ret = link_sta_info_hash_add(sdata->local, link_sta); WARN_ON(ret); return 0; } void ieee80211_sta_remove_link(struct sta_info *sta, unsigned int link_id) { struct ieee80211_sub_if_data *sdata = sta->sdata; u16 old_links = sta->sta.valid_links; lockdep_assert_wiphy(sdata->local->hw.wiphy); sta->sta.valid_links &= ~BIT(link_id); if (!WARN_ON(!test_sta_flag(sta, WLAN_STA_INSERTED))) drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, sta->sta.valid_links); sta_remove_link(sta, link_id, true); } void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, const u8 *ext_capab, unsigned int ext_capab_len) { u8 val; sta->sta.max_amsdu_subframes = 0; if (ext_capab_len < 8) return; /* The sender might not have sent the last bit, consider it to be 0 */ val = u8_get_bits(ext_capab[7], WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB); /* we did get all the bits, take the MSB as well */ if (ext_capab_len >= 9) val |= u8_get_bits(ext_capab[8], WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB) << 1; if (val) sta->sta.max_amsdu_subframes = 4 << (4 - val); } #ifdef CONFIG_LOCKDEP bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) { struct sta_info *sta = container_of(pubsta, struct sta_info, sta); return lockdep_is_held(&sta->local->hw.wiphy->mtx); } EXPORT_SYMBOL(lockdep_sta_mutex_held); #endif
31 2 2 2 2 2 3 3 3 3 3 24 1 6 6 6 4 2 1 1 1 3 3 2 2 4 4 1 1 2 4 4 1 1 25 1 25 25 25 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 // SPDX-License-Identifier: GPL-2.0 #include <linux/compat.h> #include <linux/console.h> #include <linux/fb.h> #include <linux/fbcon.h> #include <linux/major.h> #include "fb_internal.h" /* * We hold a reference to the fb_info in file->private_data, * but if the current registered fb has changed, we don't * actually want to use it. * * So look up the fb_info using the inode minor number, * and just verify it against the reference we have. */ static struct fb_info *file_fb_info(struct file *file) { struct inode *inode = file_inode(file); int fbidx = iminor(inode); struct fb_info *info = registered_fb[fbidx]; if (info != file->private_data) info = NULL; return info; } static ssize_t fb_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct fb_info *info = file_fb_info(file); if (!info) return -ENODEV; if (fb_WARN_ON_ONCE(info, !info->fbops->fb_read)) return -EINVAL; if (info->state != FBINFO_STATE_RUNNING) return -EPERM; return info->fbops->fb_read(info, buf, count, ppos); } static ssize_t fb_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct fb_info *info = file_fb_info(file); if (!info) return -ENODEV; if (fb_WARN_ON_ONCE(info, !info->fbops->fb_write)) return -EINVAL; if (info->state != FBINFO_STATE_RUNNING) return -EPERM; return info->fbops->fb_write(info, buf, count, ppos); } static long do_fb_ioctl(struct fb_info *info, unsigned int cmd, unsigned long arg) { const struct fb_ops *fb; struct fb_var_screeninfo var; struct fb_fix_screeninfo fix; struct fb_cmap cmap_from; struct fb_cmap_user cmap; void __user *argp = (void __user *)arg; long ret = 0; switch (cmd) { case FBIOGET_VSCREENINFO: lock_fb_info(info); var = info->var; unlock_fb_info(info); ret = copy_to_user(argp, &var, sizeof(var)) ? -EFAULT : 0; break; case FBIOPUT_VSCREENINFO: if (copy_from_user(&var, argp, sizeof(var))) return -EFAULT; /* only for kernel-internal use */ var.activate &= ~FB_ACTIVATE_KD_TEXT; console_lock(); lock_fb_info(info); ret = fbcon_modechange_possible(info, &var); if (!ret) ret = fb_set_var(info, &var); if (!ret) fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL); unlock_fb_info(info); console_unlock(); if (!ret && copy_to_user(argp, &var, sizeof(var))) ret = -EFAULT; break; case FBIOGET_FSCREENINFO: lock_fb_info(info); memcpy(&fix, &info->fix, sizeof(fix)); if (info->flags & FBINFO_HIDE_SMEM_START) fix.smem_start = 0; unlock_fb_info(info); ret = copy_to_user(argp, &fix, sizeof(fix)) ? -EFAULT : 0; break; case FBIOPUTCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) return -EFAULT; ret = fb_set_user_cmap(&cmap, info); break; case FBIOGETCMAP: if (copy_from_user(&cmap, argp, sizeof(cmap))) return -EFAULT; lock_fb_info(info); cmap_from = info->cmap; unlock_fb_info(info); ret = fb_cmap_to_user(&cmap_from, &cmap); break; case FBIOPAN_DISPLAY: if (copy_from_user(&var, argp, sizeof(var))) return -EFAULT; console_lock(); lock_fb_info(info); ret = fb_pan_display(info, &var); unlock_fb_info(info); console_unlock(); if (ret == 0 && copy_to_user(argp, &var, sizeof(var))) return -EFAULT; break; case FBIO_CURSOR: ret = -EINVAL; break; case FBIOGET_CON2FBMAP: ret = fbcon_get_con2fb_map_ioctl(argp); break; case FBIOPUT_CON2FBMAP: ret = fbcon_set_con2fb_map_ioctl(argp); break; case FBIOBLANK: if (arg > FB_BLANK_POWERDOWN) return -EINVAL; console_lock(); lock_fb_info(info); ret = fb_blank(info, arg); /* might again call into fb_blank */ fbcon_fb_blanked(info, arg); unlock_fb_info(info); console_unlock(); break; default: lock_fb_info(info); fb = info->fbops; if (fb->fb_ioctl) ret = fb->fb_ioctl(info, cmd, arg); else ret = -ENOTTY; unlock_fb_info(info); } return ret; } static long fb_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fb_info *info = file_fb_info(file); if (!info) return -ENODEV; return do_fb_ioctl(info, cmd, arg); } #ifdef CONFIG_COMPAT struct fb_fix_screeninfo32 { char id[16]; compat_caddr_t smem_start; u32 smem_len; u32 type; u32 type_aux; u32 visual; u16 xpanstep; u16 ypanstep; u16 ywrapstep; u32 line_length; compat_caddr_t mmio_start; u32 mmio_len; u32 accel; u16 reserved[3]; }; struct fb_cmap32 { u32 start; u32 len; compat_caddr_t red; compat_caddr_t green; compat_caddr_t blue; compat_caddr_t transp; }; static int fb_getput_cmap(struct fb_info *info, unsigned int cmd, unsigned long arg) { struct fb_cmap32 cmap32; struct fb_cmap cmap_from; struct fb_cmap_user cmap; if (copy_from_user(&cmap32, compat_ptr(arg), sizeof(cmap32))) return -EFAULT; cmap = (struct fb_cmap_user) { .start = cmap32.start, .len = cmap32.len, .red = compat_ptr(cmap32.red), .green = compat_ptr(cmap32.green), .blue = compat_ptr(cmap32.blue), .transp = compat_ptr(cmap32.transp), }; if (cmd == FBIOPUTCMAP) return fb_set_user_cmap(&cmap, info); lock_fb_info(info); cmap_from = info->cmap; unlock_fb_info(info); return fb_cmap_to_user(&cmap_from, &cmap); } static int do_fscreeninfo_to_user(struct fb_fix_screeninfo *fix, struct fb_fix_screeninfo32 __user *fix32) { __u32 data; int err; err = copy_to_user(&fix32->id, &fix->id, sizeof(fix32->id)); data = (__u32) (unsigned long) fix->smem_start; err |= put_user(data, &fix32->smem_start); err |= put_user(fix->smem_len, &fix32->smem_len); err |= put_user(fix->type, &fix32->type); err |= put_user(fix->type_aux, &fix32->type_aux); err |= put_user(fix->visual, &fix32->visual); err |= put_user(fix->xpanstep, &fix32->xpanstep); err |= put_user(fix->ypanstep, &fix32->ypanstep); err |= put_user(fix->ywrapstep, &fix32->ywrapstep); err |= put_user(fix->line_length, &fix32->line_length); data = (__u32) (unsigned long) fix->mmio_start; err |= put_user(data, &fix32->mmio_start); err |= put_user(fix->mmio_len, &fix32->mmio_len); err |= put_user(fix->accel, &fix32->accel); err |= copy_to_user(fix32->reserved, fix->reserved, sizeof(fix->reserved)); if (err) return -EFAULT; return 0; } static int fb_get_fscreeninfo(struct fb_info *info, unsigned int cmd, unsigned long arg) { struct fb_fix_screeninfo fix; lock_fb_info(info); fix = info->fix; if (info->flags & FBINFO_HIDE_SMEM_START) fix.smem_start = 0; unlock_fb_info(info); return do_fscreeninfo_to_user(&fix, compat_ptr(arg)); } static long fb_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct fb_info *info = file_fb_info(file); const struct fb_ops *fb; long ret = -ENOIOCTLCMD; if (!info) return -ENODEV; fb = info->fbops; switch (cmd) { case FBIOGET_VSCREENINFO: case FBIOPUT_VSCREENINFO: case FBIOPAN_DISPLAY: case FBIOGET_CON2FBMAP: case FBIOPUT_CON2FBMAP: arg = (unsigned long) compat_ptr(arg); fallthrough; case FBIOBLANK: ret = do_fb_ioctl(info, cmd, arg); break; case FBIOGET_FSCREENINFO: ret = fb_get_fscreeninfo(info, cmd, arg); break; case FBIOGETCMAP: case FBIOPUTCMAP: ret = fb_getput_cmap(info, cmd, arg); break; default: if (fb->fb_compat_ioctl) ret = fb->fb_compat_ioctl(info, cmd, arg); break; } return ret; } #endif static int fb_mmap(struct file *file, struct vm_area_struct *vma) { struct fb_info *info = file_fb_info(file); int res; if (!info) return -ENODEV; if (fb_WARN_ON_ONCE(info, !info->fbops->fb_mmap)) return -ENODEV; mutex_lock(&info->mm_lock); res = info->fbops->fb_mmap(info, vma); mutex_unlock(&info->mm_lock); return res; } static int fb_open(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { int fbidx = iminor(inode); struct fb_info *info; int res = 0; info = get_fb_info(fbidx); if (!info) { request_module("fb%d", fbidx); info = get_fb_info(fbidx); if (!info) return -ENODEV; } if (IS_ERR(info)) return PTR_ERR(info); lock_fb_info(info); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; goto out; } file->private_data = info; if (info->fbops->fb_open) { res = info->fbops->fb_open(info, 1); if (res) module_put(info->fbops->owner); } #ifdef CONFIG_FB_DEFERRED_IO if (info->fbdefio) fb_deferred_io_open(info, inode, file); #endif out: unlock_fb_info(info); if (res) put_fb_info(info); return res; } static int fb_release(struct inode *inode, struct file *file) __acquires(&info->lock) __releases(&info->lock) { struct fb_info * const info = file->private_data; lock_fb_info(info); #if IS_ENABLED(CONFIG_FB_DEFERRED_IO) if (info->fbdefio) fb_deferred_io_release(info); #endif if (info->fbops->fb_release) info->fbops->fb_release(info, 1); module_put(info->fbops->owner); unlock_fb_info(info); put_fb_info(info); return 0; } #if defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && !defined(CONFIG_MMU) static unsigned long get_fb_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct fb_info * const info = filp->private_data; unsigned long fb_size = PAGE_ALIGN(info->fix.smem_len); if (pgoff > fb_size || len > fb_size - pgoff) return -EINVAL; return (unsigned long)info->screen_base + pgoff; } #endif static const struct file_operations fb_fops = { .owner = THIS_MODULE, .read = fb_read, .write = fb_write, .unlocked_ioctl = fb_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = fb_compat_ioctl, #endif .mmap = fb_mmap, .open = fb_open, .release = fb_release, #if defined(HAVE_ARCH_FB_UNMAPPED_AREA) || \ (defined(CONFIG_FB_PROVIDE_GET_FB_UNMAPPED_AREA) && \ !defined(CONFIG_MMU)) .get_unmapped_area = get_fb_unmapped_area, #endif #ifdef CONFIG_FB_DEFERRED_IO .fsync = fb_deferred_io_fsync, #endif .llseek = default_llseek, }; int fb_register_chrdev(void) { int ret; ret = register_chrdev(FB_MAJOR, "fb", &fb_fops); if (ret) { pr_err("Unable to get major %d for fb devs\n", FB_MAJOR); return ret; } return ret; } void fb_unregister_chrdev(void) { unregister_chrdev(FB_MAJOR, "fb"); }
7 7 7 7 2 6 7 1 6 7 7 2 2 7 7 1 1 7 7 9 9 9 9 6 9 1 4 7 3 3 1 1 2 12 12 11 11 9 2 9 12 11 10 9 4 4 3 8 12 7 7 1 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2014 Fraunhofer ITWM * * Written by: * Phoebe Buckheister <phoebe.buckheister@itwm.fraunhofer.de> */ #include <linux/ieee802154.h> #include <net/mac802154.h> #include <net/ieee802154_netdev.h> static int ieee802154_hdr_push_addr(u8 *buf, const struct ieee802154_addr *addr, bool omit_pan) { int pos = 0; if (addr->mode == IEEE802154_ADDR_NONE) return 0; if (!omit_pan) { memcpy(buf + pos, &addr->pan_id, 2); pos += 2; } switch (addr->mode) { case IEEE802154_ADDR_SHORT: memcpy(buf + pos, &addr->short_addr, 2); pos += 2; break; case IEEE802154_ADDR_LONG: memcpy(buf + pos, &addr->extended_addr, IEEE802154_ADDR_LEN); pos += IEEE802154_ADDR_LEN; break; default: return -EINVAL; } return pos; } static int ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr) { int pos = 5; memcpy(buf, hdr, 1); memcpy(buf + 1, &hdr->frame_counter, 4); switch (hdr->key_id_mode) { case IEEE802154_SCF_KEY_IMPLICIT: return pos; case IEEE802154_SCF_KEY_INDEX: break; case IEEE802154_SCF_KEY_SHORT_INDEX: memcpy(buf + pos, &hdr->short_src, 4); pos += 4; break; case IEEE802154_SCF_KEY_HW_INDEX: memcpy(buf + pos, &hdr->extended_src, IEEE802154_ADDR_LEN); pos += IEEE802154_ADDR_LEN; break; } buf[pos++] = hdr->key_id; return pos; } int ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr) { u8 buf[IEEE802154_MAX_HEADER_LEN]; int pos = 2; int rc; struct ieee802154_hdr_fc *fc = &hdr->fc; buf[pos++] = hdr->seq; fc->dest_addr_mode = hdr->dest.mode; rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false); if (rc < 0) return -EINVAL; pos += rc; fc->source_addr_mode = hdr->source.mode; if (hdr->source.pan_id == hdr->dest.pan_id && hdr->dest.mode != IEEE802154_ADDR_NONE) fc->intra_pan = true; rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc->intra_pan); if (rc < 0) return -EINVAL; pos += rc; if (fc->security_enabled) { fc->version = 1; rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec); if (rc < 0) return -EINVAL; pos += rc; } memcpy(buf, fc, 2); memcpy(skb_push(skb, pos), buf, pos); return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_push); int ieee802154_mac_cmd_push(struct sk_buff *skb, void *f, const void *pl, unsigned int pl_len) { struct ieee802154_mac_cmd_frame *frame = f; struct ieee802154_mac_cmd_pl *mac_pl = &frame->mac_pl; struct ieee802154_hdr *mhr = &frame->mhr; int ret; skb_reserve(skb, sizeof(*mhr)); ret = ieee802154_hdr_push(skb, mhr); if (ret < 0) return ret; skb_reset_mac_header(skb); skb->mac_len = ret; skb_put_data(skb, mac_pl, sizeof(*mac_pl)); skb_put_data(skb, pl, pl_len); return 0; } EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_push); int ieee802154_beacon_push(struct sk_buff *skb, struct ieee802154_beacon_frame *beacon) { struct ieee802154_beacon_hdr *mac_pl = &beacon->mac_pl; struct ieee802154_hdr *mhr = &beacon->mhr; int ret; skb_reserve(skb, sizeof(*mhr)); ret = ieee802154_hdr_push(skb, mhr); if (ret < 0) return ret; skb_reset_mac_header(skb); skb->mac_len = ret; skb_put_data(skb, mac_pl, sizeof(*mac_pl)); if (mac_pl->pend_short_addr_count || mac_pl->pend_ext_addr_count) return -EOPNOTSUPP; return 0; } EXPORT_SYMBOL_GPL(ieee802154_beacon_push); static int ieee802154_hdr_get_addr(const u8 *buf, int mode, bool omit_pan, struct ieee802154_addr *addr) { int pos = 0; addr->mode = mode; if (mode == IEEE802154_ADDR_NONE) return 0; if (!omit_pan) { memcpy(&addr->pan_id, buf + pos, 2); pos += 2; } if (mode == IEEE802154_ADDR_SHORT) { memcpy(&addr->short_addr, buf + pos, 2); return pos + 2; } else { memcpy(&addr->extended_addr, buf + pos, IEEE802154_ADDR_LEN); return pos + IEEE802154_ADDR_LEN; } } static int ieee802154_hdr_addr_len(int mode, bool omit_pan) { int pan_len = omit_pan ? 0 : 2; switch (mode) { case IEEE802154_ADDR_NONE: return 0; case IEEE802154_ADDR_SHORT: return 2 + pan_len; case IEEE802154_ADDR_LONG: return IEEE802154_ADDR_LEN + pan_len; default: return -EINVAL; } } static int ieee802154_hdr_get_sechdr(const u8 *buf, struct ieee802154_sechdr *hdr) { int pos = 5; memcpy(hdr, buf, 1); memcpy(&hdr->frame_counter, buf + 1, 4); switch (hdr->key_id_mode) { case IEEE802154_SCF_KEY_IMPLICIT: return pos; case IEEE802154_SCF_KEY_INDEX: break; case IEEE802154_SCF_KEY_SHORT_INDEX: memcpy(&hdr->short_src, buf + pos, 4); pos += 4; break; case IEEE802154_SCF_KEY_HW_INDEX: memcpy(&hdr->extended_src, buf + pos, IEEE802154_ADDR_LEN); pos += IEEE802154_ADDR_LEN; break; } hdr->key_id = buf[pos++]; return pos; } static int ieee802154_sechdr_lengths[4] = { [IEEE802154_SCF_KEY_IMPLICIT] = 5, [IEEE802154_SCF_KEY_INDEX] = 6, [IEEE802154_SCF_KEY_SHORT_INDEX] = 10, [IEEE802154_SCF_KEY_HW_INDEX] = 14, }; static int ieee802154_hdr_sechdr_len(u8 sc) { return ieee802154_sechdr_lengths[IEEE802154_SCF_KEY_ID_MODE(sc)]; } static int ieee802154_hdr_minlen(const struct ieee802154_hdr *hdr) { int dlen, slen; dlen = ieee802154_hdr_addr_len(hdr->fc.dest_addr_mode, false); slen = ieee802154_hdr_addr_len(hdr->fc.source_addr_mode, hdr->fc.intra_pan); if (slen < 0 || dlen < 0) return -EINVAL; return 3 + dlen + slen + hdr->fc.security_enabled; } static int ieee802154_hdr_get_addrs(const u8 *buf, struct ieee802154_hdr *hdr) { int pos = 0; pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.dest_addr_mode, false, &hdr->dest); pos += ieee802154_hdr_get_addr(buf + pos, hdr->fc.source_addr_mode, hdr->fc.intra_pan, &hdr->source); if (hdr->fc.intra_pan) hdr->source.pan_id = hdr->dest.pan_id; return pos; } int ieee802154_hdr_pull(struct sk_buff *skb, struct ieee802154_hdr *hdr) { int pos = 3, rc; if (!pskb_may_pull(skb, 3)) return -EINVAL; memcpy(hdr, skb->data, 3); rc = ieee802154_hdr_minlen(hdr); if (rc < 0 || !pskb_may_pull(skb, rc)) return -EINVAL; pos += ieee802154_hdr_get_addrs(skb->data + pos, hdr); if (hdr->fc.security_enabled) { int want = pos + ieee802154_hdr_sechdr_len(skb->data[pos]); if (!pskb_may_pull(skb, want)) return -EINVAL; pos += ieee802154_hdr_get_sechdr(skb->data + pos, &hdr->sec); } skb_pull(skb, pos); return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_pull); int ieee802154_mac_cmd_pl_pull(struct sk_buff *skb, struct ieee802154_mac_cmd_pl *mac_pl) { if (!pskb_may_pull(skb, sizeof(*mac_pl))) return -EINVAL; memcpy(mac_pl, skb->data, sizeof(*mac_pl)); skb_pull(skb, sizeof(*mac_pl)); return 0; } EXPORT_SYMBOL_GPL(ieee802154_mac_cmd_pl_pull); int ieee802154_hdr_peek_addrs(const struct sk_buff *skb, struct ieee802154_hdr *hdr) { const u8 *buf = skb_mac_header(skb); int pos = 3, rc; if (buf + 3 > skb_tail_pointer(skb)) return -EINVAL; memcpy(hdr, buf, 3); rc = ieee802154_hdr_minlen(hdr); if (rc < 0 || buf + rc > skb_tail_pointer(skb)) return -EINVAL; pos += ieee802154_hdr_get_addrs(buf + pos, hdr); return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_peek_addrs); int ieee802154_hdr_peek(const struct sk_buff *skb, struct ieee802154_hdr *hdr) { const u8 *buf = skb_mac_header(skb); int pos; pos = ieee802154_hdr_peek_addrs(skb, hdr); if (pos < 0) return -EINVAL; if (hdr->fc.security_enabled) { u8 key_id_mode = IEEE802154_SCF_KEY_ID_MODE(*(buf + pos)); int want = pos + ieee802154_sechdr_lengths[key_id_mode]; if (buf + want > skb_tail_pointer(skb)) return -EINVAL; pos += ieee802154_hdr_get_sechdr(buf + pos, &hdr->sec); } return pos; } EXPORT_SYMBOL_GPL(ieee802154_hdr_peek); int ieee802154_max_payload(const struct ieee802154_hdr *hdr) { int hlen = ieee802154_hdr_minlen(hdr); if (hdr->fc.security_enabled) { hlen += ieee802154_sechdr_lengths[hdr->sec.key_id_mode] - 1; hlen += ieee802154_sechdr_authtag_len(&hdr->sec); } return IEEE802154_MTU - hlen - IEEE802154_MFR_SIZE; } EXPORT_SYMBOL_GPL(ieee802154_max_payload);
15284 20 14730 198 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __ASM_PREEMPT_H #define __ASM_PREEMPT_H #include <asm/rmwcc.h> #include <asm/percpu.h> #include <linux/static_call_types.h> DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count); /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 /* * We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such * that a decrement hitting 0 means we can and should reschedule. */ #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) /* * We mask the PREEMPT_NEED_RESCHED bit so as not to confuse all current users * that think a non-zero value indicates we cannot preempt. */ static __always_inline int preempt_count(void) { return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) { int old, new; old = raw_cpu_read_4(__preempt_count); do { new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new)); } /* * must be macros to avoid header recursion hell */ #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* * We fold the NEED_RESCHED bit into the preempt count such that * preempt_enable() can decrement and test for needing to reschedule with a * single instruction. * * We invert the actual bit, so that when the decrement hits 0 we know we both * need to resched (the bit is cleared) and can resched (no preempt count). */ static __always_inline void set_preempt_need_resched(void) { raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); } /* * The various preempt_count add/sub methods */ static __always_inline void __preempt_count_add(int val) { raw_cpu_add_4(__preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { raw_cpu_add_4(__preempt_count, -val); } /* * Because we keep PREEMPT_NEED_RESCHED set when we do _not_ need to reschedule * a decrement which hits zero means we have no preempt_count and should * reschedule. */ static __always_inline bool __preempt_count_dec_and_test(void) { return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e, __percpu_arg([var])); } /* * Returns true when we need to resched and can (barring IRQ state). */ static __always_inline bool should_resched(int preempt_offset) { return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPTION extern asmlinkage void preempt_schedule(void); extern asmlinkage void preempt_schedule_thunk(void); #define preempt_schedule_dynamic_enabled preempt_schedule_thunk #define preempt_schedule_dynamic_disabled NULL extern asmlinkage void preempt_schedule_notrace(void); extern asmlinkage void preempt_schedule_notrace_thunk(void); #define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk #define preempt_schedule_notrace_dynamic_disabled NULL #ifdef CONFIG_PREEMPT_DYNAMIC DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); #define __preempt_schedule() \ do { \ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule); \ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ } while (0) DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); #define __preempt_schedule_notrace() \ do { \ __STATIC_CALL_MOD_ADDRESSABLE(preempt_schedule_notrace); \ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \ } while (0) #else /* PREEMPT_DYNAMIC */ #define __preempt_schedule() \ asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT); #define __preempt_schedule_notrace() \ asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT); #endif /* PREEMPT_DYNAMIC */ #endif /* PREEMPTION */ #endif /* __ASM_PREEMPT_H */
6171 6187 6162 6189 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 /* SPDX-License-Identifier: GPL-2.0 */ /* * generic net pointers */ #ifndef __NET_GENERIC_H__ #define __NET_GENERIC_H__ #include <linux/bug.h> #include <linux/rcupdate.h> #include <net/net_namespace.h> /* * Generic net pointers are to be used by modules to put some private * stuff on the struct net without explicit struct net modification * * The rules are simple: * 1. set pernet_operations->id. After register_pernet_device you * will have the id of your private pointer. * 2. set pernet_operations->size to have the code allocate and free * a private structure pointed to from struct net. * 3. do not change this pointer while the net is alive; * 4. do not try to have any private reference on the net_generic object. * * After accomplishing all of the above, the private pointer can be * accessed with the net_generic() call. */ struct net_generic { union { struct { unsigned int len; struct rcu_head rcu; } s; DECLARE_FLEX_ARRAY(void *, ptr); }; }; static inline void *net_generic(const struct net *net, unsigned int id) { struct net_generic *ng; void *ptr; rcu_read_lock(); ng = rcu_dereference(net->gen); ptr = ng->ptr[id]; rcu_read_unlock(); return ptr; } #endif
15 15 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef LINUX_IOMAP_H #define LINUX_IOMAP_H 1 #include <linux/atomic.h> #include <linux/bitmap.h> #include <linux/blk_types.h> #include <linux/mm.h> #include <linux/types.h> #include <linux/mm_types.h> #include <linux/blkdev.h> struct address_space; struct fiemap_extent_info; struct inode; struct iomap_iter; struct iomap_dio; struct iomap_writepage_ctx; struct iov_iter; struct kiocb; struct page; struct vm_area_struct; struct vm_fault; /* * Types of block ranges for iomap mappings: */ #define IOMAP_HOLE 0 /* no blocks allocated, need allocation */ #define IOMAP_DELALLOC 1 /* delayed allocation blocks */ #define IOMAP_MAPPED 2 /* blocks allocated at @addr */ #define IOMAP_UNWRITTEN 3 /* blocks allocated at @addr in unwritten state */ #define IOMAP_INLINE 4 /* data inline in the inode */ /* * Flags reported by the file system from iomap_begin: * * IOMAP_F_NEW indicates that the blocks have been newly allocated and need * zeroing for areas that no data is copied to. * * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access * written data and requires fdatasync to commit them to persistent storage. * This needs to take into account metadata changes that *may* be made at IO * completion, such as file size updates from direct IO. * * IOMAP_F_SHARED indicates that the blocks are shared, and will need to be * unshared as part a write. * * IOMAP_F_MERGED indicates that the iomap contains the merge of multiple block * mappings. * * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of * buffer heads for this mapping. * * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent * rather than a file data extent. * * IOMAP_F_BOUNDARY indicates that I/O and I/O completions for this iomap must * never be merged with the mapping before it. * * IOMAP_F_ANON_WRITE indicates that (write) I/O does not have a target block * assigned to it yet and the file system will do that in the bio submission * handler, splitting the I/O as needed. * * IOMAP_F_ATOMIC_BIO indicates that (write) I/O will be issued as an atomic * bio, i.e. set REQ_ATOMIC. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) #define IOMAP_F_SHARED (1U << 2) #define IOMAP_F_MERGED (1U << 3) #ifdef CONFIG_BUFFER_HEAD #define IOMAP_F_BUFFER_HEAD (1U << 4) #else #define IOMAP_F_BUFFER_HEAD 0 #endif /* CONFIG_BUFFER_HEAD */ #define IOMAP_F_XATTR (1U << 5) #define IOMAP_F_BOUNDARY (1U << 6) #define IOMAP_F_ANON_WRITE (1U << 7) #define IOMAP_F_ATOMIC_BIO (1U << 8) /* * Flag reserved for file system specific usage */ #define IOMAP_F_PRIVATE (1U << 12) /* * Flags set by the core iomap code during operations: * * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size * has changed as the result of this write operation. * * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file * range it covers needs to be remapped by the high level before the operation * can proceed. */ #define IOMAP_F_SIZE_CHANGED (1U << 14) #define IOMAP_F_STALE (1U << 15) /* * Magic value for addr: */ #define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ struct iomap_folio_ops; struct iomap { u64 addr; /* disk offset of mapping, bytes */ loff_t offset; /* file offset of mapping, bytes */ u64 length; /* length of mapping, bytes */ u16 type; /* type of mapping */ u16 flags; /* flags for mapping */ struct block_device *bdev; /* block device for I/O */ struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data; void *private; /* filesystem private */ const struct iomap_folio_ops *folio_ops; u64 validity_cookie; /* used with .iomap_valid() */ }; static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) { if (iomap->flags & IOMAP_F_ANON_WRITE) return U64_MAX; /* invalid */ return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; } /* * Returns the inline data pointer for logical offset @pos. */ static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos) { return iomap->inline_data + pos - iomap->offset; } /* * Check if the mapping's length is within the valid range for inline data. * This is used to guard against accessing data beyond the page inline_data * points at. */ static inline bool iomap_inline_data_valid(const struct iomap *iomap) { return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); } /* * When a filesystem sets folio_ops in an iomap mapping it returns, get_folio * and put_folio will be called for each folio written to. This only applies * to buffered writes as unbuffered writes will not typically have folios * associated with them. * * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. */ struct iomap_folio_ops { struct folio *(*get_folio)(struct iomap_iter *iter, loff_t pos, unsigned len); void (*put_folio)(struct inode *inode, loff_t pos, unsigned copied, struct folio *folio); /* * Check that the cached iomap still maps correctly to the filesystem's * internal extent map. FS internal extent maps can change while iomap * is iterating a cached iomap, so this hook allows iomap to detect that * the iomap needs to be refreshed during a long running write * operation. * * The filesystem can store internal state (e.g. a sequence number) in * iomap->validity_cookie when the iomap is first mapped to be able to * detect changes between mapping time and whenever .iomap_valid() is * called. * * This is called with the folio over the specified file position held * locked by the iomap code. */ bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); }; /* * Flags for iomap_begin / iomap_end. No flag implies a read. */ #define IOMAP_WRITE (1 << 0) /* writing, must allocate blocks */ #define IOMAP_ZERO (1 << 1) /* zeroing operation, may skip holes */ #define IOMAP_REPORT (1 << 2) /* report extent status, e.g. FIEMAP */ #define IOMAP_FAULT (1 << 3) /* mapping for page fault */ #define IOMAP_DIRECT (1 << 4) /* direct I/O */ #define IOMAP_NOWAIT (1 << 5) /* do not block */ #define IOMAP_OVERWRITE_ONLY (1 << 6) /* only pure overwrites allowed */ #define IOMAP_UNSHARE (1 << 7) /* unshare_file_range */ #ifdef CONFIG_FS_DAX #define IOMAP_DAX (1 << 8) /* DAX mapping */ #else #define IOMAP_DAX 0 #endif /* CONFIG_FS_DAX */ #define IOMAP_ATOMIC (1 << 9) /* torn-write protection */ #define IOMAP_DONTCACHE (1 << 10) struct iomap_ops { /* * Return the existing mapping at pos, or reserve space starting at * pos for up to length, as long as we can do it as a single mapping. * The actual length is returned in iomap->length. */ int (*iomap_begin)(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, struct iomap *srcmap); /* * Commit and/or unreserve space previous allocated using iomap_begin. * Written indicates the length of the successful write operation which * needs to be commited, while the rest needs to be unreserved. * Written might be zero if no data was written. */ int (*iomap_end)(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap); }; /** * struct iomap_iter - Iterate through a range of a file * @inode: Set at the start of the iteration and should not change. * @pos: The current file position we are operating on. It is updated by * calls to iomap_iter(). Treat as read-only in the body. * @len: The remaining length of the file segment we're operating on. * It is updated at the same time as @pos. * @iter_start_pos: The original start pos for the current iomap. Used for * incremental iter advance. * @status: Status of the most recent iteration. Zero on success or a negative * errno on error. * @flags: Zero or more of the iomap_begin flags above. * @iomap: Map describing the I/O iteration * @srcmap: Source map for COW operations */ struct iomap_iter { struct inode *inode; loff_t pos; u64 len; loff_t iter_start_pos; int status; unsigned flags; struct iomap iomap; struct iomap srcmap; void *private; }; int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops); int iomap_iter_advance(struct iomap_iter *iter, u64 *count); /** * iomap_length_trim - trimmed length of the current iomap iteration * @iter: iteration structure * @pos: File position to trim from. * @len: Length of the mapping to trim to. * * Returns a trimmed length that the operation applies to for the current * iteration. */ static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos, u64 len) { u64 end = iter->iomap.offset + iter->iomap.length; if (iter->srcmap.type != IOMAP_HOLE) end = min(end, iter->srcmap.offset + iter->srcmap.length); return min(len, end - pos); } /** * iomap_length - length of the current iomap iteration * @iter: iteration structure * * Returns the length that the operation applies to for the current iteration. */ static inline u64 iomap_length(const struct iomap_iter *iter) { return iomap_length_trim(iter, iter->pos, iter->len); } /** * iomap_iter_advance_full - advance by the full length of current map */ static inline int iomap_iter_advance_full(struct iomap_iter *iter) { u64 length = iomap_length(iter); return iomap_iter_advance(iter, &length); } /** * iomap_iter_srcmap - return the source map for the current iomap iteration * @i: iteration structure * * Write operations on file systems with reflink support might require a * source and a destination map. This function retourns the source map * for a given operation, which may or may no be identical to the destination * map in &i->iomap. */ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) { if (i->srcmap.type != IOMAP_HOLE) return &i->srcmap; return &i->iomap; } /* * Return the file offset for the first unchanged block after a short write. * * If nothing was written, round @pos down to point at the first block in * the range, else round up to include the partially written block. */ static inline loff_t iomap_last_written_block(struct inode *inode, loff_t pos, ssize_t written) { if (unlikely(!written)) return round_down(pos, i_blocksize(inode)); return round_up(pos + written, i_blocksize(inode)); } /* * Check if the range needs to be unshared for a FALLOC_FL_UNSHARE_RANGE * operation. * * Don't bother with blocks that are not shared to start with; or mappings that * cannot be shared, such as inline data, delalloc reservations, holes or * unwritten extents. * * Note that we use srcmap directly instead of iomap_iter_srcmap as unsharing * requires providing a separate source map, and the presence of one is a good * indicator that unsharing is needed, unlike IOMAP_F_SHARED which can be set * for any data that goes into the COW fork for XFS. */ static inline bool iomap_want_unshare_iter(const struct iomap_iter *iter) { return (iter->iomap.flags & IOMAP_F_SHARED) && iter->srcmap.type == IOMAP_MAPPED; } ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops, void *private); int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, const struct iomap_ops *ops); int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, const struct iomap_ops *ops, void *private); int iomap_truncate_page(struct inode *inode, loff_t pos, bool *did_zero, const struct iomap_ops *ops, void *private); vm_fault_t iomap_page_mkwrite(struct vm_fault *vmf, const struct iomap_ops *ops, void *private); typedef void (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length, struct iomap *iomap); void iomap_write_delalloc_release(struct inode *inode, loff_t start_byte, loff_t end_byte, unsigned flags, struct iomap *iomap, iomap_punch_t punch); int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len, const struct iomap_ops *ops); loff_t iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops); loff_t iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops); sector_t iomap_bmap(struct address_space *mapping, sector_t bno, const struct iomap_ops *ops); /* * Flags for iomap_ioend->io_flags. */ /* shared COW extent */ #define IOMAP_IOEND_SHARED (1U << 0) /* unwritten extent */ #define IOMAP_IOEND_UNWRITTEN (1U << 1) /* don't merge into previous ioend */ #define IOMAP_IOEND_BOUNDARY (1U << 2) /* is direct I/O */ #define IOMAP_IOEND_DIRECT (1U << 3) /* * Flags that if set on either ioend prevent the merge of two ioends. * (IOMAP_IOEND_BOUNDARY also prevents merges, but only one-way) */ #define IOMAP_IOEND_NOMERGE_FLAGS \ (IOMAP_IOEND_SHARED | IOMAP_IOEND_UNWRITTEN | IOMAP_IOEND_DIRECT) /* * Structure for writeback I/O completions. * * File systems implementing ->submit_ioend (for buffered I/O) or ->submit_io * for direct I/O) can split a bio generated by iomap. In that case the parent * ioend it was split from is recorded in ioend->io_parent. */ struct iomap_ioend { struct list_head io_list; /* next ioend in chain */ u16 io_flags; /* IOMAP_IOEND_* */ struct inode *io_inode; /* file being written to */ size_t io_size; /* size of the extent */ atomic_t io_remaining; /* completetion defer count */ int io_error; /* stashed away status */ struct iomap_ioend *io_parent; /* parent for completions */ loff_t io_offset; /* offset in the file */ sector_t io_sector; /* start sector of ioend */ void *io_private; /* file system private data */ struct bio io_bio; /* MUST BE LAST! */ }; static inline struct iomap_ioend *iomap_ioend_from_bio(struct bio *bio) { return container_of(bio, struct iomap_ioend, io_bio); } struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. * * Can return arbitrarily large regions, but we need to call into it at * least once per folio to allow the file systems to synchronize with * the write path that could be invalidating mappings. * * An existing mapping from a previous call to this method can be reused * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset, unsigned len); /* * Optional, allows the file systems to hook into bio submission, * including overriding the bi_end_io handler. * * Returns 0 if the bio was successfully submitted, or a negative * error code if status was non-zero or another error happened and * the bio could not be submitted. */ int (*submit_ioend)(struct iomap_writepage_ctx *wpc, int status); /* * Optional, allows the file system to discard state on a page where * we failed to submit any I/O. */ void (*discard_folio)(struct folio *folio, loff_t pos); }; struct iomap_writepage_ctx { struct iomap iomap; struct iomap_ioend *ioend; const struct iomap_writeback_ops *ops; u32 nr_folios; /* folios added to the ioend */ }; struct iomap_ioend *iomap_init_ioend(struct inode *inode, struct bio *bio, loff_t file_offset, u16 ioend_flags); struct iomap_ioend *iomap_split_ioend(struct iomap_ioend *ioend, unsigned int max_len, bool is_append); void iomap_finish_ioends(struct iomap_ioend *ioend, int error); void iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends); void iomap_sort_ioends(struct list_head *ioend_list); int iomap_writepages(struct address_space *mapping, struct writeback_control *wbc, struct iomap_writepage_ctx *wpc, const struct iomap_writeback_ops *ops); /* * Flags for direct I/O ->end_io: */ #define IOMAP_DIO_UNWRITTEN (1 << 0) /* covers unwritten extent(s) */ #define IOMAP_DIO_COW (1 << 1) /* covers COW extent(s) */ struct iomap_dio_ops { int (*end_io)(struct kiocb *iocb, ssize_t size, int error, unsigned flags); void (*submit_io)(const struct iomap_iter *iter, struct bio *bio, loff_t file_offset); /* * Filesystems wishing to attach private information to a direct io bio * must provide a ->submit_io method that attaches the additional * information to the bio and changes the ->bi_end_io callback to a * custom function. This function should, at a minimum, perform any * relevant post-processing of the bio and end with a call to * iomap_dio_bio_end_io. */ struct bio_set *bio_set; }; /* * Wait for the I/O to complete in iomap_dio_rw even if the kiocb is not * synchronous. */ #define IOMAP_DIO_FORCE_WAIT (1 << 0) /* * Do not allocate blocks or zero partial blocks, but instead fall back to * the caller by returning -EAGAIN. Used to optimize direct I/O writes that * are not aligned to the file system block size. */ #define IOMAP_DIO_OVERWRITE_ONLY (1 << 1) /* * When a page fault occurs, return a partial synchronous result and allow * the caller to retry the rest of the operation after dealing with the page * fault. */ #define IOMAP_DIO_PARTIAL (1 << 2) ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); struct iomap_dio *__iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops, const struct iomap_dio_ops *dops, unsigned int dio_flags, void *private, size_t done_before); ssize_t iomap_dio_complete(struct iomap_dio *dio); void iomap_dio_bio_end_io(struct bio *bio); #ifdef CONFIG_SWAP struct file; struct swap_info_struct; int iomap_swapfile_activate(struct swap_info_struct *sis, struct file *swap_file, sector_t *pagespan, const struct iomap_ops *ops); #else # define iomap_swapfile_activate(sis, swapfile, pagespan, ops) (-EIO) #endif /* CONFIG_SWAP */ extern struct bio_set iomap_ioend_bioset; #endif /* LINUX_IOMAP_H */
13 50 13 13 12 4 19 14 7 8 9 8 7 14 3 3 14 3 14 14 14 14 10 19 20 2 5 5 29 12 27 4 23 3 83 67 58 17 17 17 15 3 3 3 3 3 17 17 17 17 17 17 25 29 5 5 4 5 3 87 88 63 40 23 6 426 426 15 15 15 13 10 3 9 15 10 10 15 14 15 13 1 426 426 15 420 13 14 14 6 426 14 426 425 426 17 17 18 18 6 6 6 6 6 6 6 6 6 6 1 6 9 7 1 7 7 7 7 7 7 6 7 7 16 17 16 17 9 9 15 15 15 15 15 15 15 14 15 15 15 15 15 17 17 16 17 7 7 15 15 14 15 15 15 15 15 15 15 15 15 15 15 3 3 3 7 42 9 19 26 18 14 9 9 9 23 23 23 23 23 4 4 1 21 21 7 16 13 13 13 13 13 12 16 13 13 12 3 13 16 17 17 17 39 12 29 39 35 39 12 30 41 40 9 33 41 4 4 4 3 2 4 7 3 3 2 2 2 1 4 3 13 16 10 5 5 5 5 13 10 13 17 23 22 23 23 23 23 23 23 3 3 1 4 1 1 1 1 1 4 40 40 40 40 39 40 39 1 1 10 10 10 5 5 4 4 4 5 5 5 22 22 22 4 4 4 3 4 21 21 21 21 21 21 21 21 4 4 13 14 20 16 15 14 16 16 13 14 14 14 14 14 12 12 12 12 11 12 12 4 12 12 17 14 4 18 14 14 14 14 14 13 18 14 12 17 14 18 14 14 18 7 7 7 7 18 18 14 14 17 14 13 18 16 16 16 15 15 14 16 16 16 16 16 18 18 15 16 16 2 2 2 13 14 2 14 14 17 12 11 9 12 9 11 11 12 9 12 12 11 12 15 13 14 14 14 14 2 12 12 9 13 13 1 14 13 13 13 13 3 3 3 3 2 2 1 7 7 6 7 7 4 4 7 4 3 7 4 4 4 7 7 21 21 21 21 20 21 21 1 1 1 21 7 7 17 17 16 17 4 17 12 17 17 17 17 17 13 17 17 17 16 7 7 7 7 7 7 1 7 4 2 7 5 6 5 5 2 1 8 8 8 8 8 8 7 7 1 1 1 8 8 8 8 7 2 7 8 1 1 1 1 6 6 5 5 5 6 5 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 1 2 2 1 2 2 1 1 1 2 2 2 2 1 2 1 2 1 1 1 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 1 3 3 1 2 1 3 1 1 1 1 2 6 5 6 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 6 5 6 6 59 60 59 61 9 9 9 7 6 2 2 2 9 3 1 2 8 7 8 3 45 33 13 12 3 2 12 13 13 13 5 16 16 16 16 16 16 14 16 13 4 15 12 6 6 2 6 11 58 56 29 26 58 58 39 39 39 37 38 14 15 22 23 23 23 22 23 18 23 18 5 5 5 16 15 3 3 15 14 10 1 9 10 4 1 4 4 4 16 16 10 10 1 1 14 1 10 10 10 1 1 16 16 11 1 10 11 10 11 11 10 15 16 10 4 4 6 14 15 10 16 11 11 14 1 16 10 10 10 15 14 9 9 13 3 13 2 3 2 5 5 5 5 4 4 2 1 1 5 3 3 3 5 7 7 7 7 7 7 2 7 7 7 7 7 7 7 7 7 1 6 6 6 6 7 7 4 2 3 13 12 13 9 13 5 13 13 13 13 10 13 13 13 12 13 13 12 13 13 13 13 13 13 10 10 10 10 10 10 10 5 10 10 10 10 10 10 1 10 3 10 10 10 5 10 10 8 10 10 10 10 10 10 10 10 10 10 10 10 10 4 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 483 19 2 2 1 409 3 414 410 414 414 409 414 410 3 3 2 1 1 1 1 1 1 1 2 2 2 2 411 412 411 1 1 409 1 1 1 414 2 412 412 1 1 1 1 410 408 1 410 411 408 410 409 409 412 412 412 1 1 1 411 410 9 9 3 3 20 21 18 20 19 19 19 19 19 19 4 4 3 3 3 3 3 3 4 21 18 5 19 20 1 506 503 508 508 502 497 508 507 1 1 1 370 373 370 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093 7094 7095 7096 7097 7098 7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110 7111 7112 7113 7114 7115 7116 7117 7118 7119 7120 7121 7122 7123 7124 7125 7126 7127 7128 7129 7130 7131 7132 7133 7134 7135 7136 7137 7138 7139 7140 7141 7142 7143 7144 7145 7146 7147 7148 7149 7150 7151 7152 7153 7154 7155 7156 7157 7158 7159 7160 7161 7162 7163 7164 7165 7166 7167 7168 7169 7170 7171 7172 7173 7174 7175 7176 7177 7178 7179 7180 7181 7182 7183 7184 7185 7186 7187 7188 7189 7190 7191 7192 7193 7194 7195 7196 7197 7198 7199 7200 7201 7202 7203 7204 7205 7206 7207 7208 7209 7210 7211 7212 7213 7214 7215 7216 7217 7218 7219 7220 7221 7222 /* * Generic process-grouping system. * * Based originally on the cpuset system, extracted by Paul Menage * Copyright (C) 2006 Google, Inc * * Notifications support * Copyright (C) 2009 Nokia Corporation * Author: Kirill A. Shutemov * * Copyright notices from the original cpuset code: * -------------------------------------------------- * Copyright (C) 2003 BULL SA. * Copyright (C) 2004-2006 Silicon Graphics, Inc. * * Portions derived from Patrick Mochel's sysfs code. * sysfs is Copyright (c) 2001-3 Patrick Mochel * * 2003-10-10 Written by Simon Derr. * 2003-10-22 Updates by Stephen Hemminger. * 2004 May-July Rework by Paul Jackson. * --------------------------------------------------- * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of the Linux * distribution for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "cgroup-internal.h" #include <linux/bpf-cgroup.h> #include <linux/cred.h> #include <linux/errno.h> #include <linux/init_task.h> #include <linux/kernel.h> #include <linux/magic.h> #include <linux/mutex.h> #include <linux/mount.h> #include <linux/pagemap.h> #include <linux/proc_fs.h> #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/percpu-rwsem.h> #include <linux/string.h> #include <linux/hashtable.h> #include <linux/idr.h> #include <linux/kthread.h> #include <linux/atomic.h> #include <linux/cpuset.h> #include <linux/proc_ns.h> #include <linux/nsproxy.h> #include <linux/file.h> #include <linux/fs_parser.h> #include <linux/sched/cputime.h> #include <linux/sched/deadline.h> #include <linux/psi.h> #include <net/sock.h> #define CREATE_TRACE_POINTS #include <trace/events/cgroup.h> #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \ MAX_CFTYPE_NAME + 2) /* let's not notify more than 100 times per second */ #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100) /* * To avoid confusing the compiler (and generating warnings) with code * that attempts to access what would be a 0-element array (i.e. sized * to a potentially empty array when CGROUP_SUBSYS_COUNT == 0), this * constant expression can be added. */ #define CGROUP_HAS_SUBSYS_CONFIG (CGROUP_SUBSYS_COUNT > 0) /* * cgroup_mutex is the master lock. Any modification to cgroup or its * hierarchy must be performed while holding it. * * css_set_lock protects task->cgroups pointer, the list of css_set * objects, and the chain of tasks off each css_set. * * These locks are exported if CONFIG_PROVE_RCU so that accessors in * cgroup.h can use them for lockdep annotations. */ DEFINE_MUTEX(cgroup_mutex); DEFINE_SPINLOCK(css_set_lock); #if (defined CONFIG_PROVE_RCU || defined CONFIG_LOCKDEP) EXPORT_SYMBOL_GPL(cgroup_mutex); EXPORT_SYMBOL_GPL(css_set_lock); #endif DEFINE_SPINLOCK(trace_cgroup_path_lock); char trace_cgroup_path[TRACE_CGROUP_PATH_LEN]; static bool cgroup_debug __read_mostly; /* * Protects cgroup_idr and css_idr so that IDs can be released without * grabbing cgroup_mutex. */ static DEFINE_SPINLOCK(cgroup_idr_lock); /* * Protects cgroup_file->kn for !self csses. It synchronizes notifications * against file removal/re-creation across css hiding. */ static DEFINE_SPINLOCK(cgroup_file_kn_lock); DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem); #define cgroup_assert_mutex_or_rcu_locked() \ RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \ !lockdep_is_held(&cgroup_mutex), \ "cgroup_mutex or RCU read lock required"); /* * cgroup destruction makes heavy use of work items and there can be a lot * of concurrent destructions. Use a separate workqueue so that cgroup * destruction work items don't end up filling up max_active of system_wq * which may lead to deadlock. */ static struct workqueue_struct *cgroup_destroy_wq; /* generate an array of cgroup subsystem pointers */ #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys, struct cgroup_subsys *cgroup_subsys[] = { #include <linux/cgroup_subsys.h> }; #undef SUBSYS /* array of cgroup subsystem names */ #define SUBSYS(_x) [_x ## _cgrp_id] = #_x, static const char *cgroup_subsys_name[] = { #include <linux/cgroup_subsys.h> }; #undef SUBSYS /* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */ #define SUBSYS(_x) \ DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key); \ DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key); \ EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key); \ EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key); #include <linux/cgroup_subsys.h> #undef SUBSYS #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key, static struct static_key_true *cgroup_subsys_enabled_key[] = { #include <linux/cgroup_subsys.h> }; #undef SUBSYS #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key, static struct static_key_true *cgroup_subsys_on_dfl_key[] = { #include <linux/cgroup_subsys.h> }; #undef SUBSYS static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu); /* the default hierarchy */ struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu }; EXPORT_SYMBOL_GPL(cgrp_dfl_root); /* * The default hierarchy always exists but is hidden until mounted for the * first time. This is for backward compatibility. */ bool cgrp_dfl_visible; /* some controllers are not supported in the default hierarchy */ static u16 cgrp_dfl_inhibit_ss_mask; /* some controllers are implicitly enabled on the default hierarchy */ static u16 cgrp_dfl_implicit_ss_mask; /* some controllers can be threaded on the default hierarchy */ static u16 cgrp_dfl_threaded_ss_mask; /* The list of hierarchy roots */ LIST_HEAD(cgroup_roots); static int cgroup_root_count; /* hierarchy ID allocation and mapping, protected by cgroup_mutex */ static DEFINE_IDR(cgroup_hierarchy_idr); /* * Assign a monotonically increasing serial number to csses. It guarantees * cgroups with bigger numbers are newer than those with smaller numbers. * Also, as csses are always appended to the parent's ->children list, it * guarantees that sibling csses are always sorted in the ascending serial * number order on the list. Protected by cgroup_mutex. */ static u64 css_serial_nr_next = 1; /* * These bitmasks identify subsystems with specific features to avoid * having to do iterative checks repeatedly. */ static u16 have_fork_callback __read_mostly; static u16 have_exit_callback __read_mostly; static u16 have_release_callback __read_mostly; static u16 have_canfork_callback __read_mostly; static bool have_favordynmods __ro_after_init = IS_ENABLED(CONFIG_CGROUP_FAVOR_DYNMODS); /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { .ns.count = REFCOUNT_INIT(2), .user_ns = &init_user_ns, .ns.ops = &cgroupns_operations, .ns.inum = PROC_CGROUP_INIT_INO, .root_cset = &init_css_set, }; static struct file_system_type cgroup2_fs_type; static struct cftype cgroup_base_files[]; static struct cftype cgroup_psi_files[]; /* cgroup optional features */ enum cgroup_opt_features { #ifdef CONFIG_PSI OPT_FEATURE_PRESSURE, #endif OPT_FEATURE_COUNT }; static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = { #ifdef CONFIG_PSI "pressure", #endif }; static u16 cgroup_feature_disable_mask __read_mostly; static int cgroup_apply_control(struct cgroup *cgrp); static void cgroup_finalize_control(struct cgroup *cgrp, int ret); static void css_task_iter_skip(struct css_task_iter *it, struct task_struct *task); static int cgroup_destroy_locked(struct cgroup *cgrp); static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, struct cgroup_subsys *ss); static void css_release(struct percpu_ref *ref); static void kill_css(struct cgroup_subsys_state *css); static int cgroup_addrm_files(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype cfts[], bool is_add); #ifdef CONFIG_DEBUG_CGROUP_REF #define CGROUP_REF_FN_ATTRS noinline #define CGROUP_REF_EXPORT(fn) EXPORT_SYMBOL_GPL(fn); #include <linux/cgroup_refcnt.h> #endif /** * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID * @ssid: subsys ID of interest * * cgroup_subsys_enabled() can only be used with literal subsys names which * is fine for individual subsystems but unsuitable for cgroup core. This * is slower static_key_enabled() based test indexed by @ssid. */ bool cgroup_ssid_enabled(int ssid) { if (!CGROUP_HAS_SUBSYS_CONFIG) return false; return static_key_enabled(cgroup_subsys_enabled_key[ssid]); } /** * cgroup_on_dfl - test whether a cgroup is on the default hierarchy * @cgrp: the cgroup of interest * * The default hierarchy is the v2 interface of cgroup and this function * can be used to test whether a cgroup is on the default hierarchy for * cases where a subsystem should behave differently depending on the * interface version. * * List of changed behaviors: * * - Mount options "noprefix", "xattr", "clone_children", "release_agent" * and "name" are disallowed. * * - When mounting an existing superblock, mount options should match. * * - rename(2) is disallowed. * * - "tasks" is removed. Everything should be at process granularity. Use * "cgroup.procs" instead. * * - "cgroup.procs" is not sorted. pids will be unique unless they got * recycled in-between reads. * * - "release_agent" and "notify_on_release" are removed. Replacement * notification mechanism will be implemented. * * - "cgroup.clone_children" is removed. * * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup * and its descendants contain no task; otherwise, 1. The file also * generates kernfs notification which can be monitored through poll and * [di]notify when the value of the file changes. * * - cpuset: tasks will be kept in empty cpusets when hotplug happens and * take masks of ancestors with non-empty cpus/mems, instead of being * moved to an ancestor. * * - cpuset: a task can be moved into an empty cpuset, and again it takes * masks of ancestors. * * - blkcg: blk-throttle becomes properly hierarchical. */ bool cgroup_on_dfl(const struct cgroup *cgrp) { return cgrp->root == &cgrp_dfl_root; } /* IDR wrappers which synchronize using cgroup_idr_lock */ static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask) { int ret; idr_preload(gfp_mask); spin_lock_bh(&cgroup_idr_lock); ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM); spin_unlock_bh(&cgroup_idr_lock); idr_preload_end(); return ret; } static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id) { void *ret; spin_lock_bh(&cgroup_idr_lock); ret = idr_replace(idr, ptr, id); spin_unlock_bh(&cgroup_idr_lock); return ret; } static void cgroup_idr_remove(struct idr *idr, int id) { spin_lock_bh(&cgroup_idr_lock); idr_remove(idr, id); spin_unlock_bh(&cgroup_idr_lock); } static bool cgroup_has_tasks(struct cgroup *cgrp) { return cgrp->nr_populated_csets; } static bool cgroup_is_threaded(struct cgroup *cgrp) { return cgrp->dom_cgrp != cgrp; } /* can @cgrp host both domain and threaded children? */ static bool cgroup_is_mixable(struct cgroup *cgrp) { /* * Root isn't under domain level resource control exempting it from * the no-internal-process constraint, so it can serve as a thread * root and a parent of resource domains at the same time. */ return !cgroup_parent(cgrp); } /* can @cgrp become a thread root? Should always be true for a thread root */ static bool cgroup_can_be_thread_root(struct cgroup *cgrp) { /* mixables don't care */ if (cgroup_is_mixable(cgrp)) return true; /* domain roots can't be nested under threaded */ if (cgroup_is_threaded(cgrp)) return false; /* can only have either domain or threaded children */ if (cgrp->nr_populated_domain_children) return false; /* and no domain controllers can be enabled */ if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask) return false; return true; } /* is @cgrp root of a threaded subtree? */ static bool cgroup_is_thread_root(struct cgroup *cgrp) { /* thread root should be a domain */ if (cgroup_is_threaded(cgrp)) return false; /* a domain w/ threaded children is a thread root */ if (cgrp->nr_threaded_children) return true; /* * A domain which has tasks and explicit threaded controllers * enabled is a thread root. */ if (cgroup_has_tasks(cgrp) && (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask)) return true; return false; } /* a domain which isn't connected to the root w/o brekage can't be used */ static bool cgroup_is_valid_domain(struct cgroup *cgrp) { /* the cgroup itself can be a thread root */ if (cgroup_is_threaded(cgrp)) return false; /* but the ancestors can't be unless mixable */ while ((cgrp = cgroup_parent(cgrp))) { if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp)) return false; if (cgroup_is_threaded(cgrp)) return false; } return true; } /* subsystems visibly enabled on a cgroup */ static u16 cgroup_control(struct cgroup *cgrp) { struct cgroup *parent = cgroup_parent(cgrp); u16 root_ss_mask = cgrp->root->subsys_mask; if (parent) { u16 ss_mask = parent->subtree_control; /* threaded cgroups can only have threaded controllers */ if (cgroup_is_threaded(cgrp)) ss_mask &= cgrp_dfl_threaded_ss_mask; return ss_mask; } if (cgroup_on_dfl(cgrp)) root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask | cgrp_dfl_implicit_ss_mask); return root_ss_mask; } /* subsystems enabled on a cgroup */ static u16 cgroup_ss_mask(struct cgroup *cgrp) { struct cgroup *parent = cgroup_parent(cgrp); if (parent) { u16 ss_mask = parent->subtree_ss_mask; /* threaded cgroups can only have threaded controllers */ if (cgroup_is_threaded(cgrp)) ss_mask &= cgrp_dfl_threaded_ss_mask; return ss_mask; } return cgrp->root->subsys_mask; } /** * cgroup_css - obtain a cgroup's css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This * function must be called either under cgroup_mutex or rcu_read_lock() and * the caller is responsible for pinning the returned css if it wants to * keep accessing it outside the said locks. This function may return * %NULL if @cgrp doesn't have @subsys_id enabled. */ static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp, struct cgroup_subsys *ss) { if (CGROUP_HAS_SUBSYS_CONFIG && ss) return rcu_dereference_check(cgrp->subsys[ss->id], lockdep_is_held(&cgroup_mutex)); else return &cgrp->self; } /** * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss * @cgrp: the cgroup of interest * @ss: the subsystem of interest (%NULL returns @cgrp->self) * * Similar to cgroup_css() but returns the effective css, which is defined * as the matching css of the nearest ancestor including self which has @ss * enabled. If @ss is associated with the hierarchy @cgrp is on, this * function is guaranteed to return non-NULL css. */ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, struct cgroup_subsys *ss) { lockdep_assert_held(&cgroup_mutex); if (!ss) return &cgrp->self; /* * This function is used while updating css associations and thus * can't test the csses directly. Test ss_mask. */ while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) { cgrp = cgroup_parent(cgrp); if (!cgrp) return NULL; } return cgroup_css(cgrp, ss); } /** * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest * * Find and get the effective css of @cgrp for @ss. The effective css is * defined as the matching css of the nearest ancestor including self which * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, * the root css is returned, so this function always returns a valid css. * * The returned css is not guaranteed to be online, and therefore it is the * callers responsibility to try get a reference for it. */ struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; if (!CGROUP_HAS_SUBSYS_CONFIG) return NULL; do { css = cgroup_css(cgrp, ss); if (css) return css; cgrp = cgroup_parent(cgrp); } while (cgrp); return init_css_set.subsys[ss->id]; } /** * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest * * Find and get the effective css of @cgrp for @ss. The effective css is * defined as the matching css of the nearest ancestor including self which * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, * the root css is returned, so this function always returns a valid css. * The returned css must be put using css_put(). */ struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp, struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; if (!CGROUP_HAS_SUBSYS_CONFIG) return NULL; rcu_read_lock(); do { css = cgroup_css(cgrp, ss); if (css && css_tryget_online(css)) goto out_unlock; cgrp = cgroup_parent(cgrp); } while (cgrp); css = init_css_set.subsys[ss->id]; css_get(css); out_unlock: rcu_read_unlock(); return css; } EXPORT_SYMBOL_GPL(cgroup_get_e_css); static void cgroup_get_live(struct cgroup *cgrp) { WARN_ON_ONCE(cgroup_is_dead(cgrp)); cgroup_get(cgrp); } /** * __cgroup_task_count - count the number of tasks in a cgroup. The caller * is responsible for taking the css_set_lock. * @cgrp: the cgroup in question */ int __cgroup_task_count(const struct cgroup *cgrp) { int count = 0; struct cgrp_cset_link *link; lockdep_assert_held(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) count += link->cset->nr_tasks; return count; } /** * cgroup_task_count - count the number of tasks in a cgroup. * @cgrp: the cgroup in question */ int cgroup_task_count(const struct cgroup *cgrp) { int count; spin_lock_irq(&css_set_lock); count = __cgroup_task_count(cgrp); spin_unlock_irq(&css_set_lock); return count; } static struct cgroup *kn_priv(struct kernfs_node *kn) { struct kernfs_node *parent; /* * The parent can not be replaced due to KERNFS_ROOT_INVARIANT_PARENT. * Therefore it is always safe to dereference this pointer outside of a * RCU section. */ parent = rcu_dereference_check(kn->__parent, kernfs_root_flags(kn) & KERNFS_ROOT_INVARIANT_PARENT); return parent->priv; } struct cgroup_subsys_state *of_css(struct kernfs_open_file *of) { struct cgroup *cgrp = kn_priv(of->kn); struct cftype *cft = of_cft(of); /* * This is open and unprotected implementation of cgroup_css(). * seq_css() is only called from a kernfs file operation which has * an active reference on the file. Because all the subsystem * files are drained before a css is disassociated with a cgroup, * the matching css from the cgroup's subsys table is guaranteed to * be and stay valid until the enclosing operation is complete. */ if (CGROUP_HAS_SUBSYS_CONFIG && cft->ss) return rcu_dereference_raw(cgrp->subsys[cft->ss->id]); else return &cgrp->self; } EXPORT_SYMBOL_GPL(of_css); /** * for_each_css - iterate all css's of a cgroup * @css: the iteration cursor * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end * @cgrp: the target cgroup to iterate css's of * * Should be called under cgroup_mutex. */ #define for_each_css(css, ssid, cgrp) \ for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ if (!((css) = rcu_dereference_check( \ (cgrp)->subsys[(ssid)], \ lockdep_is_held(&cgroup_mutex)))) { } \ else /** * do_each_subsys_mask - filter for_each_subsys with a bitmask * @ss: the iteration cursor * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end * @ss_mask: the bitmask * * The block will only run for cases where the ssid-th bit (1 << ssid) of * @ss_mask is set. */ #define do_each_subsys_mask(ss, ssid, ss_mask) do { \ unsigned long __ss_mask = (ss_mask); \ if (!CGROUP_HAS_SUBSYS_CONFIG) { \ (ssid) = 0; \ break; \ } \ for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \ (ss) = cgroup_subsys[ssid]; \ { #define while_each_subsys_mask() \ } \ } \ } while (false) /* iterate over child cgrps, lock should be held throughout iteration */ #define cgroup_for_each_live_child(child, cgrp) \ list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \ if (({ lockdep_assert_held(&cgroup_mutex); \ cgroup_is_dead(child); })) \ ; \ else /* walk live descendants in pre order */ #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \ css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \ if (({ lockdep_assert_held(&cgroup_mutex); \ (dsct) = (d_css)->cgroup; \ cgroup_is_dead(dsct); })) \ ; \ else /* walk live descendants in postorder */ #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \ css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \ if (({ lockdep_assert_held(&cgroup_mutex); \ (dsct) = (d_css)->cgroup; \ cgroup_is_dead(dsct); })) \ ; \ else /* * The default css_set - used by init and its children prior to any * hierarchies being mounted. It contains a pointer to the root state * for each subsystem. Also used to anchor the list of css_sets. Not * reference-counted, to improve performance when child cgroups * haven't been created. */ struct css_set init_css_set = { .refcount = REFCOUNT_INIT(1), .dom_cset = &init_css_set, .tasks = LIST_HEAD_INIT(init_css_set.tasks), .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks), .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks), .task_iters = LIST_HEAD_INIT(init_css_set.task_iters), .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets), .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links), .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node), .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node), .mg_node = LIST_HEAD_INIT(init_css_set.mg_node), /* * The following field is re-initialized when this cset gets linked * in cgroup_init(). However, let's initialize the field * statically too so that the default cgroup can be accessed safely * early during boot. */ .dfl_cgrp = &cgrp_dfl_root.cgrp, }; static int css_set_count = 1; /* 1 for init_css_set */ static bool css_set_threaded(struct css_set *cset) { return cset->dom_cset != cset; } /** * css_set_populated - does a css_set contain any tasks? * @cset: target css_set * * css_set_populated() should be the same as !!cset->nr_tasks at steady * state. However, css_set_populated() can be called while a task is being * added to or removed from the linked list before the nr_tasks is * properly updated. Hence, we can't just look at ->nr_tasks here. */ static bool css_set_populated(struct css_set *cset) { lockdep_assert_held(&css_set_lock); return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks); } /** * cgroup_update_populated - update the populated count of a cgroup * @cgrp: the target cgroup * @populated: inc or dec populated count * * One of the css_sets associated with @cgrp is either getting its first * task or losing the last. Update @cgrp->nr_populated_* accordingly. The * count is propagated towards root so that a given cgroup's * nr_populated_children is zero iff none of its descendants contain any * tasks. * * @cgrp's interface file "cgroup.populated" is zero if both * @cgrp->nr_populated_csets and @cgrp->nr_populated_children are zero and * 1 otherwise. When the sum changes from or to zero, userland is notified * that the content of the interface file has changed. This can be used to * detect when @cgrp and its descendants become populated or empty. */ static void cgroup_update_populated(struct cgroup *cgrp, bool populated) { struct cgroup *child = NULL; int adj = populated ? 1 : -1; lockdep_assert_held(&css_set_lock); do { bool was_populated = cgroup_is_populated(cgrp); if (!child) { cgrp->nr_populated_csets += adj; } else { if (cgroup_is_threaded(child)) cgrp->nr_populated_threaded_children += adj; else cgrp->nr_populated_domain_children += adj; } if (was_populated == cgroup_is_populated(cgrp)) break; cgroup1_check_for_release(cgrp); TRACE_CGROUP_PATH(notify_populated, cgrp, cgroup_is_populated(cgrp)); cgroup_file_notify(&cgrp->events_file); child = cgrp; cgrp = cgroup_parent(cgrp); } while (cgrp); } /** * css_set_update_populated - update populated state of a css_set * @cset: target css_set * @populated: whether @cset is populated or depopulated * * @cset is either getting the first task or losing the last. Update the * populated counters of all associated cgroups accordingly. */ static void css_set_update_populated(struct css_set *cset, bool populated) { struct cgrp_cset_link *link; lockdep_assert_held(&css_set_lock); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) cgroup_update_populated(link->cgrp, populated); } /* * @task is leaving, advance task iterators which are pointing to it so * that they can resume at the next position. Advancing an iterator might * remove it from the list, use safe walk. See css_task_iter_skip() for * details. */ static void css_set_skip_task_iters(struct css_set *cset, struct task_struct *task) { struct css_task_iter *it, *pos; list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node) css_task_iter_skip(it, task); } /** * css_set_move_task - move a task from one css_set to another * @task: task being moved * @from_cset: css_set @task currently belongs to (may be NULL) * @to_cset: new css_set @task is being moved to (may be NULL) * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks * * Move @task from @from_cset to @to_cset. If @task didn't belong to any * css_set, @from_cset can be NULL. If @task is being disassociated * instead of moved, @to_cset can be NULL. * * This function automatically handles populated counter updates and * css_task_iter adjustments but the caller is responsible for managing * @from_cset and @to_cset's reference counts. */ static void css_set_move_task(struct task_struct *task, struct css_set *from_cset, struct css_set *to_cset, bool use_mg_tasks) { lockdep_assert_held(&css_set_lock); if (to_cset && !css_set_populated(to_cset)) css_set_update_populated(to_cset, true); if (from_cset) { WARN_ON_ONCE(list_empty(&task->cg_list)); css_set_skip_task_iters(from_cset, task); list_del_init(&task->cg_list); if (!css_set_populated(from_cset)) css_set_update_populated(from_cset, false); } else { WARN_ON_ONCE(!list_empty(&task->cg_list)); } if (to_cset) { /* * We are synchronized through cgroup_threadgroup_rwsem * against PF_EXITING setting such that we can't race * against cgroup_exit()/cgroup_free() dropping the css_set. */ WARN_ON_ONCE(task->flags & PF_EXITING); cgroup_move_task(task, to_cset); list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks : &to_cset->tasks); } } /* * hash table for cgroup groups. This improves the performance to find * an existing css_set. This hash doesn't (currently) take into * account cgroups in empty hierarchies. */ #define CSS_SET_HASH_BITS 7 static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); static unsigned long css_set_hash(struct cgroup_subsys_state **css) { unsigned long key = 0UL; struct cgroup_subsys *ss; int i; for_each_subsys(ss, i) key += (unsigned long)css[i]; key = (key >> 16) ^ key; return key; } void put_css_set_locked(struct css_set *cset) { struct cgrp_cset_link *link, *tmp_link; struct cgroup_subsys *ss; int ssid; lockdep_assert_held(&css_set_lock); if (!refcount_dec_and_test(&cset->refcount)) return; WARN_ON_ONCE(!list_empty(&cset->threaded_csets)); /* This css_set is dead. Unlink it and release cgroup and css refs */ for_each_subsys(ss, ssid) { list_del(&cset->e_cset_node[ssid]); css_put(cset->subsys[ssid]); } hash_del(&cset->hlist); css_set_count--; list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) { list_del(&link->cset_link); list_del(&link->cgrp_link); if (cgroup_parent(link->cgrp)) cgroup_put(link->cgrp); kfree(link); } if (css_set_threaded(cset)) { list_del(&cset->threaded_csets_node); put_css_set_locked(cset->dom_cset); } kfree_rcu(cset, rcu_head); } /** * compare_css_sets - helper function for find_existing_css_set(). * @cset: candidate css_set being tested * @old_cset: existing css_set for a task * @new_cgrp: cgroup that's being entered by the task * @template: desired set of css pointers in css_set (pre-calculated) * * Returns true if "cset" matches "old_cset" except for the hierarchy * which "new_cgrp" belongs to, for which it should match "new_cgrp". */ static bool compare_css_sets(struct css_set *cset, struct css_set *old_cset, struct cgroup *new_cgrp, struct cgroup_subsys_state *template[]) { struct cgroup *new_dfl_cgrp; struct list_head *l1, *l2; /* * On the default hierarchy, there can be csets which are * associated with the same set of cgroups but different csses. * Let's first ensure that csses match. */ if (memcmp(template, cset->subsys, sizeof(cset->subsys))) return false; /* @cset's domain should match the default cgroup's */ if (cgroup_on_dfl(new_cgrp)) new_dfl_cgrp = new_cgrp; else new_dfl_cgrp = old_cset->dfl_cgrp; if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp) return false; /* * Compare cgroup pointers in order to distinguish between * different cgroups in hierarchies. As different cgroups may * share the same effective css, this comparison is always * necessary. */ l1 = &cset->cgrp_links; l2 = &old_cset->cgrp_links; while (1) { struct cgrp_cset_link *link1, *link2; struct cgroup *cgrp1, *cgrp2; l1 = l1->next; l2 = l2->next; /* See if we reached the end - both lists are equal length. */ if (l1 == &cset->cgrp_links) { BUG_ON(l2 != &old_cset->cgrp_links); break; } else { BUG_ON(l2 == &old_cset->cgrp_links); } /* Locate the cgroups associated with these links. */ link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link); link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link); cgrp1 = link1->cgrp; cgrp2 = link2->cgrp; /* Hierarchies should be linked in the same order. */ BUG_ON(cgrp1->root != cgrp2->root); /* * If this hierarchy is the hierarchy of the cgroup * that's changing, then we need to check that this * css_set points to the new cgroup; if it's any other * hierarchy, then this css_set should point to the * same cgroup as the old css_set. */ if (cgrp1->root == new_cgrp->root) { if (cgrp1 != new_cgrp) return false; } else { if (cgrp1 != cgrp2) return false; } } return true; } /** * find_existing_css_set - init css array and find the matching css_set * @old_cset: the css_set that we're using before the cgroup transition * @cgrp: the cgroup that we're moving into * @template: out param for the new set of csses, should be clear on entry */ static struct css_set *find_existing_css_set(struct css_set *old_cset, struct cgroup *cgrp, struct cgroup_subsys_state **template) { struct cgroup_root *root = cgrp->root; struct cgroup_subsys *ss; struct css_set *cset; unsigned long key; int i; /* * Build the set of subsystem state objects that we want to see in the * new css_set. While subsystems can change globally, the entries here * won't change, so no need for locking. */ for_each_subsys(ss, i) { if (root->subsys_mask & (1UL << i)) { /* * @ss is in this hierarchy, so we want the * effective css from @cgrp. */ template[i] = cgroup_e_css_by_mask(cgrp, ss); } else { /* * @ss is not in this hierarchy, so we don't want * to change the css. */ template[i] = old_cset->subsys[i]; } } key = css_set_hash(template); hash_for_each_possible(css_set_table, cset, hlist, key) { if (!compare_css_sets(cset, old_cset, cgrp, template)) continue; /* This css_set matches what we need */ return cset; } /* No existing cgroup group matched */ return NULL; } static void free_cgrp_cset_links(struct list_head *links_to_free) { struct cgrp_cset_link *link, *tmp_link; list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) { list_del(&link->cset_link); kfree(link); } } /** * allocate_cgrp_cset_links - allocate cgrp_cset_links * @count: the number of links to allocate * @tmp_links: list_head the allocated links are put on * * Allocate @count cgrp_cset_link structures and chain them on @tmp_links * through ->cset_link. Returns 0 on success or -errno. */ static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links) { struct cgrp_cset_link *link; int i; INIT_LIST_HEAD(tmp_links); for (i = 0; i < count; i++) { link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) { free_cgrp_cset_links(tmp_links); return -ENOMEM; } list_add(&link->cset_link, tmp_links); } return 0; } /** * link_css_set - a helper function to link a css_set to a cgroup * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links() * @cset: the css_set to be linked * @cgrp: the destination cgroup */ static void link_css_set(struct list_head *tmp_links, struct css_set *cset, struct cgroup *cgrp) { struct cgrp_cset_link *link; BUG_ON(list_empty(tmp_links)); if (cgroup_on_dfl(cgrp)) cset->dfl_cgrp = cgrp; link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link); link->cset = cset; link->cgrp = cgrp; /* * Always add links to the tail of the lists so that the lists are * in chronological order. */ list_move_tail(&link->cset_link, &cgrp->cset_links); list_add_tail(&link->cgrp_link, &cset->cgrp_links); if (cgroup_parent(cgrp)) cgroup_get_live(cgrp); } /** * find_css_set - return a new css_set with one cgroup updated * @old_cset: the baseline css_set * @cgrp: the cgroup to be updated * * Return a new css_set that's equivalent to @old_cset, but with @cgrp * substituted into the appropriate hierarchy. */ static struct css_set *find_css_set(struct css_set *old_cset, struct cgroup *cgrp) { struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { }; struct css_set *cset; struct list_head tmp_links; struct cgrp_cset_link *link; struct cgroup_subsys *ss; unsigned long key; int ssid; lockdep_assert_held(&cgroup_mutex); /* First see if we already have a cgroup group that matches * the desired set */ spin_lock_irq(&css_set_lock); cset = find_existing_css_set(old_cset, cgrp, template); if (cset) get_css_set(cset); spin_unlock_irq(&css_set_lock); if (cset) return cset; cset = kzalloc(sizeof(*cset), GFP_KERNEL); if (!cset) return NULL; /* Allocate all the cgrp_cset_link objects that we'll need */ if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) { kfree(cset); return NULL; } refcount_set(&cset->refcount, 1); cset->dom_cset = cset; INIT_LIST_HEAD(&cset->tasks); INIT_LIST_HEAD(&cset->mg_tasks); INIT_LIST_HEAD(&cset->dying_tasks); INIT_LIST_HEAD(&cset->task_iters); INIT_LIST_HEAD(&cset->threaded_csets); INIT_HLIST_NODE(&cset->hlist); INIT_LIST_HEAD(&cset->cgrp_links); INIT_LIST_HEAD(&cset->mg_src_preload_node); INIT_LIST_HEAD(&cset->mg_dst_preload_node); INIT_LIST_HEAD(&cset->mg_node); /* Copy the set of subsystem state objects generated in * find_existing_css_set() */ memcpy(cset->subsys, template, sizeof(cset->subsys)); spin_lock_irq(&css_set_lock); /* Add reference counts and links from the new css_set. */ list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; if (c->root == cgrp->root) c = cgrp; link_css_set(&tmp_links, cset, c); } BUG_ON(!list_empty(&tmp_links)); css_set_count++; /* Add @cset to the hash table */ key = css_set_hash(cset->subsys); hash_add(css_set_table, &cset->hlist, key); for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cset->subsys[ssid]; list_add_tail(&cset->e_cset_node[ssid], &css->cgroup->e_csets[ssid]); css_get(css); } spin_unlock_irq(&css_set_lock); /* * If @cset should be threaded, look up the matching dom_cset and * link them up. We first fully initialize @cset then look for the * dom_cset. It's simpler this way and safe as @cset is guaranteed * to stay empty until we return. */ if (cgroup_is_threaded(cset->dfl_cgrp)) { struct css_set *dcset; dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp); if (!dcset) { put_css_set(cset); return NULL; } spin_lock_irq(&css_set_lock); cset->dom_cset = dcset; list_add_tail(&cset->threaded_csets_node, &dcset->threaded_csets); spin_unlock_irq(&css_set_lock); } return cset; } struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root) { struct cgroup *root_cgrp = kernfs_root_to_node(kf_root)->priv; return root_cgrp->root; } void cgroup_favor_dynmods(struct cgroup_root *root, bool favor) { bool favoring = root->flags & CGRP_ROOT_FAVOR_DYNMODS; /* see the comment above CGRP_ROOT_FAVOR_DYNMODS definition */ if (favor && !favoring) { rcu_sync_enter(&cgroup_threadgroup_rwsem.rss); root->flags |= CGRP_ROOT_FAVOR_DYNMODS; } else if (!favor && favoring) { rcu_sync_exit(&cgroup_threadgroup_rwsem.rss); root->flags &= ~CGRP_ROOT_FAVOR_DYNMODS; } } static int cgroup_init_root_id(struct cgroup_root *root) { int id; lockdep_assert_held(&cgroup_mutex); id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL); if (id < 0) return id; root->hierarchy_id = id; return 0; } static void cgroup_exit_root_id(struct cgroup_root *root) { lockdep_assert_held(&cgroup_mutex); idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id); } void cgroup_free_root(struct cgroup_root *root) { kfree_rcu(root, rcu); } static void cgroup_destroy_root(struct cgroup_root *root) { struct cgroup *cgrp = &root->cgrp; struct cgrp_cset_link *link, *tmp_link; trace_cgroup_destroy_root(root); cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp); BUG_ON(atomic_read(&root->nr_cgrps)); BUG_ON(!list_empty(&cgrp->self.children)); /* Rebind all subsystems back to the default hierarchy */ WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask)); /* * Release all the links from cset_links to this hierarchy's * root cgroup */ spin_lock_irq(&css_set_lock); list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) { list_del(&link->cset_link); list_del(&link->cgrp_link); kfree(link); } spin_unlock_irq(&css_set_lock); WARN_ON_ONCE(list_empty(&root->root_list)); list_del_rcu(&root->root_list); cgroup_root_count--; if (!have_favordynmods) cgroup_favor_dynmods(root, false); cgroup_exit_root_id(root); cgroup_unlock(); cgroup_rstat_exit(cgrp); kernfs_destroy_root(root->kf_root); cgroup_free_root(root); } /* * Returned cgroup is without refcount but it's valid as long as cset pins it. */ static inline struct cgroup *__cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { struct cgroup *res_cgroup = NULL; if (cset == &init_css_set) { res_cgroup = &root->cgrp; } else if (root == &cgrp_dfl_root) { res_cgroup = cset->dfl_cgrp; } else { struct cgrp_cset_link *link; lockdep_assert_held(&css_set_lock); list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { struct cgroup *c = link->cgrp; if (c->root == root) { res_cgroup = c; break; } } } /* * If cgroup_mutex is not held, the cgrp_cset_link will be freed * before we remove the cgroup root from the root_list. Consequently, * when accessing a cgroup root, the cset_link may have already been * freed, resulting in a NULL res_cgroup. However, by holding the * cgroup_mutex, we ensure that res_cgroup can't be NULL. * If we don't hold cgroup_mutex in the caller, we must do the NULL * check. */ return res_cgroup; } /* * look up cgroup associated with current task's cgroup namespace on the * specified hierarchy */ static struct cgroup * current_cgns_cgroup_from_root(struct cgroup_root *root) { struct cgroup *res = NULL; struct css_set *cset; lockdep_assert_held(&css_set_lock); rcu_read_lock(); cset = current->nsproxy->cgroup_ns->root_cset; res = __cset_cgroup_from_root(cset, root); rcu_read_unlock(); /* * The namespace_sem is held by current, so the root cgroup can't * be umounted. Therefore, we can ensure that the res is non-NULL. */ WARN_ON_ONCE(!res); return res; } /* * Look up cgroup associated with current task's cgroup namespace on the default * hierarchy. * * Unlike current_cgns_cgroup_from_root(), this doesn't need locks: * - Internal rcu_read_lock is unnecessary because we don't dereference any rcu * pointers. * - css_set_lock is not needed because we just read cset->dfl_cgrp. * - As a bonus returned cgrp is pinned with the current because it cannot * switch cgroup_ns asynchronously. */ static struct cgroup *current_cgns_cgroup_dfl(void) { struct css_set *cset; if (current->nsproxy) { cset = current->nsproxy->cgroup_ns->root_cset; return __cset_cgroup_from_root(cset, &cgrp_dfl_root); } else { /* * NOTE: This function may be called from bpf_cgroup_from_id() * on a task which has already passed exit_task_namespaces() and * nsproxy == NULL. Fall back to cgrp_dfl_root which will make all * cgroups visible for lookups. */ return &cgrp_dfl_root.cgrp; } } /* look up cgroup associated with given css_set on the specified hierarchy */ static struct cgroup *cset_cgroup_from_root(struct css_set *cset, struct cgroup_root *root) { lockdep_assert_held(&css_set_lock); return __cset_cgroup_from_root(cset, root); } /* * Return the cgroup for "task" from the given hierarchy. Must be * called with css_set_lock held to prevent task's groups from being modified. * Must be called with either cgroup_mutex or rcu read lock to prevent the * cgroup root from being destroyed. */ struct cgroup *task_cgroup_from_root(struct task_struct *task, struct cgroup_root *root) { /* * No need to lock the task - since we hold css_set_lock the * task can't change groups. */ return cset_cgroup_from_root(task_css_set(task), root); } /* * A task must hold cgroup_mutex to modify cgroups. * * Any task can increment and decrement the count field without lock. * So in general, code holding cgroup_mutex can't rely on the count * field not changing. However, if the count goes to zero, then only * cgroup_attach_task() can increment it again. Because a count of zero * means that no tasks are currently attached, therefore there is no * way a task attached to that cgroup can fork (the other way to * increment the count). So code holding cgroup_mutex can safely * assume that if the count is zero, it will stay zero. Similarly, if * a task holds cgroup_mutex on a cgroup with zero count, it * knows that the cgroup won't be removed, as cgroup_rmdir() * needs that mutex. * * A cgroup can only be deleted if both its 'count' of using tasks * is zero, and its list of 'children' cgroups is empty. Since all * tasks in the system use _some_ cgroup, and since there is always at * least one task in the system (init, pid == 1), therefore, root cgroup * always has either children cgroups and/or using tasks. So we don't * need a special hack to ensure that root cgroup cannot be deleted. * * P.S. One more locking exception. RCU is used to guard the * update of a tasks cgroup pointer by cgroup_attach_task() */ static struct kernfs_syscall_ops cgroup_kf_syscall_ops; static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft, char *buf) { struct cgroup_subsys *ss = cft->ss; if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) && !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) { const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : ""; snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s", dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name, cft->name); } else { strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX); } return buf; } /** * cgroup_file_mode - deduce file mode of a control file * @cft: the control file in question * * S_IRUGO for read, S_IWUSR for write. */ static umode_t cgroup_file_mode(const struct cftype *cft) { umode_t mode = 0; if (cft->read_u64 || cft->read_s64 || cft->seq_show) mode |= S_IRUGO; if (cft->write_u64 || cft->write_s64 || cft->write) { if (cft->flags & CFTYPE_WORLD_WRITABLE) mode |= S_IWUGO; else mode |= S_IWUSR; } return mode; } /** * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask * @subtree_control: the new subtree_control mask to consider * @this_ss_mask: available subsystems * * On the default hierarchy, a subsystem may request other subsystems to be * enabled together through its ->depends_on mask. In such cases, more * subsystems than specified in "cgroup.subtree_control" may be enabled. * * This function calculates which subsystems need to be enabled if * @subtree_control is to be applied while restricted to @this_ss_mask. */ static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask) { u16 cur_ss_mask = subtree_control; struct cgroup_subsys *ss; int ssid; lockdep_assert_held(&cgroup_mutex); cur_ss_mask |= cgrp_dfl_implicit_ss_mask; while (true) { u16 new_ss_mask = cur_ss_mask; do_each_subsys_mask(ss, ssid, cur_ss_mask) { new_ss_mask |= ss->depends_on; } while_each_subsys_mask(); /* * Mask out subsystems which aren't available. This can * happen only if some depended-upon subsystems were bound * to non-default hierarchies. */ new_ss_mask &= this_ss_mask; if (new_ss_mask == cur_ss_mask) break; cur_ss_mask = new_ss_mask; } return cur_ss_mask; } /** * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods * @kn: the kernfs_node being serviced * * This helper undoes cgroup_kn_lock_live() and should be invoked before * the method finishes if locking succeeded. Note that once this function * returns the cgroup returned by cgroup_kn_lock_live() may become * inaccessible any time. If the caller intends to continue to access the * cgroup, it should pin it before invoking this function. */ void cgroup_kn_unlock(struct kernfs_node *kn) { struct cgroup *cgrp; if (kernfs_type(kn) == KERNFS_DIR) cgrp = kn->priv; else cgrp = kn_priv(kn); cgroup_unlock(); kernfs_unbreak_active_protection(kn); cgroup_put(cgrp); } /** * cgroup_kn_lock_live - locking helper for cgroup kernfs methods * @kn: the kernfs_node being serviced * @drain_offline: perform offline draining on the cgroup * * This helper is to be used by a cgroup kernfs method currently servicing * @kn. It breaks the active protection, performs cgroup locking and * verifies that the associated cgroup is alive. Returns the cgroup if * alive; otherwise, %NULL. A successful return should be undone by a * matching cgroup_kn_unlock() invocation. If @drain_offline is %true, the * cgroup is drained of offlining csses before return. * * Any cgroup kernfs method implementation which requires locking the * associated cgroup should use this helper. It avoids nesting cgroup * locking under kernfs active protection and allows all kernfs operations * including self-removal. */ struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline) { struct cgroup *cgrp; if (kernfs_type(kn) == KERNFS_DIR) cgrp = kn->priv; else cgrp = kn_priv(kn); /* * We're gonna grab cgroup_mutex which nests outside kernfs * active_ref. cgroup liveliness check alone provides enough * protection against removal. Ensure @cgrp stays accessible and * break the active_ref protection. */ if (!cgroup_tryget(cgrp)) return NULL; kernfs_break_active_protection(kn); if (drain_offline) cgroup_lock_and_drain_offline(cgrp); else cgroup_lock(); if (!cgroup_is_dead(cgrp)) return cgrp; cgroup_kn_unlock(kn); return NULL; } static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft) { char name[CGROUP_FILE_NAME_MAX]; lockdep_assert_held(&cgroup_mutex); if (cft->file_offset) { struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss); struct cgroup_file *cfile = (void *)css + cft->file_offset; spin_lock_irq(&cgroup_file_kn_lock); cfile->kn = NULL; spin_unlock_irq(&cgroup_file_kn_lock); timer_delete_sync(&cfile->notify_timer); } kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name)); } /** * css_clear_dir - remove subsys files in a cgroup directory * @css: target css */ static void css_clear_dir(struct cgroup_subsys_state *css) { struct cgroup *cgrp = css->cgroup; struct cftype *cfts; if (!(css->flags & CSS_VISIBLE)) return; css->flags &= ~CSS_VISIBLE; if (!css->ss) { if (cgroup_on_dfl(cgrp)) { cgroup_addrm_files(css, cgrp, cgroup_base_files, false); if (cgroup_psi_enabled()) cgroup_addrm_files(css, cgrp, cgroup_psi_files, false); } else { cgroup_addrm_files(css, cgrp, cgroup1_base_files, false); } } else { list_for_each_entry(cfts, &css->ss->cfts, node) cgroup_addrm_files(css, cgrp, cfts, false); } } /** * css_populate_dir - create subsys files in a cgroup directory * @css: target css * * On failure, no file is added. */ static int css_populate_dir(struct cgroup_subsys_state *css) { struct cgroup *cgrp = css->cgroup; struct cftype *cfts, *failed_cfts; int ret; if (css->flags & CSS_VISIBLE) return 0; if (!css->ss) { if (cgroup_on_dfl(cgrp)) { ret = cgroup_addrm_files(css, cgrp, cgroup_base_files, true); if (ret < 0) return ret; if (cgroup_psi_enabled()) { ret = cgroup_addrm_files(css, cgrp, cgroup_psi_files, true); if (ret < 0) { cgroup_addrm_files(css, cgrp, cgroup_base_files, false); return ret; } } } else { ret = cgroup_addrm_files(css, cgrp, cgroup1_base_files, true); if (ret < 0) return ret; } } else { list_for_each_entry(cfts, &css->ss->cfts, node) { ret = cgroup_addrm_files(css, cgrp, cfts, true); if (ret < 0) { failed_cfts = cfts; goto err; } } } css->flags |= CSS_VISIBLE; return 0; err: list_for_each_entry(cfts, &css->ss->cfts, node) { if (cfts == failed_cfts) break; cgroup_addrm_files(css, cgrp, cfts, false); } return ret; } int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) { struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; int ssid, ret; u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); do_each_subsys_mask(ss, ssid, ss_mask) { /* * If @ss has non-root csses attached to it, can't move. * If @ss is an implicit controller, it is exempt from this * rule and can be stolen. */ if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) && !ss->implicit_on_dfl) return -EBUSY; /* can't move between two non-dummy roots either */ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) return -EBUSY; /* * Collect ssid's that need to be disabled from default * hierarchy. */ if (ss->root == &cgrp_dfl_root) dfl_disable_ss_mask |= 1 << ssid; } while_each_subsys_mask(); if (dfl_disable_ss_mask) { struct cgroup *scgrp = &cgrp_dfl_root.cgrp; /* * Controllers from default hierarchy that need to be rebound * are all disabled together in one go. */ cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; WARN_ON(cgroup_apply_control(scgrp)); cgroup_finalize_control(scgrp, 0); } do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; struct cgroup_subsys_state *css = cgroup_css(scgrp, ss); struct css_set *cset, *cset_pos; struct css_task_iter *it; WARN_ON(!css || cgroup_css(dcgrp, ss)); if (src_root != &cgrp_dfl_root) { /* disable from the source */ src_root->subsys_mask &= ~(1 << ssid); WARN_ON(cgroup_apply_control(scgrp)); cgroup_finalize_control(scgrp, 0); } /* rebind */ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); rcu_assign_pointer(dcgrp->subsys[ssid], css); ss->root = dst_root; spin_lock_irq(&css_set_lock); css->cgroup = dcgrp; WARN_ON(!list_empty(&dcgrp->e_csets[ss->id])); list_for_each_entry_safe(cset, cset_pos, &scgrp->e_csets[ss->id], e_cset_node[ss->id]) { list_move_tail(&cset->e_cset_node[ss->id], &dcgrp->e_csets[ss->id]); /* * all css_sets of scgrp together in same order to dcgrp, * patch in-flight iterators to preserve correct iteration. * since the iterator is always advanced right away and * finished when it->cset_pos meets it->cset_head, so only * update it->cset_head is enough here. */ list_for_each_entry(it, &cset->task_iters, iters_node) if (it->cset_head == &scgrp->e_csets[ss->id]) it->cset_head = &dcgrp->e_csets[ss->id]; } spin_unlock_irq(&css_set_lock); if (ss->css_rstat_flush) { list_del_rcu(&css->rstat_css_node); synchronize_rcu(); list_add_rcu(&css->rstat_css_node, &dcgrp->rstat_css_list); } /* default hierarchy doesn't enable controllers by default */ dst_root->subsys_mask |= 1 << ssid; if (dst_root == &cgrp_dfl_root) { static_branch_enable(cgroup_subsys_on_dfl_key[ssid]); } else { dcgrp->subtree_control |= 1 << ssid; static_branch_disable(cgroup_subsys_on_dfl_key[ssid]); } ret = cgroup_apply_control(dcgrp); if (ret) pr_warn("partial failure to rebind %s controller (err=%d)\n", ss->name, ret); if (ss->bind) ss->bind(css); } while_each_subsys_mask(); kernfs_activate(dcgrp->kn); return 0; } int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, struct kernfs_root *kf_root) { int len = 0; char *buf = NULL; struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root); struct cgroup *ns_cgroup; buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) return -ENOMEM; spin_lock_irq(&css_set_lock); ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot); len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX); spin_unlock_irq(&css_set_lock); if (len == -E2BIG) len = -ERANGE; else if (len > 0) { seq_escape(sf, buf, " \t\n\\"); len = 0; } kfree(buf); return len; } enum cgroup2_param { Opt_nsdelegate, Opt_favordynmods, Opt_memory_localevents, Opt_memory_recursiveprot, Opt_memory_hugetlb_accounting, Opt_pids_localevents, nr__cgroup2_params }; static const struct fs_parameter_spec cgroup2_fs_parameters[] = { fsparam_flag("nsdelegate", Opt_nsdelegate), fsparam_flag("favordynmods", Opt_favordynmods), fsparam_flag("memory_localevents", Opt_memory_localevents), fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot), fsparam_flag("memory_hugetlb_accounting", Opt_memory_hugetlb_accounting), fsparam_flag("pids_localevents", Opt_pids_localevents), {} }; static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct fs_parse_result result; int opt; opt = fs_parse(fc, cgroup2_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_nsdelegate: ctx->flags |= CGRP_ROOT_NS_DELEGATE; return 0; case Opt_favordynmods: ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; return 0; case Opt_memory_localevents: ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS; return 0; case Opt_memory_recursiveprot: ctx->flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT; return 0; case Opt_memory_hugetlb_accounting: ctx->flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING; return 0; case Opt_pids_localevents: ctx->flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS; return 0; } return -EINVAL; } struct cgroup_of_peak *of_peak(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; return &ctx->peak; } static void apply_cgroup_root_flags(unsigned int root_flags) { if (current->nsproxy->cgroup_ns == &init_cgroup_ns) { if (root_flags & CGRP_ROOT_NS_DELEGATE) cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE; else cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE; cgroup_favor_dynmods(&cgrp_dfl_root, root_flags & CGRP_ROOT_FAVOR_DYNMODS); if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS; else cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS; if (root_flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT; else cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_RECURSIVE_PROT; if (root_flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING; else cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING; if (root_flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) cgrp_dfl_root.flags |= CGRP_ROOT_PIDS_LOCAL_EVENTS; else cgrp_dfl_root.flags &= ~CGRP_ROOT_PIDS_LOCAL_EVENTS; } } static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root) { if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) seq_puts(seq, ",nsdelegate"); if (cgrp_dfl_root.flags & CGRP_ROOT_FAVOR_DYNMODS) seq_puts(seq, ",favordynmods"); if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) seq_puts(seq, ",memory_localevents"); if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT) seq_puts(seq, ",memory_recursiveprot"); if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING) seq_puts(seq, ",memory_hugetlb_accounting"); if (cgrp_dfl_root.flags & CGRP_ROOT_PIDS_LOCAL_EVENTS) seq_puts(seq, ",pids_localevents"); return 0; } static int cgroup_reconfigure(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); apply_cgroup_root_flags(ctx->flags); return 0; } static void init_cgroup_housekeeping(struct cgroup *cgrp) { struct cgroup_subsys *ss; int ssid; INIT_LIST_HEAD(&cgrp->self.sibling); INIT_LIST_HEAD(&cgrp->self.children); INIT_LIST_HEAD(&cgrp->cset_links); INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); cgrp->self.cgroup = cgrp; cgrp->self.flags |= CSS_ONLINE; cgrp->dom_cgrp = cgrp; cgrp->max_descendants = INT_MAX; cgrp->max_depth = INT_MAX; INIT_LIST_HEAD(&cgrp->rstat_css_list); prev_cputime_init(&cgrp->prev_cputime); for_each_subsys(ss, ssid) INIT_LIST_HEAD(&cgrp->e_csets[ssid]); init_waitqueue_head(&cgrp->offline_waitq); INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent); } void init_cgroup_root(struct cgroup_fs_context *ctx) { struct cgroup_root *root = ctx->root; struct cgroup *cgrp = &root->cgrp; INIT_LIST_HEAD_RCU(&root->root_list); atomic_set(&root->nr_cgrps, 1); cgrp->root = root; init_cgroup_housekeeping(cgrp); /* DYNMODS must be modified through cgroup_favor_dynmods() */ root->flags = ctx->flags & ~CGRP_ROOT_FAVOR_DYNMODS; if (ctx->release_agent) strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX); if (ctx->name) strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN); if (ctx->cpuset_clone_children) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags); } int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask) { LIST_HEAD(tmp_links); struct cgroup *root_cgrp = &root->cgrp; struct kernfs_syscall_ops *kf_sops; struct css_set *cset; int i, ret; lockdep_assert_held(&cgroup_mutex); ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0, GFP_KERNEL); if (ret) goto out; /* * We're accessing css_set_count without locking css_set_lock here, * but that's OK - it can only be increased by someone holding * cgroup_lock, and that's us. Later rebinding may disable * controllers on the default hierarchy and thus create new csets, * which can't be more than the existing ones. Allocate 2x. */ ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links); if (ret) goto cancel_ref; ret = cgroup_init_root_id(root); if (ret) goto cancel_ref; kf_sops = root == &cgrp_dfl_root ? &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops; root->kf_root = kernfs_create_root(kf_sops, KERNFS_ROOT_CREATE_DEACTIVATED | KERNFS_ROOT_SUPPORT_EXPORTOP | KERNFS_ROOT_SUPPORT_USER_XATTR | KERNFS_ROOT_INVARIANT_PARENT, root_cgrp); if (IS_ERR(root->kf_root)) { ret = PTR_ERR(root->kf_root); goto exit_root_id; } root_cgrp->kn = kernfs_root_to_node(root->kf_root); WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1); root_cgrp->ancestors[0] = root_cgrp; ret = css_populate_dir(&root_cgrp->self); if (ret) goto destroy_root; ret = cgroup_rstat_init(root_cgrp); if (ret) goto destroy_root; ret = rebind_subsystems(root, ss_mask); if (ret) goto exit_stats; if (root == &cgrp_dfl_root) { ret = cgroup_bpf_inherit(root_cgrp); WARN_ON_ONCE(ret); } trace_cgroup_setup_root(root); /* * There must be no failure case after here, since rebinding takes * care of subsystems' refcounts, which are explicitly dropped in * the failure exit path. */ list_add_rcu(&root->root_list, &cgroup_roots); cgroup_root_count++; /* * Link the root cgroup in this hierarchy into all the css_set * objects. */ spin_lock_irq(&css_set_lock); hash_for_each(css_set_table, i, cset, hlist) { link_css_set(&tmp_links, cset, root_cgrp); if (css_set_populated(cset)) cgroup_update_populated(root_cgrp, true); } spin_unlock_irq(&css_set_lock); BUG_ON(!list_empty(&root_cgrp->self.children)); BUG_ON(atomic_read(&root->nr_cgrps) != 1); ret = 0; goto out; exit_stats: cgroup_rstat_exit(root_cgrp); destroy_root: kernfs_destroy_root(root->kf_root); root->kf_root = NULL; exit_root_id: cgroup_exit_root_id(root); cancel_ref: percpu_ref_exit(&root_cgrp->self.refcnt); out: free_cgrp_cset_links(&tmp_links); return ret; } int cgroup_do_get_tree(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret; ctx->kfc.root = ctx->root->kf_root; if (fc->fs_type == &cgroup2_fs_type) ctx->kfc.magic = CGROUP2_SUPER_MAGIC; else ctx->kfc.magic = CGROUP_SUPER_MAGIC; ret = kernfs_get_tree(fc); /* * In non-init cgroup namespace, instead of root cgroup's dentry, * we return the dentry corresponding to the cgroupns->root_cgrp. */ if (!ret && ctx->ns != &init_cgroup_ns) { struct dentry *nsdentry; struct super_block *sb = fc->root->d_sb; struct cgroup *cgrp; cgroup_lock(); spin_lock_irq(&css_set_lock); cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root); spin_unlock_irq(&css_set_lock); cgroup_unlock(); nsdentry = kernfs_node_dentry(cgrp->kn, sb); dput(fc->root); if (IS_ERR(nsdentry)) { deactivate_locked_super(sb); ret = PTR_ERR(nsdentry); nsdentry = NULL; } fc->root = nsdentry; } if (!ctx->kfc.new_sb_created) cgroup_put(&ctx->root->cgrp); return ret; } /* * Destroy a cgroup filesystem context. */ static void cgroup_fs_context_free(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); kfree(ctx->name); kfree(ctx->release_agent); put_cgroup_ns(ctx->ns); kernfs_free_fs_context(fc); kfree(ctx); } static int cgroup_get_tree(struct fs_context *fc) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); int ret; WRITE_ONCE(cgrp_dfl_visible, true); cgroup_get_live(&cgrp_dfl_root.cgrp); ctx->root = &cgrp_dfl_root; ret = cgroup_do_get_tree(fc); if (!ret) apply_cgroup_root_flags(ctx->flags); return ret; } static const struct fs_context_operations cgroup_fs_context_ops = { .free = cgroup_fs_context_free, .parse_param = cgroup2_parse_param, .get_tree = cgroup_get_tree, .reconfigure = cgroup_reconfigure, }; static const struct fs_context_operations cgroup1_fs_context_ops = { .free = cgroup_fs_context_free, .parse_param = cgroup1_parse_param, .get_tree = cgroup1_get_tree, .reconfigure = cgroup1_reconfigure, }; /* * Initialise the cgroup filesystem creation/reconfiguration context. Notably, * we select the namespace we're going to use. */ static int cgroup_init_fs_context(struct fs_context *fc) { struct cgroup_fs_context *ctx; ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->ns = current->nsproxy->cgroup_ns; get_cgroup_ns(ctx->ns); fc->fs_private = &ctx->kfc; if (fc->fs_type == &cgroup2_fs_type) fc->ops = &cgroup_fs_context_ops; else fc->ops = &cgroup1_fs_context_ops; put_user_ns(fc->user_ns); fc->user_ns = get_user_ns(ctx->ns->user_ns); fc->global = true; if (have_favordynmods) ctx->flags |= CGRP_ROOT_FAVOR_DYNMODS; return 0; } static void cgroup_kill_sb(struct super_block *sb) { struct kernfs_root *kf_root = kernfs_root_from_sb(sb); struct cgroup_root *root = cgroup_root_from_kf(kf_root); /* * If @root doesn't have any children, start killing it. * This prevents new mounts by disabling percpu_ref_tryget_live(). * * And don't kill the default root. */ if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && !percpu_ref_is_dying(&root->cgrp.self.refcnt)) percpu_ref_kill(&root->cgrp.self.refcnt); cgroup_put(&root->cgrp); kernfs_kill_sb(sb); } struct file_system_type cgroup_fs_type = { .name = "cgroup", .init_fs_context = cgroup_init_fs_context, .parameters = cgroup1_fs_parameters, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; static struct file_system_type cgroup2_fs_type = { .name = "cgroup2", .init_fs_context = cgroup_init_fs_context, .parameters = cgroup2_fs_parameters, .kill_sb = cgroup_kill_sb, .fs_flags = FS_USERNS_MOUNT, }; #ifdef CONFIG_CPUSETS_V1 enum cpuset_param { Opt_cpuset_v2_mode, }; static const struct fs_parameter_spec cpuset_fs_parameters[] = { fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode), {} }; static int cpuset_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct cgroup_fs_context *ctx = cgroup_fc2context(fc); struct fs_parse_result result; int opt; opt = fs_parse(fc, cpuset_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_cpuset_v2_mode: ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE; return 0; } return -EINVAL; } static const struct fs_context_operations cpuset_fs_context_ops = { .get_tree = cgroup1_get_tree, .free = cgroup_fs_context_free, .parse_param = cpuset_parse_param, }; /* * This is ugly, but preserves the userspace API for existing cpuset * users. If someone tries to mount the "cpuset" filesystem, we * silently switch it to mount "cgroup" instead */ static int cpuset_init_fs_context(struct fs_context *fc) { char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER); struct cgroup_fs_context *ctx; int err; err = cgroup_init_fs_context(fc); if (err) { kfree(agent); return err; } fc->ops = &cpuset_fs_context_ops; ctx = cgroup_fc2context(fc); ctx->subsys_mask = 1 << cpuset_cgrp_id; ctx->flags |= CGRP_ROOT_NOPREFIX; ctx->release_agent = agent; get_filesystem(&cgroup_fs_type); put_filesystem(fc->fs_type); fc->fs_type = &cgroup_fs_type; return 0; } static struct file_system_type cpuset_fs_type = { .name = "cpuset", .init_fs_context = cpuset_init_fs_context, .parameters = cpuset_fs_parameters, .fs_flags = FS_USERNS_MOUNT, }; #endif int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns) { struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root); return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen); } int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns) { int ret; cgroup_lock(); spin_lock_irq(&css_set_lock); ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns); spin_unlock_irq(&css_set_lock); cgroup_unlock(); return ret; } EXPORT_SYMBOL_GPL(cgroup_path_ns); /** * cgroup_attach_lock - Lock for ->attach() * @lock_threadgroup: whether to down_write cgroup_threadgroup_rwsem * * cgroup migration sometimes needs to stabilize threadgroups against forks and * exits by write-locking cgroup_threadgroup_rwsem. However, some ->attach() * implementations (e.g. cpuset), also need to disable CPU hotplug. * Unfortunately, letting ->attach() operations acquire cpus_read_lock() can * lead to deadlocks. * * Bringing up a CPU may involve creating and destroying tasks which requires * read-locking threadgroup_rwsem, so threadgroup_rwsem nests inside * cpus_read_lock(). If we call an ->attach() which acquires the cpus lock while * write-locking threadgroup_rwsem, the locking order is reversed and we end up * waiting for an on-going CPU hotplug operation which in turn is waiting for * the threadgroup_rwsem to be released to create new tasks. For more details: * * http://lkml.kernel.org/r/20220711174629.uehfmqegcwn2lqzu@wubuntu * * Resolve the situation by always acquiring cpus_read_lock() before optionally * write-locking cgroup_threadgroup_rwsem. This allows ->attach() to assume that * CPU hotplug is disabled on entry. */ void cgroup_attach_lock(bool lock_threadgroup) { cpus_read_lock(); if (lock_threadgroup) percpu_down_write(&cgroup_threadgroup_rwsem); } /** * cgroup_attach_unlock - Undo cgroup_attach_lock() * @lock_threadgroup: whether to up_write cgroup_threadgroup_rwsem */ void cgroup_attach_unlock(bool lock_threadgroup) { if (lock_threadgroup) percpu_up_write(&cgroup_threadgroup_rwsem); cpus_read_unlock(); } /** * cgroup_migrate_add_task - add a migration target task to a migration context * @task: target task * @mgctx: target migration context * * Add @task, which is a migration target, to @mgctx->tset. This function * becomes noop if @task doesn't need to be migrated. @task's css_set * should have been added as a migration source and @task->cg_list will be * moved from the css_set's tasks list to mg_tasks one. */ static void cgroup_migrate_add_task(struct task_struct *task, struct cgroup_mgctx *mgctx) { struct css_set *cset; lockdep_assert_held(&css_set_lock); /* @task either already exited or can't exit until the end */ if (task->flags & PF_EXITING) return; /* cgroup_threadgroup_rwsem protects racing against forks */ WARN_ON_ONCE(list_empty(&task->cg_list)); cset = task_css_set(task); if (!cset->mg_src_cgrp) return; mgctx->tset.nr_tasks++; list_move_tail(&task->cg_list, &cset->mg_tasks); if (list_empty(&cset->mg_node)) list_add_tail(&cset->mg_node, &mgctx->tset.src_csets); if (list_empty(&cset->mg_dst_cset->mg_node)) list_add_tail(&cset->mg_dst_cset->mg_node, &mgctx->tset.dst_csets); } /** * cgroup_taskset_first - reset taskset and return the first task * @tset: taskset of interest * @dst_cssp: output variable for the destination css * * @tset iteration is initialized and the first task is returned. */ struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp) { tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node); tset->cur_task = NULL; return cgroup_taskset_next(tset, dst_cssp); } /** * cgroup_taskset_next - iterate to the next task in taskset * @tset: taskset of interest * @dst_cssp: output variable for the destination css * * Return the next task in @tset. Iteration must have been initialized * with cgroup_taskset_first(). */ struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset, struct cgroup_subsys_state **dst_cssp) { struct css_set *cset = tset->cur_cset; struct task_struct *task = tset->cur_task; while (CGROUP_HAS_SUBSYS_CONFIG && &cset->mg_node != tset->csets) { if (!task) task = list_first_entry(&cset->mg_tasks, struct task_struct, cg_list); else task = list_next_entry(task, cg_list); if (&task->cg_list != &cset->mg_tasks) { tset->cur_cset = cset; tset->cur_task = task; /* * This function may be called both before and * after cgroup_migrate_execute(). The two cases * can be distinguished by looking at whether @cset * has its ->mg_dst_cset set. */ if (cset->mg_dst_cset) *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid]; else *dst_cssp = cset->subsys[tset->ssid]; return task; } cset = list_next_entry(cset, mg_node); task = NULL; } return NULL; } /** * cgroup_migrate_execute - migrate a taskset * @mgctx: migration context * * Migrate tasks in @mgctx as setup by migration preparation functions. * This function fails iff one of the ->can_attach callbacks fails and * guarantees that either all or none of the tasks in @mgctx are migrated. * @mgctx is consumed regardless of success. */ static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx) { struct cgroup_taskset *tset = &mgctx->tset; struct cgroup_subsys *ss; struct task_struct *task, *tmp_task; struct css_set *cset, *tmp_cset; int ssid, failed_ssid, ret; /* check that we can legitimately attach to the cgroup */ if (tset->nr_tasks) { do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ss->can_attach) { tset->ssid = ssid; ret = ss->can_attach(tset); if (ret) { failed_ssid = ssid; goto out_cancel_attach; } } } while_each_subsys_mask(); } /* * Now that we're guaranteed success, proceed to move all tasks to * the new cgroup. There are no failure cases after here, so this * is the commit point. */ spin_lock_irq(&css_set_lock); list_for_each_entry(cset, &tset->src_csets, mg_node) { list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) { struct css_set *from_cset = task_css_set(task); struct css_set *to_cset = cset->mg_dst_cset; get_css_set(to_cset); to_cset->nr_tasks++; css_set_move_task(task, from_cset, to_cset, true); from_cset->nr_tasks--; /* * If the source or destination cgroup is frozen, * the task might require to change its state. */ cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp, to_cset->dfl_cgrp); put_css_set_locked(from_cset); } } spin_unlock_irq(&css_set_lock); /* * Migration is committed, all target tasks are now on dst_csets. * Nothing is sensitive to fork() after this point. Notify * controllers that migration is complete. */ tset->csets = &tset->dst_csets; if (tset->nr_tasks) { do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ss->attach) { tset->ssid = ssid; ss->attach(tset); } } while_each_subsys_mask(); } ret = 0; goto out_release_tset; out_cancel_attach: if (tset->nr_tasks) { do_each_subsys_mask(ss, ssid, mgctx->ss_mask) { if (ssid == failed_ssid) break; if (ss->cancel_attach) { tset->ssid = ssid; ss->cancel_attach(tset); } } while_each_subsys_mask(); } out_release_tset: spin_lock_irq(&css_set_lock); list_splice_init(&tset->dst_csets, &tset->src_csets); list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) { list_splice_tail_init(&cset->mg_tasks, &cset->tasks); list_del_init(&cset->mg_node); } spin_unlock_irq(&css_set_lock); /* * Re-initialize the cgroup_taskset structure in case it is reused * again in another cgroup_migrate_add_task()/cgroup_migrate_execute() * iteration. */ tset->nr_tasks = 0; tset->csets = &tset->src_csets; return ret; } /** * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination * @dst_cgrp: destination cgroup to test * * On the default hierarchy, except for the mixable, (possible) thread root * and threaded cgroups, subtree_control must be zero for migration * destination cgroups with tasks so that child cgroups don't compete * against tasks. */ int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp) { /* v1 doesn't have any restriction */ if (!cgroup_on_dfl(dst_cgrp)) return 0; /* verify @dst_cgrp can host resources */ if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp)) return -EOPNOTSUPP; /* * If @dst_cgrp is already or can become a thread root or is * threaded, it doesn't matter. */ if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp)) return 0; /* apply no-internal-process constraint */ if (dst_cgrp->subtree_control) return -EBUSY; return 0; } /** * cgroup_migrate_finish - cleanup after attach * @mgctx: migration context * * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See * those functions for details. */ void cgroup_migrate_finish(struct cgroup_mgctx *mgctx) { struct css_set *cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets, mg_src_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_src_preload_node); put_css_set_locked(cset); } list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets, mg_dst_preload_node) { cset->mg_src_cgrp = NULL; cset->mg_dst_cgrp = NULL; cset->mg_dst_cset = NULL; list_del_init(&cset->mg_dst_preload_node); put_css_set_locked(cset); } spin_unlock_irq(&css_set_lock); } /** * cgroup_migrate_add_src - add a migration source css_set * @src_cset: the source css_set to add * @dst_cgrp: the destination cgroup * @mgctx: migration context * * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin * @src_cset and add it to @mgctx->src_csets, which should later be cleaned * up by cgroup_migrate_finish(). * * This function may be called without holding cgroup_threadgroup_rwsem * even if the target is a process. Threads may be created and destroyed * but as long as cgroup_mutex is not dropped, no new css_set can be put * into play and the preloaded css_sets are guaranteed to cover all * migrations. */ void cgroup_migrate_add_src(struct css_set *src_cset, struct cgroup *dst_cgrp, struct cgroup_mgctx *mgctx) { struct cgroup *src_cgrp; lockdep_assert_held(&cgroup_mutex); lockdep_assert_held(&css_set_lock); /* * If ->dead, @src_set is associated with one or more dead cgroups * and doesn't contain any migratable tasks. Ignore it early so * that the rest of migration path doesn't get confused by it. */ if (src_cset->dead) return; if (!list_empty(&src_cset->mg_src_preload_node)) return; src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root); WARN_ON(src_cset->mg_src_cgrp); WARN_ON(src_cset->mg_dst_cgrp); WARN_ON(!list_empty(&src_cset->mg_tasks)); WARN_ON(!list_empty(&src_cset->mg_node)); src_cset->mg_src_cgrp = src_cgrp; src_cset->mg_dst_cgrp = dst_cgrp; get_css_set(src_cset); list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets); } /** * cgroup_migrate_prepare_dst - prepare destination css_sets for migration * @mgctx: migration context * * Tasks are about to be moved and all the source css_sets have been * preloaded to @mgctx->preloaded_src_csets. This function looks up and * pins all destination css_sets, links each to its source, and append them * to @mgctx->preloaded_dst_csets. * * This function must be called after cgroup_migrate_add_src() has been * called on each migration source css_set. After migration is performed * using cgroup_migrate(), cgroup_migrate_finish() must be called on * @mgctx. */ int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx) { struct css_set *src_cset, *tmp_cset; lockdep_assert_held(&cgroup_mutex); /* look up the dst cset for each src cset and link it to src */ list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets, mg_src_preload_node) { struct css_set *dst_cset; struct cgroup_subsys *ss; int ssid; dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp); if (!dst_cset) return -ENOMEM; WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset); /* * If src cset equals dst, it's noop. Drop the src. * cgroup_migrate() will skip the cset too. Note that we * can't handle src == dst as some nodes are used by both. */ if (src_cset == dst_cset) { src_cset->mg_src_cgrp = NULL; src_cset->mg_dst_cgrp = NULL; list_del_init(&src_cset->mg_src_preload_node); put_css_set(src_cset); put_css_set(dst_cset); continue; } src_cset->mg_dst_cset = dst_cset; if (list_empty(&dst_cset->mg_dst_preload_node)) list_add_tail(&dst_cset->mg_dst_preload_node, &mgctx->preloaded_dst_csets); else put_css_set(dst_cset); for_each_subsys(ss, ssid) if (src_cset->subsys[ssid] != dst_cset->subsys[ssid]) mgctx->ss_mask |= 1 << ssid; } return 0; } /** * cgroup_migrate - migrate a process or task to a cgroup * @leader: the leader of the process or the task to migrate * @threadgroup: whether @leader points to the whole process or a single task * @mgctx: migration context * * Migrate a process or task denoted by @leader. If migrating a process, * the caller must be holding cgroup_threadgroup_rwsem. The caller is also * responsible for invoking cgroup_migrate_add_src() and * cgroup_migrate_prepare_dst() on the targets before invoking this * function and following up with cgroup_migrate_finish(). * * As long as a controller's ->can_attach() doesn't fail, this function is * guaranteed to succeed. This means that, excluding ->can_attach() * failure, when migrating multiple targets, the success or failure can be * decided for all targets by invoking group_migrate_prepare_dst() before * actually starting migrating. */ int cgroup_migrate(struct task_struct *leader, bool threadgroup, struct cgroup_mgctx *mgctx) { struct task_struct *task; /* * The following thread iteration should be inside an RCU critical * section to prevent tasks from being freed while taking the snapshot. * spin_lock_irq() implies RCU critical section here. */ spin_lock_irq(&css_set_lock); task = leader; do { cgroup_migrate_add_task(task, mgctx); if (!threadgroup) break; } while_each_thread(leader, task); spin_unlock_irq(&css_set_lock); return cgroup_migrate_execute(mgctx); } /** * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup * @dst_cgrp: the cgroup to attach to * @leader: the task or the leader of the threadgroup to be attached * @threadgroup: attach the whole threadgroup? * * Call holding cgroup_mutex and cgroup_threadgroup_rwsem. */ int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader, bool threadgroup) { DEFINE_CGROUP_MGCTX(mgctx); struct task_struct *task; int ret = 0; /* look up all src csets */ spin_lock_irq(&css_set_lock); rcu_read_lock(); task = leader; do { cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx); if (!threadgroup) break; } while_each_thread(leader, task); rcu_read_unlock(); spin_unlock_irq(&css_set_lock); /* prepare dst csets and commit */ ret = cgroup_migrate_prepare_dst(&mgctx); if (!ret) ret = cgroup_migrate(leader, threadgroup, &mgctx); cgroup_migrate_finish(&mgctx); if (!ret) TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup); return ret; } struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup, bool *threadgroup_locked) { struct task_struct *tsk; pid_t pid; if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return ERR_PTR(-EINVAL); /* * If we migrate a single thread, we don't care about threadgroup * stability. If the thread is `current`, it won't exit(2) under our * hands or change PID through exec(2). We exclude * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write * callers by cgroup_mutex. * Therefore, we can skip the global lock. */ lockdep_assert_held(&cgroup_mutex); *threadgroup_locked = pid || threadgroup; cgroup_attach_lock(*threadgroup_locked); rcu_read_lock(); if (pid) { tsk = find_task_by_vpid(pid); if (!tsk) { tsk = ERR_PTR(-ESRCH); goto out_unlock_threadgroup; } } else { tsk = current; } if (threadgroup) tsk = tsk->group_leader; /* * kthreads may acquire PF_NO_SETAFFINITY during initialization. * If userland migrates such a kthread to a non-root cgroup, it can * become trapped in a cpuset, or RT kthread may be born in a * cgroup with no rt_runtime allocated. Just say no. */ if (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY)) { tsk = ERR_PTR(-EINVAL); goto out_unlock_threadgroup; } get_task_struct(tsk); goto out_unlock_rcu; out_unlock_threadgroup: cgroup_attach_unlock(*threadgroup_locked); *threadgroup_locked = false; out_unlock_rcu: rcu_read_unlock(); return tsk; } void cgroup_procs_write_finish(struct task_struct *task, bool threadgroup_locked) { struct cgroup_subsys *ss; int ssid; /* release reference from cgroup_procs_write_start() */ put_task_struct(task); cgroup_attach_unlock(threadgroup_locked); for_each_subsys(ss, ssid) if (ss->post_attach) ss->post_attach(); } static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask) { struct cgroup_subsys *ss; bool printed = false; int ssid; do_each_subsys_mask(ss, ssid, ss_mask) { if (printed) seq_putc(seq, ' '); seq_puts(seq, ss->name); printed = true; } while_each_subsys_mask(); if (printed) seq_putc(seq, '\n'); } /* show controllers which are enabled from the parent */ static int cgroup_controllers_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; cgroup_print_ss_mask(seq, cgroup_control(cgrp)); return 0; } /* show controllers which are enabled for a given cgroup's children */ static int cgroup_subtree_control_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; cgroup_print_ss_mask(seq, cgrp->subtree_control); return 0; } /** * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy * @cgrp: root of the subtree to update csses for * * @cgrp's control masks have changed and its subtree's css associations * need to be updated accordingly. This function looks up all css_sets * which are attached to the subtree, creates the matching updated css_sets * and migrates the tasks to the new ones. */ static int cgroup_update_dfl_csses(struct cgroup *cgrp) { DEFINE_CGROUP_MGCTX(mgctx); struct cgroup_subsys_state *d_css; struct cgroup *dsct; struct css_set *src_cset; bool has_tasks; int ret; lockdep_assert_held(&cgroup_mutex); /* look up all csses currently attached to @cgrp's subtree */ spin_lock_irq(&css_set_lock); cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { struct cgrp_cset_link *link; /* * As cgroup_update_dfl_csses() is only called by * cgroup_apply_control(). The csses associated with the * given cgrp will not be affected by changes made to * its subtree_control file. We can skip them. */ if (dsct == cgrp) continue; list_for_each_entry(link, &dsct->cset_links, cset_link) cgroup_migrate_add_src(link->cset, dsct, &mgctx); } spin_unlock_irq(&css_set_lock); /* * We need to write-lock threadgroup_rwsem while migrating tasks. * However, if there are no source csets for @cgrp, changing its * controllers isn't gonna produce any task migrations and the * write-locking can be skipped safely. */ has_tasks = !list_empty(&mgctx.preloaded_src_csets); cgroup_attach_lock(has_tasks); /* NULL dst indicates self on default hierarchy */ ret = cgroup_migrate_prepare_dst(&mgctx); if (ret) goto out_finish; spin_lock_irq(&css_set_lock); list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_src_preload_node) { struct task_struct *task, *ntask; /* all tasks in src_csets need to be migrated */ list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list) cgroup_migrate_add_task(task, &mgctx); } spin_unlock_irq(&css_set_lock); ret = cgroup_migrate_execute(&mgctx); out_finish: cgroup_migrate_finish(&mgctx); cgroup_attach_unlock(has_tasks); return ret; } /** * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses * @cgrp: root of the target subtree * * Because css offlining is asynchronous, userland may try to re-enable a * controller while the previous css is still around. This function grabs * cgroup_mutex and drains the previous css instances of @cgrp's subtree. */ void cgroup_lock_and_drain_offline(struct cgroup *cgrp) __acquires(&cgroup_mutex) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid; restart: cgroup_lock(); cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); DEFINE_WAIT(wait); if (!css || !percpu_ref_is_dying(&css->refcnt)) continue; cgroup_get_live(dsct); prepare_to_wait(&dsct->offline_waitq, &wait, TASK_UNINTERRUPTIBLE); cgroup_unlock(); schedule(); finish_wait(&dsct->offline_waitq, &wait); cgroup_put(dsct); goto restart; } } } /** * cgroup_save_control - save control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the * respective old_ prefixed fields for @cgrp's subtree including @cgrp * itself. */ static void cgroup_save_control(struct cgroup *cgrp) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { dsct->old_subtree_control = dsct->subtree_control; dsct->old_subtree_ss_mask = dsct->subtree_ss_mask; dsct->old_dom_cgrp = dsct->dom_cgrp; } } /** * cgroup_propagate_control - refresh control masks of a subtree * @cgrp: root of the target subtree * * For @cgrp and its subtree, ensure ->subtree_ss_mask matches * ->subtree_control and propagate controller availability through the * subtree so that descendants don't have unavailable controllers enabled. */ static void cgroup_propagate_control(struct cgroup *cgrp) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { dsct->subtree_control &= cgroup_control(dsct); dsct->subtree_ss_mask = cgroup_calc_subtree_ss_mask(dsct->subtree_control, cgroup_ss_mask(dsct)); } } /** * cgroup_restore_control - restore control masks and dom_cgrp of a subtree * @cgrp: root of the target subtree * * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the * respective old_ prefixed fields for @cgrp's subtree including @cgrp * itself. */ static void cgroup_restore_control(struct cgroup *cgrp) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { dsct->subtree_control = dsct->old_subtree_control; dsct->subtree_ss_mask = dsct->old_subtree_ss_mask; dsct->dom_cgrp = dsct->old_dom_cgrp; } } static bool css_visible(struct cgroup_subsys_state *css) { struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; if (cgroup_control(cgrp) & (1 << ss->id)) return true; if (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) return false; return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl; } /** * cgroup_apply_control_enable - enable or show csses according to control * @cgrp: root of the target subtree * * Walk @cgrp's subtree and create new csses or make the existing ones * visible. A css is created invisible if it's being implicitly enabled * through dependency. An invisible css is made visible when the userland * explicitly enables it. * * Returns 0 on success, -errno on failure. On failure, csses which have * been processed already aren't cleaned up. The caller is responsible for * cleaning up with cgroup_apply_control_disable(). */ static int cgroup_apply_control_enable(struct cgroup *cgrp) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid, ret; cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); if (!(cgroup_ss_mask(dsct) & (1 << ss->id))) continue; if (!css) { css = css_create(dsct, ss); if (IS_ERR(css)) return PTR_ERR(css); } WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); if (css_visible(css)) { ret = css_populate_dir(css); if (ret) return ret; } } } return 0; } /** * cgroup_apply_control_disable - kill or hide csses according to control * @cgrp: root of the target subtree * * Walk @cgrp's subtree and kill and hide csses so that they match * cgroup_ss_mask() and cgroup_visible_mask(). * * A css is hidden when the userland requests it to be disabled while other * subsystems are still depending on it. The css must not actively control * resources and be in the vanilla state if it's made visible again later. * Controllers which may be depended upon should provide ->css_reset() for * this purpose. */ static void cgroup_apply_control_disable(struct cgroup *cgrp) { struct cgroup *dsct; struct cgroup_subsys_state *d_css; struct cgroup_subsys *ss; int ssid; cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) { for_each_subsys(ss, ssid) { struct cgroup_subsys_state *css = cgroup_css(dsct, ss); if (!css) continue; WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt)); if (css->parent && !(cgroup_ss_mask(dsct) & (1 << ss->id))) { kill_css(css); } else if (!css_visible(css)) { css_clear_dir(css); if (ss->css_reset) ss->css_reset(css); } } } } /** * cgroup_apply_control - apply control mask updates to the subtree * @cgrp: root of the target subtree * * subsystems can be enabled and disabled in a subtree using the following * steps. * * 1. Call cgroup_save_control() to stash the current state. * 2. Update ->subtree_control masks in the subtree as desired. * 3. Call cgroup_apply_control() to apply the changes. * 4. Optionally perform other related operations. * 5. Call cgroup_finalize_control() to finish up. * * This function implements step 3 and propagates the mask changes * throughout @cgrp's subtree, updates csses accordingly and perform * process migrations. */ static int cgroup_apply_control(struct cgroup *cgrp) { int ret; cgroup_propagate_control(cgrp); ret = cgroup_apply_control_enable(cgrp); if (ret) return ret; /* * At this point, cgroup_e_css_by_mask() results reflect the new csses * making the following cgroup_update_dfl_csses() properly update * css associations of all tasks in the subtree. */ return cgroup_update_dfl_csses(cgrp); } /** * cgroup_finalize_control - finalize control mask update * @cgrp: root of the target subtree * @ret: the result of the update * * Finalize control mask update. See cgroup_apply_control() for more info. */ static void cgroup_finalize_control(struct cgroup *cgrp, int ret) { if (ret) { cgroup_restore_control(cgrp); cgroup_propagate_control(cgrp); } cgroup_apply_control_disable(cgrp); } static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable) { u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask; /* if nothing is getting enabled, nothing to worry about */ if (!enable) return 0; /* can @cgrp host any resources? */ if (!cgroup_is_valid_domain(cgrp->dom_cgrp)) return -EOPNOTSUPP; /* mixables don't care */ if (cgroup_is_mixable(cgrp)) return 0; if (domain_enable) { /* can't enable domain controllers inside a thread subtree */ if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp)) return -EOPNOTSUPP; } else { /* * Threaded controllers can handle internal competitions * and are always allowed inside a (prospective) thread * subtree. */ if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp)) return 0; } /* * Controllers can't be enabled for a cgroup with tasks to avoid * child cgroups competing against tasks. */ if (cgroup_has_tasks(cgrp)) return -EBUSY; return 0; } /* change the enabled child controllers for a cgroup in the default hierarchy */ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { u16 enable = 0, disable = 0; struct cgroup *cgrp, *child; struct cgroup_subsys *ss; char *tok; int ssid, ret; /* * Parse input - space separated list of subsystem names prefixed * with either + or -. */ buf = strstrip(buf); while ((tok = strsep(&buf, " "))) { if (tok[0] == '\0') continue; do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) { if (!cgroup_ssid_enabled(ssid) || strcmp(tok + 1, ss->name)) continue; if (*tok == '+') { enable |= 1 << ssid; disable &= ~(1 << ssid); } else if (*tok == '-') { disable |= 1 << ssid; enable &= ~(1 << ssid); } else { return -EINVAL; } break; } while_each_subsys_mask(); if (ssid == CGROUP_SUBSYS_COUNT) return -EINVAL; } cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENODEV; for_each_subsys(ss, ssid) { if (enable & (1 << ssid)) { if (cgrp->subtree_control & (1 << ssid)) { enable &= ~(1 << ssid); continue; } if (!(cgroup_control(cgrp) & (1 << ssid))) { ret = -ENOENT; goto out_unlock; } } else if (disable & (1 << ssid)) { if (!(cgrp->subtree_control & (1 << ssid))) { disable &= ~(1 << ssid); continue; } /* a child has it enabled? */ cgroup_for_each_live_child(child, cgrp) { if (child->subtree_control & (1 << ssid)) { ret = -EBUSY; goto out_unlock; } } } } if (!enable && !disable) { ret = 0; goto out_unlock; } ret = cgroup_vet_subtree_control_enable(cgrp, enable); if (ret) goto out_unlock; /* save and update control masks and prepare csses */ cgroup_save_control(cgrp); cgrp->subtree_control |= enable; cgrp->subtree_control &= ~disable; ret = cgroup_apply_control(cgrp); cgroup_finalize_control(cgrp, ret); if (ret) goto out_unlock; kernfs_activate(cgrp->kn); out_unlock: cgroup_kn_unlock(of->kn); return ret ?: nbytes; } /** * cgroup_enable_threaded - make @cgrp threaded * @cgrp: the target cgroup * * Called when "threaded" is written to the cgroup.type interface file and * tries to make @cgrp threaded and join the parent's resource domain. * This function is never called on the root cgroup as cgroup.type doesn't * exist on it. */ static int cgroup_enable_threaded(struct cgroup *cgrp) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup *dom_cgrp = parent->dom_cgrp; struct cgroup *dsct; struct cgroup_subsys_state *d_css; int ret; lockdep_assert_held(&cgroup_mutex); /* noop if already threaded */ if (cgroup_is_threaded(cgrp)) return 0; /* * If @cgroup is populated or has domain controllers enabled, it * can't be switched. While the below cgroup_can_be_thread_root() * test can catch the same conditions, that's only when @parent is * not mixable, so let's check it explicitly. */ if (cgroup_is_populated(cgrp) || cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask) return -EOPNOTSUPP; /* we're joining the parent's domain, ensure its validity */ if (!cgroup_is_valid_domain(dom_cgrp) || !cgroup_can_be_thread_root(dom_cgrp)) return -EOPNOTSUPP; /* * The following shouldn't cause actual migrations and should * always succeed. */ cgroup_save_control(cgrp); cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) if (dsct == cgrp || cgroup_is_threaded(dsct)) dsct->dom_cgrp = dom_cgrp; ret = cgroup_apply_control(cgrp); if (!ret) parent->nr_threaded_children++; cgroup_finalize_control(cgrp, ret); return ret; } static int cgroup_type_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; if (cgroup_is_threaded(cgrp)) seq_puts(seq, "threaded\n"); else if (!cgroup_is_valid_domain(cgrp)) seq_puts(seq, "domain invalid\n"); else if (cgroup_is_thread_root(cgrp)) seq_puts(seq, "domain threaded\n"); else seq_puts(seq, "domain\n"); return 0; } static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; int ret; /* only switching to threaded mode is supported */ if (strcmp(strstrip(buf), "threaded")) return -EINVAL; /* drain dying csses before we re-apply (threaded) subtree control */ cgrp = cgroup_kn_lock_live(of->kn, true); if (!cgrp) return -ENOENT; /* threaded can only be enabled */ ret = cgroup_enable_threaded(cgrp); cgroup_kn_unlock(of->kn); return ret ?: nbytes; } static int cgroup_max_descendants_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; int descendants = READ_ONCE(cgrp->max_descendants); if (descendants == INT_MAX) seq_puts(seq, "max\n"); else seq_printf(seq, "%d\n", descendants); return 0; } static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; int descendants; ssize_t ret; buf = strstrip(buf); if (!strcmp(buf, "max")) { descendants = INT_MAX; } else { ret = kstrtoint(buf, 0, &descendants); if (ret) return ret; } if (descendants < 0) return -ERANGE; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENOENT; cgrp->max_descendants = descendants; cgroup_kn_unlock(of->kn); return nbytes; } static int cgroup_max_depth_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; int depth = READ_ONCE(cgrp->max_depth); if (depth == INT_MAX) seq_puts(seq, "max\n"); else seq_printf(seq, "%d\n", depth); return 0; } static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; ssize_t ret; int depth; buf = strstrip(buf); if (!strcmp(buf, "max")) { depth = INT_MAX; } else { ret = kstrtoint(buf, 0, &depth); if (ret) return ret; } if (depth < 0) return -ERANGE; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENOENT; cgrp->max_depth = depth; cgroup_kn_unlock(of->kn); return nbytes; } static int cgroup_events_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp)); seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags)); return 0; } static int cgroup_stat_show(struct seq_file *seq, void *v) { struct cgroup *cgroup = seq_css(seq)->cgroup; struct cgroup_subsys_state *css; int dying_cnt[CGROUP_SUBSYS_COUNT]; int ssid; seq_printf(seq, "nr_descendants %d\n", cgroup->nr_descendants); /* * Show the number of live and dying csses associated with each of * non-inhibited cgroup subsystems that is bound to cgroup v2. * * Without proper lock protection, racing is possible. So the * numbers may not be consistent when that happens. */ rcu_read_lock(); for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++) { dying_cnt[ssid] = -1; if ((BIT(ssid) & cgrp_dfl_inhibit_ss_mask) || (cgroup_subsys[ssid]->root != &cgrp_dfl_root)) continue; css = rcu_dereference_raw(cgroup->subsys[ssid]); dying_cnt[ssid] = cgroup->nr_dying_subsys[ssid]; seq_printf(seq, "nr_subsys_%s %d\n", cgroup_subsys[ssid]->name, css ? (css->nr_descendants + 1) : 0); } seq_printf(seq, "nr_dying_descendants %d\n", cgroup->nr_dying_descendants); for (ssid = 0; ssid < CGROUP_SUBSYS_COUNT; ssid++) { if (dying_cnt[ssid] >= 0) seq_printf(seq, "nr_dying_subsys_%s %d\n", cgroup_subsys[ssid]->name, dying_cnt[ssid]); } rcu_read_unlock(); return 0; } #ifdef CONFIG_CGROUP_SCHED /** * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem * @cgrp: the cgroup of interest * @ss: the subsystem of interest * * Find and get @cgrp's css associated with @ss. If the css doesn't exist * or is offline, %NULL is returned. */ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, struct cgroup_subsys *ss) { struct cgroup_subsys_state *css; rcu_read_lock(); css = cgroup_css(cgrp, ss); if (css && !css_tryget_online(css)) css = NULL; rcu_read_unlock(); return css; } static int cgroup_extra_stat_show(struct seq_file *seq, int ssid) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct cgroup_subsys *ss = cgroup_subsys[ssid]; struct cgroup_subsys_state *css; int ret; if (!ss->css_extra_stat_show) return 0; css = cgroup_tryget_css(cgrp, ss); if (!css) return 0; ret = ss->css_extra_stat_show(seq, css); css_put(css); return ret; } static int cgroup_local_stat_show(struct seq_file *seq, struct cgroup *cgrp, int ssid) { struct cgroup_subsys *ss = cgroup_subsys[ssid]; struct cgroup_subsys_state *css; int ret; if (!ss->css_local_stat_show) return 0; css = cgroup_tryget_css(cgrp, ss); if (!css) return 0; ret = ss->css_local_stat_show(seq, css); css_put(css); return ret; } #endif static int cpu_stat_show(struct seq_file *seq, void *v) { int ret = 0; cgroup_base_stat_cputime_show(seq); #ifdef CONFIG_CGROUP_SCHED ret = cgroup_extra_stat_show(seq, cpu_cgrp_id); #endif return ret; } static int cpu_local_stat_show(struct seq_file *seq, void *v) { struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup; int ret = 0; #ifdef CONFIG_CGROUP_SCHED ret = cgroup_local_stat_show(seq, cgrp, cpu_cgrp_id); #endif return ret; } #ifdef CONFIG_PSI static int cgroup_io_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_IO); } static int cgroup_memory_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_MEM); } static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_CPU); } static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, enum psi_res res) { struct cgroup_file_ctx *ctx = of->priv; struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; cgroup_get(cgrp); cgroup_kn_unlock(of->kn); /* Allow only one trigger per file descriptor */ if (ctx->psi.trigger) { cgroup_put(cgrp); return -EBUSY; } psi = cgroup_psi(cgrp); new = psi_trigger_create(psi, buf, res, of->file, of); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); } smp_store_release(&ctx->psi.trigger, new); cgroup_put(cgrp); return nbytes; } static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return pressure_write(of, buf, nbytes, PSI_IO); } static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return pressure_write(of, buf, nbytes, PSI_MEM); } static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return pressure_write(of, buf, nbytes, PSI_CPU); } #ifdef CONFIG_IRQ_TIME_ACCOUNTING static int cgroup_irq_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct psi_group *psi = cgroup_psi(cgrp); return psi_show(seq, psi, PSI_IRQ); } static ssize_t cgroup_irq_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return pressure_write(of, buf, nbytes, PSI_IRQ); } #endif static int cgroup_pressure_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; struct psi_group *psi = cgroup_psi(cgrp); seq_printf(seq, "%d\n", psi->enabled); return 0; } static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { ssize_t ret; int enable; struct cgroup *cgrp; struct psi_group *psi; ret = kstrtoint(strstrip(buf), 0, &enable); if (ret) return ret; if (enable < 0 || enable > 1) return -ERANGE; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENOENT; psi = cgroup_psi(cgrp); if (psi->enabled != enable) { int i; /* show or hide {cpu,memory,io,irq}.pressure files */ for (i = 0; i < NR_PSI_RESOURCES; i++) cgroup_file_show(&cgrp->psi_files[i], enable); psi->enabled = enable; if (enable) psi_cgroup_restart(psi); } cgroup_kn_unlock(of->kn); return nbytes; } static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, poll_table *pt) { struct cgroup_file_ctx *ctx = of->priv; return psi_trigger_poll(&ctx->psi.trigger, of->file, pt); } static void cgroup_pressure_release(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; psi_trigger_destroy(ctx->psi.trigger); } bool cgroup_psi_enabled(void) { if (static_branch_likely(&psi_disabled)) return false; return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0; } #else /* CONFIG_PSI */ bool cgroup_psi_enabled(void) { return false; } #endif /* CONFIG_PSI */ static int cgroup_freeze_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; seq_printf(seq, "%d\n", cgrp->freezer.freeze); return 0; } static ssize_t cgroup_freeze_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup *cgrp; ssize_t ret; int freeze; ret = kstrtoint(strstrip(buf), 0, &freeze); if (ret) return ret; if (freeze < 0 || freeze > 1) return -ERANGE; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENOENT; cgroup_freeze(cgrp, freeze); cgroup_kn_unlock(of->kn); return nbytes; } static void __cgroup_kill(struct cgroup *cgrp) { struct css_task_iter it; struct task_struct *task; lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); cgrp->kill_seq++; spin_unlock_irq(&css_set_lock); css_task_iter_start(&cgrp->self, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED, &it); while ((task = css_task_iter_next(&it))) { /* Ignore kernel threads here. */ if (task->flags & PF_KTHREAD) continue; /* Skip tasks that are already dying. */ if (__fatal_signal_pending(task)) continue; send_sig(SIGKILL, task, 0); } css_task_iter_end(&it); } static void cgroup_kill(struct cgroup *cgrp) { struct cgroup_subsys_state *css; struct cgroup *dsct; lockdep_assert_held(&cgroup_mutex); cgroup_for_each_live_descendant_pre(dsct, css, cgrp) __cgroup_kill(dsct); } static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { ssize_t ret = 0; int kill; struct cgroup *cgrp; ret = kstrtoint(strstrip(buf), 0, &kill); if (ret) return ret; if (kill != 1) return -ERANGE; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENOENT; /* * Killing is a process directed operation, i.e. the whole thread-group * is taken down so act like we do for cgroup.procs and only make this * writable in non-threaded cgroups. */ if (cgroup_is_threaded(cgrp)) ret = -EOPNOTSUPP; else cgroup_kill(cgrp); cgroup_kn_unlock(of->kn); return ret ?: nbytes; } static int cgroup_file_open(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); struct cgroup_file_ctx *ctx; int ret; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; ctx->ns = current->nsproxy->cgroup_ns; get_cgroup_ns(ctx->ns); of->priv = ctx; if (!cft->open) return 0; ret = cft->open(of); if (ret) { put_cgroup_ns(ctx->ns); kfree(ctx); } return ret; } static void cgroup_file_release(struct kernfs_open_file *of) { struct cftype *cft = of_cft(of); struct cgroup_file_ctx *ctx = of->priv; if (cft->release) cft->release(of); put_cgroup_ns(ctx->ns); kfree(ctx); } static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct cgroup_file_ctx *ctx = of->priv; struct cgroup *cgrp = kn_priv(of->kn); struct cftype *cft = of_cft(of); struct cgroup_subsys_state *css; int ret; if (!nbytes) return 0; /* * If namespaces are delegation boundaries, disallow writes to * files in an non-init namespace root from inside the namespace * except for the files explicitly marked delegatable - * eg. cgroup.procs, cgroup.threads and cgroup.subtree_control. */ if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) && !(cft->flags & CFTYPE_NS_DELEGATABLE) && ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp) return -EPERM; if (cft->write) return cft->write(of, buf, nbytes, off); /* * kernfs guarantees that a file isn't deleted with operations in * flight, which means that the matching css is and stays alive and * doesn't need to be pinned. The RCU locking is not necessary * either. It's just for the convenience of using cgroup_css(). */ rcu_read_lock(); css = cgroup_css(cgrp, cft->ss); rcu_read_unlock(); if (cft->write_u64) { unsigned long long v; ret = kstrtoull(buf, 0, &v); if (!ret) ret = cft->write_u64(css, cft, v); } else if (cft->write_s64) { long long v; ret = kstrtoll(buf, 0, &v); if (!ret) ret = cft->write_s64(css, cft, v); } else { ret = -EINVAL; } return ret ?: nbytes; } static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt) { struct cftype *cft = of_cft(of); if (cft->poll) return cft->poll(of, pt); return kernfs_generic_poll(of, pt); } static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos) { return seq_cft(seq)->seq_start(seq, ppos); } static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos) { return seq_cft(seq)->seq_next(seq, v, ppos); } static void cgroup_seqfile_stop(struct seq_file *seq, void *v) { if (seq_cft(seq)->seq_stop) seq_cft(seq)->seq_stop(seq, v); } static int cgroup_seqfile_show(struct seq_file *m, void *arg) { struct cftype *cft = seq_cft(m); struct cgroup_subsys_state *css = seq_css(m); if (cft->seq_show) return cft->seq_show(m, arg); if (cft->read_u64) seq_printf(m, "%llu\n", cft->read_u64(css, cft)); else if (cft->read_s64) seq_printf(m, "%lld\n", cft->read_s64(css, cft)); else return -EINVAL; return 0; } static struct kernfs_ops cgroup_kf_single_ops = { .atomic_write_len = PAGE_SIZE, .open = cgroup_file_open, .release = cgroup_file_release, .write = cgroup_file_write, .poll = cgroup_file_poll, .seq_show = cgroup_seqfile_show, }; static struct kernfs_ops cgroup_kf_ops = { .atomic_write_len = PAGE_SIZE, .open = cgroup_file_open, .release = cgroup_file_release, .write = cgroup_file_write, .poll = cgroup_file_poll, .seq_start = cgroup_seqfile_start, .seq_next = cgroup_seqfile_next, .seq_stop = cgroup_seqfile_stop, .seq_show = cgroup_seqfile_show, }; static void cgroup_file_notify_timer(struct timer_list *timer) { cgroup_file_notify(container_of(timer, struct cgroup_file, notify_timer)); } static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype *cft) { char name[CGROUP_FILE_NAME_MAX]; struct kernfs_node *kn; struct lock_class_key *key = NULL; #ifdef CONFIG_DEBUG_LOCK_ALLOC key = &cft->lockdep_key; #endif kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name), cgroup_file_mode(cft), current_fsuid(), current_fsgid(), 0, cft->kf_ops, cft, NULL, key); if (IS_ERR(kn)) return PTR_ERR(kn); if (cft->file_offset) { struct cgroup_file *cfile = (void *)css + cft->file_offset; timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0); spin_lock_irq(&cgroup_file_kn_lock); cfile->kn = kn; spin_unlock_irq(&cgroup_file_kn_lock); } return 0; } /** * cgroup_addrm_files - add or remove files to a cgroup directory * @css: the target css * @cgrp: the target cgroup (usually css->cgroup) * @cfts: array of cftypes to be added * @is_add: whether to add or remove * * Depending on @is_add, add or remove files defined by @cfts on @cgrp. * For removals, this function never fails. */ static int cgroup_addrm_files(struct cgroup_subsys_state *css, struct cgroup *cgrp, struct cftype cfts[], bool is_add) { struct cftype *cft, *cft_end = NULL; int ret = 0; lockdep_assert_held(&cgroup_mutex); restart: for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) { /* does cft->flags tell us to skip this file on @cgrp? */ if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp)) continue; if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp)) continue; if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp)) continue; if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp)) continue; if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug) continue; if (is_add) { ret = cgroup_add_file(css, cgrp, cft); if (ret) { pr_warn("%s: failed to add %s, err=%d\n", __func__, cft->name, ret); cft_end = cft; is_add = false; goto restart; } } else { cgroup_rm_file(cgrp, cft); } } return ret; } static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add) { struct cgroup_subsys *ss = cfts[0].ss; struct cgroup *root = &ss->root->cgrp; struct cgroup_subsys_state *css; int ret = 0; lockdep_assert_held(&cgroup_mutex); /* add/rm files for all cgroups created before */ css_for_each_descendant_pre(css, cgroup_css(root, ss)) { struct cgroup *cgrp = css->cgroup; if (!(css->flags & CSS_VISIBLE)) continue; ret = cgroup_addrm_files(css, cgrp, cfts, is_add); if (ret) break; } if (is_add && !ret) kernfs_activate(root->kn); return ret; } static void cgroup_exit_cftypes(struct cftype *cfts) { struct cftype *cft; for (cft = cfts; cft->name[0] != '\0'; cft++) { /* free copy for custom atomic_write_len, see init_cftypes() */ if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) kfree(cft->kf_ops); cft->kf_ops = NULL; cft->ss = NULL; /* revert flags set by cgroup core while adding @cfts */ cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL | __CFTYPE_ADDED); } } static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { struct cftype *cft; int ret = 0; for (cft = cfts; cft->name[0] != '\0'; cft++) { struct kernfs_ops *kf_ops; WARN_ON(cft->ss || cft->kf_ops); if (cft->flags & __CFTYPE_ADDED) { ret = -EBUSY; break; } if (cft->seq_start) kf_ops = &cgroup_kf_ops; else kf_ops = &cgroup_kf_single_ops; /* * Ugh... if @cft wants a custom max_write_len, we need to * make a copy of kf_ops to set its atomic_write_len. */ if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) { kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL); if (!kf_ops) { ret = -ENOMEM; break; } kf_ops->atomic_write_len = cft->max_write_len; } cft->kf_ops = kf_ops; cft->ss = ss; cft->flags |= __CFTYPE_ADDED; } if (ret) cgroup_exit_cftypes(cfts); return ret; } static void cgroup_rm_cftypes_locked(struct cftype *cfts) { lockdep_assert_held(&cgroup_mutex); list_del(&cfts->node); cgroup_apply_cftypes(cfts, false); cgroup_exit_cftypes(cfts); } /** * cgroup_rm_cftypes - remove an array of cftypes from a subsystem * @cfts: zero-length name terminated array of cftypes * * Unregister @cfts. Files described by @cfts are removed from all * existing cgroups and all future cgroups won't have them either. This * function can be called anytime whether @cfts' subsys is attached or not. * * Returns 0 on successful unregistration, -ENOENT if @cfts is not * registered. */ int cgroup_rm_cftypes(struct cftype *cfts) { if (!cfts || cfts[0].name[0] == '\0') return 0; if (!(cfts[0].flags & __CFTYPE_ADDED)) return -ENOENT; cgroup_lock(); cgroup_rm_cftypes_locked(cfts); cgroup_unlock(); return 0; } /** * cgroup_add_cftypes - add an array of cftypes to a subsystem * @ss: target cgroup subsystem * @cfts: zero-length name terminated array of cftypes * * Register @cfts to @ss. Files described by @cfts are created for all * existing cgroups to which @ss is attached and all future cgroups will * have them too. This function can be called anytime whether @ss is * attached or not. * * Returns 0 on successful registration, -errno on failure. Note that this * function currently returns 0 as long as @cfts registration is successful * even if some file creation attempts on existing cgroups fail. */ int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { int ret; if (!cgroup_ssid_enabled(ss->id)) return 0; if (!cfts || cfts[0].name[0] == '\0') return 0; ret = cgroup_init_cftypes(ss, cfts); if (ret) return ret; cgroup_lock(); list_add_tail(&cfts->node, &ss->cfts); ret = cgroup_apply_cftypes(cfts, true); if (ret) cgroup_rm_cftypes_locked(cfts); cgroup_unlock(); return ret; } /** * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy * @ss: target cgroup subsystem * @cfts: zero-length name terminated array of cftypes * * Similar to cgroup_add_cftypes() but the added files are only used for * the default hierarchy. */ int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) cft->flags |= __CFTYPE_ONLY_ON_DFL; return cgroup_add_cftypes(ss, cfts); } /** * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies * @ss: target cgroup subsystem * @cfts: zero-length name terminated array of cftypes * * Similar to cgroup_add_cftypes() but the added files are only used for * the legacy hierarchies. */ int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) { struct cftype *cft; for (cft = cfts; cft && cft->name[0] != '\0'; cft++) cft->flags |= __CFTYPE_NOT_ON_DFL; return cgroup_add_cftypes(ss, cfts); } /** * cgroup_file_notify - generate a file modified event for a cgroup_file * @cfile: target cgroup_file * * @cfile must have been obtained by setting cftype->file_offset. */ void cgroup_file_notify(struct cgroup_file *cfile) { unsigned long flags; spin_lock_irqsave(&cgroup_file_kn_lock, flags); if (cfile->kn) { unsigned long last = cfile->notified_at; unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV; if (time_in_range(jiffies, last, next)) { timer_reduce(&cfile->notify_timer, next); } else { kernfs_notify(cfile->kn); cfile->notified_at = jiffies; } } spin_unlock_irqrestore(&cgroup_file_kn_lock, flags); } /** * cgroup_file_show - show or hide a hidden cgroup file * @cfile: target cgroup_file obtained by setting cftype->file_offset * @show: whether to show or hide */ void cgroup_file_show(struct cgroup_file *cfile, bool show) { struct kernfs_node *kn; spin_lock_irq(&cgroup_file_kn_lock); kn = cfile->kn; kernfs_get(kn); spin_unlock_irq(&cgroup_file_kn_lock); if (kn) kernfs_show(kn, show); kernfs_put(kn); } /** * css_next_child - find the next child of a given css * @pos: the current position (%NULL to initiate traversal) * @parent: css whose children to walk * * This function returns the next child of @parent and should be called * under either cgroup_mutex or RCU read lock. The only requirement is * that @parent and @pos are accessible. The next sibling is guaranteed to * be returned regardless of their states. * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *parent) { struct cgroup_subsys_state *next; cgroup_assert_mutex_or_rcu_locked(); /* * @pos could already have been unlinked from the sibling list. * Once a cgroup is removed, its ->sibling.next is no longer * updated when its next sibling changes. CSS_RELEASED is set when * @pos is taken off list, at which time its next pointer is valid, * and, as releases are serialized, the one pointed to by the next * pointer is guaranteed to not have started release yet. This * implies that if we observe !CSS_RELEASED on @pos in this RCU * critical section, the one pointed to by its next pointer is * guaranteed to not have finished its RCU grace period even if we * have dropped rcu_read_lock() in-between iterations. * * If @pos has CSS_RELEASED set, its next pointer can't be * dereferenced; however, as each css is given a monotonically * increasing unique serial number and always appended to the * sibling list, the next one can be found by walking the parent's * children until the first css with higher serial number than * @pos's. While this path can be slower, it happens iff iteration * races against release and the race window is very small. */ if (!pos) { next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling); } else if (likely(!(pos->flags & CSS_RELEASED))) { next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling); } else { list_for_each_entry_rcu(next, &parent->children, sibling, lockdep_is_held(&cgroup_mutex)) if (next->serial_nr > pos->serial_nr) break; } /* * @next, if not pointing to the head, can be dereferenced and is * the next sibling. */ if (&next->sibling != &parent->children) return next; return NULL; } /** * css_next_descendant_pre - find the next descendant for pre-order walk * @pos: the current position (%NULL to initiate traversal) * @root: css whose descendants to walk * * To be used by css_for_each_descendant_pre(). Find the next descendant * to visit for pre-order traversal of @root's descendants. @root is * included in the iteration and the first node to be visited. * * While this function requires cgroup_mutex or RCU read locking, it * doesn't require the whole traversal to be contained in a single critical * section. Additionally, it isn't necessary to hold onto a reference to @pos. * This function will return the correct next descendant as long as both @pos * and @root are accessible and @pos is a descendant of @root. * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state * css_next_descendant_pre(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *root) { struct cgroup_subsys_state *next; cgroup_assert_mutex_or_rcu_locked(); /* if first iteration, visit @root */ if (!pos) return root; /* visit the first child if exists */ next = css_next_child(NULL, pos); if (next) return next; /* no child, visit my or the closest ancestor's next sibling */ while (pos != root) { next = css_next_child(pos, pos->parent); if (next) return next; pos = pos->parent; } return NULL; } EXPORT_SYMBOL_GPL(css_next_descendant_pre); /** * css_rightmost_descendant - return the rightmost descendant of a css * @pos: css of interest * * Return the rightmost descendant of @pos. If there's no descendant, @pos * is returned. This can be used during pre-order traversal to skip * subtree of @pos. * * While this function requires cgroup_mutex or RCU read locking, it * doesn't require the whole traversal to be contained in a single critical * section. Additionally, it isn't necessary to hold onto a reference to @pos. * This function will return the correct rightmost descendant as long as @pos * is accessible. */ struct cgroup_subsys_state * css_rightmost_descendant(struct cgroup_subsys_state *pos) { struct cgroup_subsys_state *last, *tmp; cgroup_assert_mutex_or_rcu_locked(); do { last = pos; /* ->prev isn't RCU safe, walk ->next till the end */ pos = NULL; css_for_each_child(tmp, last) pos = tmp; } while (pos); return last; } static struct cgroup_subsys_state * css_leftmost_descendant(struct cgroup_subsys_state *pos) { struct cgroup_subsys_state *last; do { last = pos; pos = css_next_child(NULL, pos); } while (pos); return last; } /** * css_next_descendant_post - find the next descendant for post-order walk * @pos: the current position (%NULL to initiate traversal) * @root: css whose descendants to walk * * To be used by css_for_each_descendant_post(). Find the next descendant * to visit for post-order traversal of @root's descendants. @root is * included in the iteration and the last node to be visited. * * While this function requires cgroup_mutex or RCU read locking, it * doesn't require the whole traversal to be contained in a single critical * section. Additionally, it isn't necessary to hold onto a reference to @pos. * This function will return the correct next descendant as long as both @pos * and @cgroup are accessible and @pos is a descendant of @cgroup. * * If a subsystem synchronizes ->css_online() and the start of iteration, a * css which finished ->css_online() is guaranteed to be visible in the * future iterations and will stay visible until the last reference is put. * A css which hasn't finished ->css_online() or already finished * ->css_offline() may show up during traversal. It's each subsystem's * responsibility to synchronize against on/offlining. */ struct cgroup_subsys_state * css_next_descendant_post(struct cgroup_subsys_state *pos, struct cgroup_subsys_state *root) { struct cgroup_subsys_state *next; cgroup_assert_mutex_or_rcu_locked(); /* if first iteration, visit leftmost descendant which may be @root */ if (!pos) return css_leftmost_descendant(root); /* if we visited @root, we're done */ if (pos == root) return NULL; /* if there's an unvisited sibling, visit its leftmost descendant */ next = css_next_child(pos, pos->parent); if (next) return css_leftmost_descendant(next); /* no sibling left, visit parent */ return pos->parent; } /** * css_has_online_children - does a css have online children * @css: the target css * * Returns %true if @css has any online children; otherwise, %false. This * function can be called from any context but the caller is responsible * for synchronizing against on/offlining as necessary. */ bool css_has_online_children(struct cgroup_subsys_state *css) { struct cgroup_subsys_state *child; bool ret = false; rcu_read_lock(); css_for_each_child(child, css) { if (child->flags & CSS_ONLINE) { ret = true; break; } } rcu_read_unlock(); return ret; } static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it) { struct list_head *l; struct cgrp_cset_link *link; struct css_set *cset; lockdep_assert_held(&css_set_lock); /* find the next threaded cset */ if (it->tcset_pos) { l = it->tcset_pos->next; if (l != it->tcset_head) { it->tcset_pos = l; return container_of(l, struct css_set, threaded_csets_node); } it->tcset_pos = NULL; } /* find the next cset */ l = it->cset_pos; l = l->next; if (l == it->cset_head) { it->cset_pos = NULL; return NULL; } if (it->ss) { cset = container_of(l, struct css_set, e_cset_node[it->ss->id]); } else { link = list_entry(l, struct cgrp_cset_link, cset_link); cset = link->cset; } it->cset_pos = l; /* initialize threaded css_set walking */ if (it->flags & CSS_TASK_ITER_THREADED) { if (it->cur_dcset) put_css_set_locked(it->cur_dcset); it->cur_dcset = cset; get_css_set(cset); it->tcset_head = &cset->threaded_csets; it->tcset_pos = &cset->threaded_csets; } return cset; } /** * css_task_iter_advance_css_set - advance a task iterator to the next css_set * @it: the iterator to advance * * Advance @it to the next css_set to walk. */ static void css_task_iter_advance_css_set(struct css_task_iter *it) { struct css_set *cset; lockdep_assert_held(&css_set_lock); /* Advance to the next non-empty css_set and find first non-empty tasks list*/ while ((cset = css_task_iter_next_css_set(it))) { if (!list_empty(&cset->tasks)) { it->cur_tasks_head = &cset->tasks; break; } else if (!list_empty(&cset->mg_tasks)) { it->cur_tasks_head = &cset->mg_tasks; break; } else if (!list_empty(&cset->dying_tasks)) { it->cur_tasks_head = &cset->dying_tasks; break; } } if (!cset) { it->task_pos = NULL; return; } it->task_pos = it->cur_tasks_head->next; /* * We don't keep css_sets locked across iteration steps and thus * need to take steps to ensure that iteration can be resumed after * the lock is re-acquired. Iteration is performed at two levels - * css_sets and tasks in them. * * Once created, a css_set never leaves its cgroup lists, so a * pinned css_set is guaranteed to stay put and we can resume * iteration afterwards. * * Tasks may leave @cset across iteration steps. This is resolved * by registering each iterator with the css_set currently being * walked and making css_set_move_task() advance iterators whose * next task is leaving. */ if (it->cur_cset) { list_del(&it->iters_node); put_css_set_locked(it->cur_cset); } get_css_set(cset); it->cur_cset = cset; list_add(&it->iters_node, &cset->task_iters); } static void css_task_iter_skip(struct css_task_iter *it, struct task_struct *task) { lockdep_assert_held(&css_set_lock); if (it->task_pos == &task->cg_list) { it->task_pos = it->task_pos->next; it->flags |= CSS_TASK_ITER_SKIPPED; } } static void css_task_iter_advance(struct css_task_iter *it) { struct task_struct *task; lockdep_assert_held(&css_set_lock); repeat: if (it->task_pos) { /* * Advance iterator to find next entry. We go through cset * tasks, mg_tasks and dying_tasks, when consumed we move onto * the next cset. */ if (it->flags & CSS_TASK_ITER_SKIPPED) it->flags &= ~CSS_TASK_ITER_SKIPPED; else it->task_pos = it->task_pos->next; if (it->task_pos == &it->cur_cset->tasks) { it->cur_tasks_head = &it->cur_cset->mg_tasks; it->task_pos = it->cur_tasks_head->next; } if (it->task_pos == &it->cur_cset->mg_tasks) { it->cur_tasks_head = &it->cur_cset->dying_tasks; it->task_pos = it->cur_tasks_head->next; } if (it->task_pos == &it->cur_cset->dying_tasks) css_task_iter_advance_css_set(it); } else { /* called from start, proceed to the first cset */ css_task_iter_advance_css_set(it); } if (!it->task_pos) return; task = list_entry(it->task_pos, struct task_struct, cg_list); if (it->flags & CSS_TASK_ITER_PROCS) { /* if PROCS, skip over tasks which aren't group leaders */ if (!thread_group_leader(task)) goto repeat; /* and dying leaders w/o live member threads */ if (it->cur_tasks_head == &it->cur_cset->dying_tasks && !atomic_read(&task->signal->live)) goto repeat; } else { /* skip all dying ones */ if (it->cur_tasks_head == &it->cur_cset->dying_tasks) goto repeat; } } /** * css_task_iter_start - initiate task iteration * @css: the css to walk tasks of * @flags: CSS_TASK_ITER_* flags * @it: the task iterator to use * * Initiate iteration through the tasks of @css. The caller can call * css_task_iter_next() to walk through the tasks until the function * returns NULL. On completion of iteration, css_task_iter_end() must be * called. */ void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags, struct css_task_iter *it) { unsigned long irqflags; memset(it, 0, sizeof(*it)); spin_lock_irqsave(&css_set_lock, irqflags); it->ss = css->ss; it->flags = flags; if (CGROUP_HAS_SUBSYS_CONFIG && it->ss) it->cset_pos = &css->cgroup->e_csets[css->ss->id]; else it->cset_pos = &css->cgroup->cset_links; it->cset_head = it->cset_pos; css_task_iter_advance(it); spin_unlock_irqrestore(&css_set_lock, irqflags); } /** * css_task_iter_next - return the next task for the iterator * @it: the task iterator being iterated * * The "next" function for task iteration. @it should have been * initialized via css_task_iter_start(). Returns NULL when the iteration * reaches the end. */ struct task_struct *css_task_iter_next(struct css_task_iter *it) { unsigned long irqflags; if (it->cur_task) { put_task_struct(it->cur_task); it->cur_task = NULL; } spin_lock_irqsave(&css_set_lock, irqflags); /* @it may be half-advanced by skips, finish advancing */ if (it->flags & CSS_TASK_ITER_SKIPPED) css_task_iter_advance(it); if (it->task_pos) { it->cur_task = list_entry(it->task_pos, struct task_struct, cg_list); get_task_struct(it->cur_task); css_task_iter_advance(it); } spin_unlock_irqrestore(&css_set_lock, irqflags); return it->cur_task; } /** * css_task_iter_end - finish task iteration * @it: the task iterator to finish * * Finish task iteration started by css_task_iter_start(). */ void css_task_iter_end(struct css_task_iter *it) { unsigned long irqflags; if (it->cur_cset) { spin_lock_irqsave(&css_set_lock, irqflags); list_del(&it->iters_node); put_css_set_locked(it->cur_cset); spin_unlock_irqrestore(&css_set_lock, irqflags); } if (it->cur_dcset) put_css_set(it->cur_dcset); if (it->cur_task) put_task_struct(it->cur_task); } static void cgroup_procs_release(struct kernfs_open_file *of) { struct cgroup_file_ctx *ctx = of->priv; if (ctx->procs.started) css_task_iter_end(&ctx->procs.iter); } static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos) { struct kernfs_open_file *of = s->private; struct cgroup_file_ctx *ctx = of->priv; if (pos) (*pos)++; return css_task_iter_next(&ctx->procs.iter); } static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos, unsigned int iter_flags) { struct kernfs_open_file *of = s->private; struct cgroup *cgrp = seq_css(s)->cgroup; struct cgroup_file_ctx *ctx = of->priv; struct css_task_iter *it = &ctx->procs.iter; /* * When a seq_file is seeked, it's always traversed sequentially * from position 0, so we can simply keep iterating on !0 *pos. */ if (!ctx->procs.started) { if (WARN_ON_ONCE((*pos))) return ERR_PTR(-EINVAL); css_task_iter_start(&cgrp->self, iter_flags, it); ctx->procs.started = true; } else if (!(*pos)) { css_task_iter_end(it); css_task_iter_start(&cgrp->self, iter_flags, it); } else return it->cur_task; return cgroup_procs_next(s, NULL, NULL); } static void *cgroup_procs_start(struct seq_file *s, loff_t *pos) { struct cgroup *cgrp = seq_css(s)->cgroup; /* * All processes of a threaded subtree belong to the domain cgroup * of the subtree. Only threads can be distributed across the * subtree. Reject reads on cgroup.procs in the subtree proper. * They're always empty anyway. */ if (cgroup_is_threaded(cgrp)) return ERR_PTR(-EOPNOTSUPP); return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED); } static int cgroup_procs_show(struct seq_file *s, void *v) { seq_printf(s, "%d\n", task_pid_vnr(v)); return 0; } static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb) { int ret; struct inode *inode; lockdep_assert_held(&cgroup_mutex); inode = kernfs_get_inode(sb, cgrp->procs_file.kn); if (!inode) return -ENOMEM; ret = inode_permission(&nop_mnt_idmap, inode, MAY_WRITE); iput(inode); return ret; } static int cgroup_procs_write_permission(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, struct super_block *sb, struct cgroup_namespace *ns) { struct cgroup *com_cgrp = src_cgrp; int ret; lockdep_assert_held(&cgroup_mutex); /* find the common ancestor */ while (!cgroup_is_descendant(dst_cgrp, com_cgrp)) com_cgrp = cgroup_parent(com_cgrp); /* %current should be authorized to migrate to the common ancestor */ ret = cgroup_may_write(com_cgrp, sb); if (ret) return ret; /* * If namespaces are delegation boundaries, %current must be able * to see both source and destination cgroups from its namespace. */ if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) && (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) || !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp))) return -ENOENT; return 0; } static int cgroup_attach_permissions(struct cgroup *src_cgrp, struct cgroup *dst_cgrp, struct super_block *sb, bool threadgroup, struct cgroup_namespace *ns) { int ret = 0; ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns); if (ret) return ret; ret = cgroup_migrate_vet_dst(dst_cgrp); if (ret) return ret; if (!threadgroup && (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp)) ret = -EOPNOTSUPP; return ret; } static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, bool threadgroup) { struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; const struct cred *saved_cred; ssize_t ret; bool threadgroup_locked; dst_cgrp = cgroup_kn_lock_live(of->kn, false); if (!dst_cgrp) return -ENODEV; task = cgroup_procs_write_start(buf, threadgroup, &threadgroup_locked); ret = PTR_ERR_OR_ZERO(task); if (ret) goto out_unlock; /* find the source cgroup */ spin_lock_irq(&css_set_lock); src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root); spin_unlock_irq(&css_set_lock); /* * Process and thread migrations follow same delegation rule. Check * permissions using the credentials from file open to protect against * inherited fd attacks. */ saved_cred = override_creds(of->file->f_cred); ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup, ctx->ns); revert_creds(saved_cred); if (ret) goto out_finish; ret = cgroup_attach_task(dst_cgrp, task, threadgroup); out_finish: cgroup_procs_write_finish(task, threadgroup_locked); out_unlock: cgroup_kn_unlock(of->kn); return ret; } static ssize_t cgroup_procs_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return __cgroup_procs_write(of, buf, true) ?: nbytes; } static void *cgroup_threads_start(struct seq_file *s, loff_t *pos) { return __cgroup_procs_start(s, pos, 0); } static ssize_t cgroup_threads_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { return __cgroup_procs_write(of, buf, false) ?: nbytes; } /* cgroup core interface files for the default hierarchy */ static struct cftype cgroup_base_files[] = { { .name = "cgroup.type", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_type_show, .write = cgroup_type_write, }, { .name = "cgroup.procs", .flags = CFTYPE_NS_DELEGATABLE, .file_offset = offsetof(struct cgroup, procs_file), .release = cgroup_procs_release, .seq_start = cgroup_procs_start, .seq_next = cgroup_procs_next, .seq_show = cgroup_procs_show, .write = cgroup_procs_write, }, { .name = "cgroup.threads", .flags = CFTYPE_NS_DELEGATABLE, .release = cgroup_procs_release, .seq_start = cgroup_threads_start, .seq_next = cgroup_procs_next, .seq_show = cgroup_procs_show, .write = cgroup_threads_write, }, { .name = "cgroup.controllers", .seq_show = cgroup_controllers_show, }, { .name = "cgroup.subtree_control", .flags = CFTYPE_NS_DELEGATABLE, .seq_show = cgroup_subtree_control_show, .write = cgroup_subtree_control_write, }, { .name = "cgroup.events", .flags = CFTYPE_NOT_ON_ROOT, .file_offset = offsetof(struct cgroup, events_file), .seq_show = cgroup_events_show, }, { .name = "cgroup.max.descendants", .seq_show = cgroup_max_descendants_show, .write = cgroup_max_descendants_write, }, { .name = "cgroup.max.depth", .seq_show = cgroup_max_depth_show, .write = cgroup_max_depth_write, }, { .name = "cgroup.stat", .seq_show = cgroup_stat_show, }, { .name = "cgroup.freeze", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = cgroup_freeze_show, .write = cgroup_freeze_write, }, { .name = "cgroup.kill", .flags = CFTYPE_NOT_ON_ROOT, .write = cgroup_kill_write, }, { .name = "cpu.stat", .seq_show = cpu_stat_show, }, { .name = "cpu.stat.local", .seq_show = cpu_local_stat_show, }, { } /* terminate */ }; static struct cftype cgroup_psi_files[] = { #ifdef CONFIG_PSI { .name = "io.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_IO]), .seq_show = cgroup_io_pressure_show, .write = cgroup_io_pressure_write, .poll = cgroup_pressure_poll, .release = cgroup_pressure_release, }, { .name = "memory.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_MEM]), .seq_show = cgroup_memory_pressure_show, .write = cgroup_memory_pressure_write, .poll = cgroup_pressure_poll, .release = cgroup_pressure_release, }, { .name = "cpu.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_CPU]), .seq_show = cgroup_cpu_pressure_show, .write = cgroup_cpu_pressure_write, .poll = cgroup_pressure_poll, .release = cgroup_pressure_release, }, #ifdef CONFIG_IRQ_TIME_ACCOUNTING { .name = "irq.pressure", .file_offset = offsetof(struct cgroup, psi_files[PSI_IRQ]), .seq_show = cgroup_irq_pressure_show, .write = cgroup_irq_pressure_write, .poll = cgroup_pressure_poll, .release = cgroup_pressure_release, }, #endif { .name = "cgroup.pressure", .seq_show = cgroup_pressure_show, .write = cgroup_pressure_write, }, #endif /* CONFIG_PSI */ { } /* terminate */ }; /* * css destruction is four-stage process. * * 1. Destruction starts. Killing of the percpu_ref is initiated. * Implemented in kill_css(). * * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs * and thus css_tryget_online() is guaranteed to fail, the css can be * offlined by invoking offline_css(). After offlining, the base ref is * put. Implemented in css_killed_work_fn(). * * 3. When the percpu_ref reaches zero, the only possible remaining * accessors are inside RCU read sections. css_release() schedules the * RCU callback. * * 4. After the grace period, the css can be freed. Implemented in * css_free_rwork_fn(). * * It is actually hairier because both step 2 and 4 require process context * and thus involve punting to css->destroy_work adding two additional * steps to the already complex sequence. */ static void css_free_rwork_fn(struct work_struct *work) { struct cgroup_subsys_state *css = container_of(to_rcu_work(work), struct cgroup_subsys_state, destroy_rwork); struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; percpu_ref_exit(&css->refcnt); if (ss) { /* css free path */ struct cgroup_subsys_state *parent = css->parent; int id = css->id; ss->css_free(css); cgroup_idr_remove(&ss->css_idr, id); cgroup_put(cgrp); if (parent) css_put(parent); } else { /* cgroup free path */ atomic_dec(&cgrp->root->nr_cgrps); if (!cgroup_on_dfl(cgrp)) cgroup1_pidlist_destroy_all(cgrp); cancel_work_sync(&cgrp->release_agent_work); bpf_cgrp_storage_free(cgrp); if (cgroup_parent(cgrp)) { /* * We get a ref to the parent, and put the ref when * this cgroup is being freed, so it's guaranteed * that the parent won't be destroyed before its * children. */ cgroup_put(cgroup_parent(cgrp)); kernfs_put(cgrp->kn); psi_cgroup_free(cgrp); cgroup_rstat_exit(cgrp); kfree(cgrp); } else { /* * This is root cgroup's refcnt reaching zero, * which indicates that the root should be * released. */ cgroup_destroy_root(cgrp->root); } } } static void css_release_work_fn(struct work_struct *work) { struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, destroy_work); struct cgroup_subsys *ss = css->ss; struct cgroup *cgrp = css->cgroup; cgroup_lock(); css->flags |= CSS_RELEASED; list_del_rcu(&css->sibling); if (ss) { struct cgroup *parent_cgrp; /* css release path */ if (!list_empty(&css->rstat_css_node)) { cgroup_rstat_flush(cgrp); list_del_rcu(&css->rstat_css_node); } cgroup_idr_replace(&ss->css_idr, NULL, css->id); if (ss->css_released) ss->css_released(css); cgrp->nr_dying_subsys[ss->id]--; /* * When a css is released and ready to be freed, its * nr_descendants must be zero. However, the corresponding * cgrp->nr_dying_subsys[ss->id] may not be 0 if a subsystem * is activated and deactivated multiple times with one or * more of its previous activation leaving behind dying csses. */ WARN_ON_ONCE(css->nr_descendants); parent_cgrp = cgroup_parent(cgrp); while (parent_cgrp) { parent_cgrp->nr_dying_subsys[ss->id]--; parent_cgrp = cgroup_parent(parent_cgrp); } } else { struct cgroup *tcgrp; /* cgroup release path */ TRACE_CGROUP_PATH(release, cgrp); cgroup_rstat_flush(cgrp); spin_lock_irq(&css_set_lock); for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) tcgrp->nr_dying_descendants--; spin_unlock_irq(&css_set_lock); /* * There are two control paths which try to determine * cgroup from dentry without going through kernfs - * cgroupstats_build() and css_tryget_online_from_dir(). * Those are supported by RCU protecting clearing of * cgrp->kn->priv backpointer. */ if (cgrp->kn) RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv, NULL); } cgroup_unlock(); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); } static void css_release(struct percpu_ref *ref) { struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); INIT_WORK(&css->destroy_work, css_release_work_fn); queue_work(cgroup_destroy_wq, &css->destroy_work); } static void init_and_link_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss, struct cgroup *cgrp) { lockdep_assert_held(&cgroup_mutex); cgroup_get_live(cgrp); memset(css, 0, sizeof(*css)); css->cgroup = cgrp; css->ss = ss; css->id = -1; INIT_LIST_HEAD(&css->sibling); INIT_LIST_HEAD(&css->children); INIT_LIST_HEAD(&css->rstat_css_node); css->serial_nr = css_serial_nr_next++; atomic_set(&css->online_cnt, 0); if (cgroup_parent(cgrp)) { css->parent = cgroup_css(cgroup_parent(cgrp), ss); css_get(css->parent); } if (ss->css_rstat_flush) list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list); BUG_ON(cgroup_css(cgrp, ss)); } /* invoke ->css_online() on a new CSS and mark it online if successful */ static int online_css(struct cgroup_subsys_state *css) { struct cgroup_subsys *ss = css->ss; int ret = 0; lockdep_assert_held(&cgroup_mutex); if (ss->css_online) ret = ss->css_online(css); if (!ret) { css->flags |= CSS_ONLINE; rcu_assign_pointer(css->cgroup->subsys[ss->id], css); atomic_inc(&css->online_cnt); if (css->parent) { atomic_inc(&css->parent->online_cnt); while ((css = css->parent)) css->nr_descendants++; } } return ret; } /* if the CSS is online, invoke ->css_offline() on it and mark it offline */ static void offline_css(struct cgroup_subsys_state *css) { struct cgroup_subsys *ss = css->ss; lockdep_assert_held(&cgroup_mutex); if (!(css->flags & CSS_ONLINE)) return; if (ss->css_offline) ss->css_offline(css); css->flags &= ~CSS_ONLINE; RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL); wake_up_all(&css->cgroup->offline_waitq); css->cgroup->nr_dying_subsys[ss->id]++; /* * Parent css and cgroup cannot be freed until after the freeing * of child css, see css_free_rwork_fn(). */ while ((css = css->parent)) { css->nr_descendants--; css->cgroup->nr_dying_subsys[ss->id]++; } } /** * css_create - create a cgroup_subsys_state * @cgrp: the cgroup new css will be associated with * @ss: the subsys of new css * * Create a new css associated with @cgrp - @ss pair. On success, the new * css is online and installed in @cgrp. This function doesn't create the * interface files. Returns 0 on success, -errno on failure. */ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, struct cgroup_subsys *ss) { struct cgroup *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss); struct cgroup_subsys_state *css; int err; lockdep_assert_held(&cgroup_mutex); css = ss->css_alloc(parent_css); if (!css) css = ERR_PTR(-ENOMEM); if (IS_ERR(css)) return css; init_and_link_css(css, ss, cgrp); err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL); if (err) goto err_free_css; err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL); if (err < 0) goto err_free_css; css->id = err; /* @css is ready to be brought online now, make it visible */ list_add_tail_rcu(&css->sibling, &parent_css->children); cgroup_idr_replace(&ss->css_idr, css, css->id); err = online_css(css); if (err) goto err_list_del; return css; err_list_del: list_del_rcu(&css->sibling); err_free_css: list_del_rcu(&css->rstat_css_node); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); } /* * The returned cgroup is fully initialized including its control mask, but * it doesn't have the control mask applied. */ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name, umode_t mode) { struct cgroup_root *root = parent->root; struct cgroup *cgrp, *tcgrp; struct kernfs_node *kn; int level = parent->level + 1; int ret; /* allocate the cgroup and its ID, 0 is reserved for the root */ cgrp = kzalloc(struct_size(cgrp, ancestors, (level + 1)), GFP_KERNEL); if (!cgrp) return ERR_PTR(-ENOMEM); ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL); if (ret) goto out_free_cgrp; ret = cgroup_rstat_init(cgrp); if (ret) goto out_cancel_ref; /* create the directory */ kn = kernfs_create_dir_ns(parent->kn, name, mode, current_fsuid(), current_fsgid(), cgrp, NULL); if (IS_ERR(kn)) { ret = PTR_ERR(kn); goto out_stat_exit; } cgrp->kn = kn; init_cgroup_housekeeping(cgrp); cgrp->self.parent = &parent->self; cgrp->root = root; cgrp->level = level; ret = psi_cgroup_alloc(cgrp); if (ret) goto out_kernfs_remove; if (cgrp->root == &cgrp_dfl_root) { ret = cgroup_bpf_inherit(cgrp); if (ret) goto out_psi_free; } /* * New cgroup inherits effective freeze counter, and * if the parent has to be frozen, the child has too. */ cgrp->freezer.e_freeze = parent->freezer.e_freeze; if (cgrp->freezer.e_freeze) { /* * Set the CGRP_FREEZE flag, so when a process will be * attached to the child cgroup, it will become frozen. * At this point the new cgroup is unpopulated, so we can * consider it frozen immediately. */ set_bit(CGRP_FREEZE, &cgrp->flags); set_bit(CGRP_FROZEN, &cgrp->flags); } spin_lock_irq(&css_set_lock); for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) { cgrp->ancestors[tcgrp->level] = tcgrp; if (tcgrp != cgrp) { tcgrp->nr_descendants++; /* * If the new cgroup is frozen, all ancestor cgroups * get a new frozen descendant, but their state can't * change because of this. */ if (cgrp->freezer.e_freeze) tcgrp->freezer.nr_frozen_descendants++; } } spin_unlock_irq(&css_set_lock); if (notify_on_release(parent)) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); cgrp->self.serial_nr = css_serial_nr_next++; /* allocation complete, commit to creation */ list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children); atomic_inc(&root->nr_cgrps); cgroup_get_live(parent); /* * On the default hierarchy, a child doesn't automatically inherit * subtree_control from the parent. Each is configured manually. */ if (!cgroup_on_dfl(cgrp)) cgrp->subtree_control = cgroup_control(cgrp); cgroup_propagate_control(cgrp); return cgrp; out_psi_free: psi_cgroup_free(cgrp); out_kernfs_remove: kernfs_remove(cgrp->kn); out_stat_exit: cgroup_rstat_exit(cgrp); out_cancel_ref: percpu_ref_exit(&cgrp->self.refcnt); out_free_cgrp: kfree(cgrp); return ERR_PTR(ret); } static bool cgroup_check_hierarchy_limits(struct cgroup *parent) { struct cgroup *cgroup; int ret = false; int level = 0; lockdep_assert_held(&cgroup_mutex); for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) { if (cgroup->nr_descendants >= cgroup->max_descendants) goto fail; if (level >= cgroup->max_depth) goto fail; level++; } ret = true; fail: return ret; } int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode) { struct cgroup *parent, *cgrp; int ret; /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */ if (strchr(name, '\n')) return -EINVAL; parent = cgroup_kn_lock_live(parent_kn, false); if (!parent) return -ENODEV; if (!cgroup_check_hierarchy_limits(parent)) { ret = -EAGAIN; goto out_unlock; } cgrp = cgroup_create(parent, name, mode); if (IS_ERR(cgrp)) { ret = PTR_ERR(cgrp); goto out_unlock; } /* * This extra ref will be put in css_free_rwork_fn() and guarantees * that @cgrp->kn is always accessible. */ kernfs_get(cgrp->kn); ret = css_populate_dir(&cgrp->self); if (ret) goto out_destroy; ret = cgroup_apply_control_enable(cgrp); if (ret) goto out_destroy; TRACE_CGROUP_PATH(mkdir, cgrp); /* let's create and online css's */ kernfs_activate(cgrp->kn); ret = 0; goto out_unlock; out_destroy: cgroup_destroy_locked(cgrp); out_unlock: cgroup_kn_unlock(parent_kn); return ret; } /* * This is called when the refcnt of a css is confirmed to be killed. * css_tryget_online() is now guaranteed to fail. Tell the subsystem to * initiate destruction and put the css ref from kill_css(). */ static void css_killed_work_fn(struct work_struct *work) { struct cgroup_subsys_state *css = container_of(work, struct cgroup_subsys_state, destroy_work); cgroup_lock(); do { offline_css(css); css_put(css); /* @css can't go away while we're holding cgroup_mutex */ css = css->parent; } while (css && atomic_dec_and_test(&css->online_cnt)); cgroup_unlock(); } /* css kill confirmation processing requires process context, bounce */ static void css_killed_ref_fn(struct percpu_ref *ref) { struct cgroup_subsys_state *css = container_of(ref, struct cgroup_subsys_state, refcnt); if (atomic_dec_and_test(&css->online_cnt)) { INIT_WORK(&css->destroy_work, css_killed_work_fn); queue_work(cgroup_destroy_wq, &css->destroy_work); } } /** * kill_css - destroy a css * @css: css to destroy * * This function initiates destruction of @css by removing cgroup interface * files and putting its base reference. ->css_offline() will be invoked * asynchronously once css_tryget_online() is guaranteed to fail and when * the reference count reaches zero, @css will be released. */ static void kill_css(struct cgroup_subsys_state *css) { lockdep_assert_held(&cgroup_mutex); if (css->flags & CSS_DYING) return; /* * Call css_killed(), if defined, before setting the CSS_DYING flag */ if (css->ss->css_killed) css->ss->css_killed(css); css->flags |= CSS_DYING; /* * This must happen before css is disassociated with its cgroup. * See seq_css() for details. */ css_clear_dir(css); /* * Killing would put the base ref, but we need to keep it alive * until after ->css_offline(). */ css_get(css); /* * cgroup core guarantees that, by the time ->css_offline() is * invoked, no new css reference will be given out via * css_tryget_online(). We can't simply call percpu_ref_kill() and * proceed to offlining css's because percpu_ref_kill() doesn't * guarantee that the ref is seen as killed on all CPUs on return. * * Use percpu_ref_kill_and_confirm() to get notifications as each * css is confirmed to be seen as killed on all CPUs. */ percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn); } /** * cgroup_destroy_locked - the first stage of cgroup destruction * @cgrp: cgroup to be destroyed * * css's make use of percpu refcnts whose killing latency shouldn't be * exposed to userland and are RCU protected. Also, cgroup core needs to * guarantee that css_tryget_online() won't succeed by the time * ->css_offline() is invoked. To satisfy all the requirements, * destruction is implemented in the following two steps. * * s1. Verify @cgrp can be destroyed and mark it dying. Remove all * userland visible parts and start killing the percpu refcnts of * css's. Set up so that the next stage will be kicked off once all * the percpu refcnts are confirmed to be killed. * * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the * rest of destruction. Once all cgroup references are gone, the * cgroup is RCU-freed. * * This function implements s1. After this step, @cgrp is gone as far as * the userland is concerned and a new cgroup with the same name may be * created. As cgroup doesn't care about the names internally, this * doesn't cause any problem. */ static int cgroup_destroy_locked(struct cgroup *cgrp) __releases(&cgroup_mutex) __acquires(&cgroup_mutex) { struct cgroup *tcgrp, *parent = cgroup_parent(cgrp); struct cgroup_subsys_state *css; struct cgrp_cset_link *link; int ssid; lockdep_assert_held(&cgroup_mutex); /* * Only migration can raise populated from zero and we're already * holding cgroup_mutex. */ if (cgroup_is_populated(cgrp)) return -EBUSY; /* * Make sure there's no live children. We can't test emptiness of * ->self.children as dead children linger on it while being * drained; otherwise, "rmdir parent/child parent" may fail. */ if (css_has_online_children(&cgrp->self)) return -EBUSY; /* * Mark @cgrp and the associated csets dead. The former prevents * further task migration and child creation by disabling * cgroup_kn_lock_live(). The latter makes the csets ignored by * the migration path. */ cgrp->self.flags &= ~CSS_ONLINE; spin_lock_irq(&css_set_lock); list_for_each_entry(link, &cgrp->cset_links, cset_link) link->cset->dead = true; spin_unlock_irq(&css_set_lock); /* initiate massacre of all css's */ for_each_css(css, ssid, cgrp) kill_css(css); /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */ css_clear_dir(&cgrp->self); kernfs_remove(cgrp->kn); if (cgroup_is_threaded(cgrp)) parent->nr_threaded_children--; spin_lock_irq(&css_set_lock); for (tcgrp = parent; tcgrp; tcgrp = cgroup_parent(tcgrp)) { tcgrp->nr_descendants--; tcgrp->nr_dying_descendants++; /* * If the dying cgroup is frozen, decrease frozen descendants * counters of ancestor cgroups. */ if (test_bit(CGRP_FROZEN, &cgrp->flags)) tcgrp->freezer.nr_frozen_descendants--; } spin_unlock_irq(&css_set_lock); cgroup1_check_for_release(parent); if (cgrp->root == &cgrp_dfl_root) cgroup_bpf_offline(cgrp); /* put the base reference */ percpu_ref_kill(&cgrp->self.refcnt); return 0; }; int cgroup_rmdir(struct kernfs_node *kn) { struct cgroup *cgrp; int ret = 0; cgrp = cgroup_kn_lock_live(kn, false); if (!cgrp) return 0; ret = cgroup_destroy_locked(cgrp); if (!ret) TRACE_CGROUP_PATH(rmdir, cgrp); cgroup_kn_unlock(kn); return ret; } static struct kernfs_syscall_ops cgroup_kf_syscall_ops = { .show_options = cgroup_show_options, .mkdir = cgroup_mkdir, .rmdir = cgroup_rmdir, .show_path = cgroup_show_path, }; static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early) { struct cgroup_subsys_state *css; pr_debug("Initializing cgroup subsys %s\n", ss->name); cgroup_lock(); idr_init(&ss->css_idr); INIT_LIST_HEAD(&ss->cfts); /* Create the root cgroup state for this subsystem */ ss->root = &cgrp_dfl_root; css = ss->css_alloc(NULL); /* We don't handle early failures gracefully */ BUG_ON(IS_ERR(css)); init_and_link_css(css, ss, &cgrp_dfl_root.cgrp); /* * Root csses are never destroyed and we can't initialize * percpu_ref during early init. Disable refcnting. */ css->flags |= CSS_NO_REF; if (early) { /* allocation can't be done safely during early init */ css->id = 1; } else { css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); } /* Update the init_css_set to contain a subsys * pointer to this state - since the subsystem is * newly registered, all tasks and hence the * init_css_set is in the subsystem's root cgroup. */ init_css_set.subsys[ss->id] = css; have_fork_callback |= (bool)ss->fork << ss->id; have_exit_callback |= (bool)ss->exit << ss->id; have_release_callback |= (bool)ss->release << ss->id; have_canfork_callback |= (bool)ss->can_fork << ss->id; /* At system boot, before all subsystems have been * registered, no tasks have been forked, so we don't * need to invoke fork callbacks here. */ BUG_ON(!list_empty(&init_task.tasks)); BUG_ON(online_css(css)); cgroup_unlock(); } /** * cgroup_init_early - cgroup initialization at system boot * * Initialize cgroups at system boot, and initialize any * subsystems that request early init. */ int __init cgroup_init_early(void) { static struct cgroup_fs_context __initdata ctx; struct cgroup_subsys *ss; int i; ctx.root = &cgrp_dfl_root; init_cgroup_root(&ctx); cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF; RCU_INIT_POINTER(init_task.cgroups, &init_css_set); for_each_subsys(ss, i) { WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id, "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n", i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free, ss->id, ss->name); WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN, "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]); ss->id = i; ss->name = cgroup_subsys_name[i]; if (!ss->legacy_name) ss->legacy_name = cgroup_subsys_name[i]; if (ss->early_init) cgroup_init_subsys(ss, true); } return 0; } /** * cgroup_init - cgroup initialization * * Register cgroup filesystem and /proc file, and initialize * any subsystems that didn't request early init. */ int __init cgroup_init(void) { struct cgroup_subsys *ss; int ssid; BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16); BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup_psi_files)); BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files)); cgroup_rstat_boot(); get_user_ns(init_cgroup_ns.user_ns); cgroup_lock(); /* * Add init_css_set to the hash table so that dfl_root can link to * it during init. */ hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0)); cgroup_unlock(); for_each_subsys(ss, ssid) { if (ss->early_init) { struct cgroup_subsys_state *css = init_css_set.subsys[ss->id]; css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL); BUG_ON(css->id < 0); } else { cgroup_init_subsys(ss, false); } list_add_tail(&init_css_set.e_cset_node[ssid], &cgrp_dfl_root.cgrp.e_csets[ssid]); /* * Setting dfl_root subsys_mask needs to consider the * disabled flag and cftype registration needs kmalloc, * both of which aren't available during early_init. */ if (!cgroup_ssid_enabled(ssid)) continue; if (cgroup1_ssid_disabled(ssid)) pr_info("Disabling %s control group subsystem in v1 mounts\n", ss->legacy_name); cgrp_dfl_root.subsys_mask |= 1 << ss->id; /* implicit controllers must be threaded too */ WARN_ON(ss->implicit_on_dfl && !ss->threaded); if (ss->implicit_on_dfl) cgrp_dfl_implicit_ss_mask |= 1 << ss->id; else if (!ss->dfl_cftypes) cgrp_dfl_inhibit_ss_mask |= 1 << ss->id; if (ss->threaded) cgrp_dfl_threaded_ss_mask |= 1 << ss->id; if (ss->dfl_cftypes == ss->legacy_cftypes) { WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes)); } else { WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes)); WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes)); } if (ss->bind) ss->bind(init_css_set.subsys[ssid]); cgroup_lock(); css_populate_dir(init_css_set.subsys[ssid]); cgroup_unlock(); } /* init_css_set.subsys[] has been updated, re-hash */ hash_del(&init_css_set.hlist); hash_add(css_set_table, &init_css_set.hlist, css_set_hash(init_css_set.subsys)); WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup")); WARN_ON(register_filesystem(&cgroup_fs_type)); WARN_ON(register_filesystem(&cgroup2_fs_type)); WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show)); #ifdef CONFIG_CPUSETS_V1 WARN_ON(register_filesystem(&cpuset_fs_type)); #endif return 0; } static int __init cgroup_wq_init(void) { /* * There isn't much point in executing destruction path in * parallel. Good chunk is serialized with cgroup_mutex anyway. * Use 1 for @max_active. * * We would prefer to do this in cgroup_init() above, but that * is called before init_workqueues(): so leave this until after. */ cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); BUG_ON(!cgroup_destroy_wq); return 0; } core_initcall(cgroup_wq_init); void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) { struct kernfs_node *kn; kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); if (!kn) return; kernfs_path(kn, buf, buflen); kernfs_put(kn); } /* * cgroup_get_from_id : get the cgroup associated with cgroup id * @id: cgroup id * On success return the cgrp or ERR_PTR on failure * Only cgroups within current task's cgroup NS are valid. */ struct cgroup *cgroup_get_from_id(u64 id) { struct kernfs_node *kn; struct cgroup *cgrp, *root_cgrp; kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id); if (!kn) return ERR_PTR(-ENOENT); if (kernfs_type(kn) != KERNFS_DIR) { kernfs_put(kn); return ERR_PTR(-ENOENT); } rcu_read_lock(); cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); if (cgrp && !cgroup_tryget(cgrp)) cgrp = NULL; rcu_read_unlock(); kernfs_put(kn); if (!cgrp) return ERR_PTR(-ENOENT); root_cgrp = current_cgns_cgroup_dfl(); if (!cgroup_is_descendant(cgrp, root_cgrp)) { cgroup_put(cgrp); return ERR_PTR(-ENOENT); } return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_id); /* * proc_cgroup_show() * - Print task's cgroup paths into seq_file, one line for each hierarchy * - Used for /proc/<pid>/cgroup. */ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *tsk) { char *buf; int retval; struct cgroup_root *root; retval = -ENOMEM; buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!buf) goto out; rcu_read_lock(); spin_lock_irq(&css_set_lock); for_each_root(root) { struct cgroup_subsys *ss; struct cgroup *cgrp; int ssid, count = 0; if (root == &cgrp_dfl_root && !READ_ONCE(cgrp_dfl_visible)) continue; cgrp = task_cgroup_from_root(tsk, root); /* The root has already been unmounted. */ if (!cgrp) continue; seq_printf(m, "%d:", root->hierarchy_id); if (root != &cgrp_dfl_root) for_each_subsys(ss, ssid) if (root->subsys_mask & (1 << ssid)) seq_printf(m, "%s%s", count++ ? "," : "", ss->legacy_name); if (strlen(root->name)) seq_printf(m, "%sname=%s", count ? "," : "", root->name); seq_putc(m, ':'); /* * On traditional hierarchies, all zombie tasks show up as * belonging to the root cgroup. On the default hierarchy, * while a zombie doesn't show up in "cgroup.procs" and * thus can't be migrated, its /proc/PID/cgroup keeps * reporting the cgroup it belonged to before exiting. If * the cgroup is removed before the zombie is reaped, * " (deleted)" is appended to the cgroup path. */ if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) { retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX, current->nsproxy->cgroup_ns); if (retval == -E2BIG) retval = -ENAMETOOLONG; if (retval < 0) goto out_unlock; seq_puts(m, buf); } else { seq_puts(m, "/"); } if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp)) seq_puts(m, " (deleted)\n"); else seq_putc(m, '\n'); } retval = 0; out_unlock: spin_unlock_irq(&css_set_lock); rcu_read_unlock(); kfree(buf); out: return retval; } /** * cgroup_fork - initialize cgroup related fields during copy_process() * @child: pointer to task_struct of forking parent process. * * A task is associated with the init_css_set until cgroup_post_fork() * attaches it to the target css_set. */ void cgroup_fork(struct task_struct *child) { RCU_INIT_POINTER(child->cgroups, &init_css_set); INIT_LIST_HEAD(&child->cg_list); } /** * cgroup_v1v2_get_from_file - get a cgroup pointer from a file pointer * @f: file corresponding to cgroup_dir * * Find the cgroup from a file pointer associated with a cgroup directory. * Returns a pointer to the cgroup on success. ERR_PTR is returned if the * cgroup cannot be found. */ static struct cgroup *cgroup_v1v2_get_from_file(struct file *f) { struct cgroup_subsys_state *css; css = css_tryget_online_from_dir(f->f_path.dentry, NULL); if (IS_ERR(css)) return ERR_CAST(css); return css->cgroup; } /** * cgroup_get_from_file - same as cgroup_v1v2_get_from_file, but only supports * cgroup2. * @f: file corresponding to cgroup2_dir */ static struct cgroup *cgroup_get_from_file(struct file *f) { struct cgroup *cgrp = cgroup_v1v2_get_from_file(f); if (IS_ERR(cgrp)) return ERR_CAST(cgrp); if (!cgroup_on_dfl(cgrp)) { cgroup_put(cgrp); return ERR_PTR(-EBADF); } return cgrp; } /** * cgroup_css_set_fork - find or create a css_set for a child process * @kargs: the arguments passed to create the child process * * This functions finds or creates a new css_set which the child * process will be attached to in cgroup_post_fork(). By default, * the child process will be given the same css_set as its parent. * * If CLONE_INTO_CGROUP is specified this function will try to find an * existing css_set which includes the requested cgroup and if not create * a new css_set that the child will be attached to later. If this function * succeeds it will hold cgroup_threadgroup_rwsem on return. If * CLONE_INTO_CGROUP is requested this function will grab cgroup mutex * before grabbing cgroup_threadgroup_rwsem and will hold a reference * to the target cgroup. */ static int cgroup_css_set_fork(struct kernel_clone_args *kargs) __acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem) { int ret; struct cgroup *dst_cgrp = NULL; struct css_set *cset; struct super_block *sb; if (kargs->flags & CLONE_INTO_CGROUP) cgroup_lock(); cgroup_threadgroup_change_begin(current); spin_lock_irq(&css_set_lock); cset = task_css_set(current); get_css_set(cset); if (kargs->cgrp) kargs->kill_seq = kargs->cgrp->kill_seq; else kargs->kill_seq = cset->dfl_cgrp->kill_seq; spin_unlock_irq(&css_set_lock); if (!(kargs->flags & CLONE_INTO_CGROUP)) { kargs->cset = cset; return 0; } CLASS(fd_raw, f)(kargs->cgroup); if (fd_empty(f)) { ret = -EBADF; goto err; } sb = fd_file(f)->f_path.dentry->d_sb; dst_cgrp = cgroup_get_from_file(fd_file(f)); if (IS_ERR(dst_cgrp)) { ret = PTR_ERR(dst_cgrp); dst_cgrp = NULL; goto err; } if (cgroup_is_dead(dst_cgrp)) { ret = -ENODEV; goto err; } /* * Verify that we the target cgroup is writable for us. This is * usually done by the vfs layer but since we're not going through * the vfs layer here we need to do it "manually". */ ret = cgroup_may_write(dst_cgrp, sb); if (ret) goto err; /* * Spawning a task directly into a cgroup works by passing a file * descriptor to the target cgroup directory. This can even be an O_PATH * file descriptor. But it can never be a cgroup.procs file descriptor. * This was done on purpose so spawning into a cgroup could be * conceptualized as an atomic * * fd = openat(dfd_cgroup, "cgroup.procs", ...); * write(fd, <child-pid>, ...); * * sequence, i.e. it's a shorthand for the caller opening and writing * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us * to always use the caller's credentials. */ ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, !(kargs->flags & CLONE_THREAD), current->nsproxy->cgroup_ns); if (ret) goto err; kargs->cset = find_css_set(cset, dst_cgrp); if (!kargs->cset) { ret = -ENOMEM; goto err; } put_css_set(cset); kargs->cgrp = dst_cgrp; return ret; err: cgroup_threadgroup_change_end(current); cgroup_unlock(); if (dst_cgrp) cgroup_put(dst_cgrp); put_css_set(cset); if (kargs->cset) put_css_set(kargs->cset); return ret; } /** * cgroup_css_set_put_fork - drop references we took during fork * @kargs: the arguments passed to create the child process * * Drop references to the prepared css_set and target cgroup if * CLONE_INTO_CGROUP was requested. */ static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex) { struct cgroup *cgrp = kargs->cgrp; struct css_set *cset = kargs->cset; cgroup_threadgroup_change_end(current); if (cset) { put_css_set(cset); kargs->cset = NULL; } if (kargs->flags & CLONE_INTO_CGROUP) { cgroup_unlock(); if (cgrp) { cgroup_put(cgrp); kargs->cgrp = NULL; } } } /** * cgroup_can_fork - called on a new task before the process is exposed * @child: the child process * @kargs: the arguments passed to create the child process * * This prepares a new css_set for the child process which the child will * be attached to in cgroup_post_fork(). * This calls the subsystem can_fork() callbacks. If the cgroup_can_fork() * callback returns an error, the fork aborts with that error code. This * allows for a cgroup subsystem to conditionally allow or deny new forks. */ int cgroup_can_fork(struct task_struct *child, struct kernel_clone_args *kargs) { struct cgroup_subsys *ss; int i, j, ret; ret = cgroup_css_set_fork(kargs); if (ret) return ret; do_each_subsys_mask(ss, i, have_canfork_callback) { ret = ss->can_fork(child, kargs->cset); if (ret) goto out_revert; } while_each_subsys_mask(); return 0; out_revert: for_each_subsys(ss, j) { if (j >= i) break; if (ss->cancel_fork) ss->cancel_fork(child, kargs->cset); } cgroup_css_set_put_fork(kargs); return ret; } /** * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork() * @child: the child process * @kargs: the arguments passed to create the child process * * This calls the cancel_fork() callbacks if a fork failed *after* * cgroup_can_fork() succeeded and cleans up references we took to * prepare a new css_set for the child process in cgroup_can_fork(). */ void cgroup_cancel_fork(struct task_struct *child, struct kernel_clone_args *kargs) { struct cgroup_subsys *ss; int i; for_each_subsys(ss, i) if (ss->cancel_fork) ss->cancel_fork(child, kargs->cset); cgroup_css_set_put_fork(kargs); } /** * cgroup_post_fork - finalize cgroup setup for the child process * @child: the child process * @kargs: the arguments passed to create the child process * * Attach the child process to its css_set calling the subsystem fork() * callbacks. */ void cgroup_post_fork(struct task_struct *child, struct kernel_clone_args *kargs) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex) { unsigned int cgrp_kill_seq = 0; unsigned long cgrp_flags = 0; bool kill = false; struct cgroup_subsys *ss; struct css_set *cset; int i; cset = kargs->cset; kargs->cset = NULL; spin_lock_irq(&css_set_lock); /* init tasks are special, only link regular threads */ if (likely(child->pid)) { if (kargs->cgrp) { cgrp_flags = kargs->cgrp->flags; cgrp_kill_seq = kargs->cgrp->kill_seq; } else { cgrp_flags = cset->dfl_cgrp->flags; cgrp_kill_seq = cset->dfl_cgrp->kill_seq; } WARN_ON_ONCE(!list_empty(&child->cg_list)); cset->nr_tasks++; css_set_move_task(child, NULL, cset, false); } else { put_css_set(cset); cset = NULL; } if (!(child->flags & PF_KTHREAD)) { if (unlikely(test_bit(CGRP_FREEZE, &cgrp_flags))) { /* * If the cgroup has to be frozen, the new task has * too. Let's set the JOBCTL_TRAP_FREEZE jobctl bit to * get the task into the frozen state. */ spin_lock(&child->sighand->siglock); WARN_ON_ONCE(child->frozen); child->jobctl |= JOBCTL_TRAP_FREEZE; spin_unlock(&child->sighand->siglock); /* * Calling cgroup_update_frozen() isn't required here, * because it will be called anyway a bit later from * do_freezer_trap(). So we avoid cgroup's transient * switch from the frozen state and back. */ } /* * If the cgroup is to be killed notice it now and take the * child down right after we finished preparing it for * userspace. */ kill = kargs->kill_seq != cgrp_kill_seq; } spin_unlock_irq(&css_set_lock); /* * Call ss->fork(). This must happen after @child is linked on * css_set; otherwise, @child might change state between ->fork() * and addition to css_set. */ do_each_subsys_mask(ss, i, have_fork_callback) { ss->fork(child); } while_each_subsys_mask(); /* Make the new cset the root_cset of the new cgroup namespace. */ if (kargs->flags & CLONE_NEWCGROUP) { struct css_set *rcset = child->nsproxy->cgroup_ns->root_cset; get_css_set(cset); child->nsproxy->cgroup_ns->root_cset = cset; put_css_set(rcset); } /* Cgroup has to be killed so take down child immediately. */ if (unlikely(kill)) do_send_sig_info(SIGKILL, SEND_SIG_NOINFO, child, PIDTYPE_TGID); cgroup_css_set_put_fork(kargs); } /** * cgroup_exit - detach cgroup from exiting task * @tsk: pointer to task_struct of exiting process * * Description: Detach cgroup from @tsk. * */ void cgroup_exit(struct task_struct *tsk) { struct cgroup_subsys *ss; struct css_set *cset; int i; spin_lock_irq(&css_set_lock); WARN_ON_ONCE(list_empty(&tsk->cg_list)); cset = task_css_set(tsk); css_set_move_task(tsk, cset, NULL, false); cset->nr_tasks--; /* matches the signal->live check in css_task_iter_advance() */ if (thread_group_leader(tsk) && atomic_read(&tsk->signal->live)) list_add_tail(&tsk->cg_list, &cset->dying_tasks); if (dl_task(tsk)) dec_dl_tasks_cs(tsk); WARN_ON_ONCE(cgroup_task_frozen(tsk)); if (unlikely(!(tsk->flags & PF_KTHREAD) && test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags))) cgroup_update_frozen(task_dfl_cgroup(tsk)); spin_unlock_irq(&css_set_lock); /* see cgroup_post_fork() for details */ do_each_subsys_mask(ss, i, have_exit_callback) { ss->exit(tsk); } while_each_subsys_mask(); } void cgroup_release(struct task_struct *task) { struct cgroup_subsys *ss; int ssid; do_each_subsys_mask(ss, ssid, have_release_callback) { ss->release(task); } while_each_subsys_mask(); if (!list_empty(&task->cg_list)) { spin_lock_irq(&css_set_lock); css_set_skip_task_iters(task_css_set(task), task); list_del_init(&task->cg_list); spin_unlock_irq(&css_set_lock); } } void cgroup_free(struct task_struct *task) { struct css_set *cset = task_css_set(task); put_css_set(cset); } static int __init cgroup_disable(char *str) { struct cgroup_subsys *ss; char *token; int i; while ((token = strsep(&str, ",")) != NULL) { if (!*token) continue; for_each_subsys(ss, i) { if (strcmp(token, ss->name) && strcmp(token, ss->legacy_name)) continue; static_branch_disable(cgroup_subsys_enabled_key[i]); pr_info("Disabling %s control group subsystem\n", ss->name); } for (i = 0; i < OPT_FEATURE_COUNT; i++) { if (strcmp(token, cgroup_opt_feature_names[i])) continue; cgroup_feature_disable_mask |= 1 << i; pr_info("Disabling %s control group feature\n", cgroup_opt_feature_names[i]); break; } } return 1; } __setup("cgroup_disable=", cgroup_disable); void __init __weak enable_debug_cgroup(void) { } static int __init enable_cgroup_debug(char *str) { cgroup_debug = true; enable_debug_cgroup(); return 1; } __setup("cgroup_debug", enable_cgroup_debug); static int __init cgroup_favordynmods_setup(char *str) { return (kstrtobool(str, &have_favordynmods) == 0); } __setup("cgroup_favordynmods=", cgroup_favordynmods_setup); /** * css_tryget_online_from_dir - get corresponding css from a cgroup dentry * @dentry: directory dentry of interest * @ss: subsystem of interest * * If @dentry is a directory for a cgroup which has @ss enabled on it, try * to get the corresponding css and return it. If such css doesn't exist * or can't be pinned, an ERR_PTR value is returned. */ struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, struct cgroup_subsys *ss) { struct kernfs_node *kn = kernfs_node_from_dentry(dentry); struct file_system_type *s_type = dentry->d_sb->s_type; struct cgroup_subsys_state *css = NULL; struct cgroup *cgrp; /* is @dentry a cgroup dir? */ if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) || !kn || kernfs_type(kn) != KERNFS_DIR) return ERR_PTR(-EBADF); rcu_read_lock(); /* * This path doesn't originate from kernfs and @kn could already * have been or be removed at any point. @kn->priv is RCU * protected for this access. See css_release_work_fn() for details. */ cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); if (cgrp) css = cgroup_css(cgrp, ss); if (!css || !css_tryget_online(css)) css = ERR_PTR(-ENOENT); rcu_read_unlock(); return css; } /** * css_from_id - lookup css by id * @id: the cgroup id * @ss: cgroup subsys to be looked into * * Returns the css if there's valid one with @id, otherwise returns NULL. * Should be called under rcu_read_lock(). */ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss) { WARN_ON_ONCE(!rcu_read_lock_held()); return idr_find(&ss->css_idr, id); } /** * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path * @path: path on the default hierarchy * * Find the cgroup at @path on the default hierarchy, increment its * reference count and return it. Returns pointer to the found cgroup on * success, ERR_PTR(-ENOENT) if @path doesn't exist or if the cgroup has already * been released and ERR_PTR(-ENOTDIR) if @path points to a non-directory. */ struct cgroup *cgroup_get_from_path(const char *path) { struct kernfs_node *kn; struct cgroup *cgrp = ERR_PTR(-ENOENT); struct cgroup *root_cgrp; root_cgrp = current_cgns_cgroup_dfl(); kn = kernfs_walk_and_get(root_cgrp->kn, path); if (!kn) goto out; if (kernfs_type(kn) != KERNFS_DIR) { cgrp = ERR_PTR(-ENOTDIR); goto out_kernfs; } rcu_read_lock(); cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv); if (!cgrp || !cgroup_tryget(cgrp)) cgrp = ERR_PTR(-ENOENT); rcu_read_unlock(); out_kernfs: kernfs_put(kn); out: return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_path); /** * cgroup_v1v2_get_from_fd - get a cgroup pointer from a fd * @fd: fd obtained by open(cgroup_dir) * * Find the cgroup from a fd which should be obtained * by opening a cgroup directory. Returns a pointer to the * cgroup on success. ERR_PTR is returned if the cgroup * cannot be found. */ struct cgroup *cgroup_v1v2_get_from_fd(int fd) { CLASS(fd_raw, f)(fd); if (fd_empty(f)) return ERR_PTR(-EBADF); return cgroup_v1v2_get_from_file(fd_file(f)); } /** * cgroup_get_from_fd - same as cgroup_v1v2_get_from_fd, but only supports * cgroup2. * @fd: fd obtained by open(cgroup2_dir) */ struct cgroup *cgroup_get_from_fd(int fd) { struct cgroup *cgrp = cgroup_v1v2_get_from_fd(fd); if (IS_ERR(cgrp)) return ERR_CAST(cgrp); if (!cgroup_on_dfl(cgrp)) { cgroup_put(cgrp); return ERR_PTR(-EBADF); } return cgrp; } EXPORT_SYMBOL_GPL(cgroup_get_from_fd); static u64 power_of_ten(int power) { u64 v = 1; while (power--) v *= 10; return v; } /** * cgroup_parse_float - parse a floating number * @input: input string * @dec_shift: number of decimal digits to shift * @v: output * * Parse a decimal floating point number in @input and store the result in * @v with decimal point right shifted @dec_shift times. For example, if * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345. * Returns 0 on success, -errno otherwise. * * There's nothing cgroup specific about this function except that it's * currently the only user. */ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v) { s64 whole, frac = 0; int fstart = 0, fend = 0, flen; if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend)) return -EINVAL; if (frac < 0) return -EINVAL; flen = fend > fstart ? fend - fstart : 0; if (flen < dec_shift) frac *= power_of_ten(dec_shift - flen); else frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift)); *v = whole * power_of_ten(dec_shift) + frac; return 0; } /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. */ #ifdef CONFIG_SOCK_CGROUP_DATA void cgroup_sk_alloc(struct sock_cgroup_data *skcd) { struct cgroup *cgroup; rcu_read_lock(); /* Don't associate the sock with unrelated interrupted task's cgroup. */ if (in_interrupt()) { cgroup = &cgrp_dfl_root.cgrp; cgroup_get(cgroup); goto out; } while (true) { struct css_set *cset; cset = task_css_set(current); if (likely(cgroup_tryget(cset->dfl_cgrp))) { cgroup = cset->dfl_cgrp; break; } cpu_relax(); } out: skcd->cgroup = cgroup; cgroup_bpf_get(cgroup); rcu_read_unlock(); } void cgroup_sk_clone(struct sock_cgroup_data *skcd) { struct cgroup *cgrp = sock_cgroup_ptr(skcd); /* * We might be cloning a socket which is left in an empty * cgroup and the cgroup might have already been rmdir'd. * Don't use cgroup_get_live(). */ cgroup_get(cgrp); cgroup_bpf_get(cgrp); } void cgroup_sk_free(struct sock_cgroup_data *skcd) { struct cgroup *cgrp = sock_cgroup_ptr(skcd); cgroup_bpf_put(cgrp); cgroup_put(cgrp); } #endif /* CONFIG_SOCK_CGROUP_DATA */ #ifdef CONFIG_SYSFS static ssize_t show_delegatable_files(struct cftype *files, char *buf, ssize_t size, const char *prefix) { struct cftype *cft; ssize_t ret = 0; for (cft = files; cft && cft->name[0] != '\0'; cft++) { if (!(cft->flags & CFTYPE_NS_DELEGATABLE)) continue; if (prefix) ret += snprintf(buf + ret, size - ret, "%s.", prefix); ret += snprintf(buf + ret, size - ret, "%s\n", cft->name); if (WARN_ON(ret >= size)) break; } return ret; } static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { struct cgroup_subsys *ss; int ssid; ssize_t ret = 0; ret = show_delegatable_files(cgroup_base_files, buf + ret, PAGE_SIZE - ret, NULL); if (cgroup_psi_enabled()) ret += show_delegatable_files(cgroup_psi_files, buf + ret, PAGE_SIZE - ret, NULL); for_each_subsys(ss, ssid) ret += show_delegatable_files(ss->dfl_cftypes, buf + ret, PAGE_SIZE - ret, cgroup_subsys_name[ssid]); return ret; } static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate); static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return snprintf(buf, PAGE_SIZE, "nsdelegate\n" "favordynmods\n" "memory_localevents\n" "memory_recursiveprot\n" "memory_hugetlb_accounting\n" "pids_localevents\n"); } static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features); static struct attribute *cgroup_sysfs_attrs[] = { &cgroup_delegate_attr.attr, &cgroup_features_attr.attr, NULL, }; static const struct attribute_group cgroup_sysfs_attr_group = { .attrs = cgroup_sysfs_attrs, .name = "cgroup", }; static int __init cgroup_sysfs_init(void) { return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group); } subsys_initcall(cgroup_sysfs_init); #endif /* CONFIG_SYSFS */
7 7 7 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 // SPDX-License-Identifier: GPL-2.0-or-later /* SCTP kernel implementation * Copyright (c) 2003 International Business Machines, Corp. * * This file is part of the SCTP kernel implementation * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Sridhar Samudrala <sri@us.ibm.com> */ #include <linux/types.h> #include <linux/seq_file.h> #include <linux/init.h> #include <linux/export.h> #include <net/sctp/sctp.h> #include <net/ip.h> /* for snmp_fold_field */ static const struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB), SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS), SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS), SNMP_MIB_ITEM("SctpAborteds", SCTP_MIB_ABORTEDS), SNMP_MIB_ITEM("SctpShutdowns", SCTP_MIB_SHUTDOWNS), SNMP_MIB_ITEM("SctpOutOfBlues", SCTP_MIB_OUTOFBLUES), SNMP_MIB_ITEM("SctpChecksumErrors", SCTP_MIB_CHECKSUMERRORS), SNMP_MIB_ITEM("SctpOutCtrlChunks", SCTP_MIB_OUTCTRLCHUNKS), SNMP_MIB_ITEM("SctpOutOrderChunks", SCTP_MIB_OUTORDERCHUNKS), SNMP_MIB_ITEM("SctpOutUnorderChunks", SCTP_MIB_OUTUNORDERCHUNKS), SNMP_MIB_ITEM("SctpInCtrlChunks", SCTP_MIB_INCTRLCHUNKS), SNMP_MIB_ITEM("SctpInOrderChunks", SCTP_MIB_INORDERCHUNKS), SNMP_MIB_ITEM("SctpInUnorderChunks", SCTP_MIB_INUNORDERCHUNKS), SNMP_MIB_ITEM("SctpFragUsrMsgs", SCTP_MIB_FRAGUSRMSGS), SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), SNMP_MIB_SENTINEL }; /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { unsigned long buff[SCTP_MIB_MAX]; struct net *net = seq->private; int i; memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); snmp_get_cpu_field_batch(buff, sctp_snmp_list, net->sctp.sctp_statistics); for (i = 0; sctp_snmp_list[i].name; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, buff[i]); return 0; } /* Dump local addresses of an association/endpoint. */ static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) { struct sctp_association *asoc; struct sctp_sockaddr_entry *laddr; struct sctp_transport *peer; union sctp_addr *addr, *primary = NULL; struct sctp_af *af; if (epb->type == SCTP_EP_TYPE_ASSOCIATION) { asoc = sctp_assoc(epb); peer = asoc->peer.primary_path; if (unlikely(peer == NULL)) { WARN(1, "Association %p with NULL primary path!\n", asoc); return; } primary = &peer->saddr; } rcu_read_lock(); list_for_each_entry_rcu(laddr, &epb->bind_addr.address_list, list) { if (!laddr->valid) continue; addr = &laddr->a; af = sctp_get_af_specific(addr->sa.sa_family); if (primary && af->cmp_addr(addr, primary)) { seq_printf(seq, "*"); } af->seq_dump_addr(seq, addr); } rcu_read_unlock(); } /* Dump remote addresses of an association. */ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_association *assoc) { struct sctp_transport *transport; union sctp_addr *addr, *primary; struct sctp_af *af; primary = &assoc->peer.primary_addr; list_for_each_entry_rcu(transport, &assoc->peer.transport_addr_list, transports) { addr = &transport->ipaddr; af = sctp_get_af_specific(addr->sa.sa_family); if (af->cmp_addr(addr, primary)) { seq_printf(seq, "*"); } af->seq_dump_addr(seq, addr); } } static void *sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= sctp_ep_hashsize) return NULL; if (*pos < 0) *pos = 0; if (*pos == 0) seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT UID INODE LADDRS\n"); return (void *)pos; } static void sctp_eps_seq_stop(struct seq_file *seq, void *v) { } static void *sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) { if (++*pos >= sctp_ep_hashsize) return NULL; return pos; } /* Display sctp endpoints (/proc/net/sctp/eps). */ static int sctp_eps_seq_show(struct seq_file *seq, void *v) { struct sctp_hashbucket *head; struct sctp_endpoint *ep; struct sock *sk; int hash = *(loff_t *)v; if (hash >= sctp_ep_hashsize) return -ENOMEM; head = &sctp_ep_hashtable[hash]; read_lock_bh(&head->lock); sctp_for_each_hentry(ep, &head->chain) { sk = ep->base.sk; if (!net_eq(sock_net(sk), seq_file_net(seq))) continue; seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5u %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, ep->base.bind_addr.port, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk)); sctp_seq_dump_local_addrs(seq, &ep->base); seq_printf(seq, "\n"); } read_unlock_bh(&head->lock); return 0; } static const struct seq_operations sctp_eps_ops = { .start = sctp_eps_seq_start, .next = sctp_eps_seq_next, .stop = sctp_eps_seq_stop, .show = sctp_eps_seq_show, }; struct sctp_ht_iter { struct seq_net_private p; struct rhashtable_iter hti; }; static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; sctp_transport_walk_start(&iter->hti); return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } static void sctp_transport_seq_stop(struct seq_file *seq, void *v) { struct sctp_ht_iter *iter = seq->private; if (v && v != SEQ_START_TOKEN) { struct sctp_transport *transport = v; sctp_transport_put(transport); } sctp_transport_walk_stop(&iter->hti); } static void *sctp_transport_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sctp_ht_iter *iter = seq->private; if (v && v != SEQ_START_TOKEN) { struct sctp_transport *transport = v; sctp_transport_put(transport); } ++*pos; return sctp_transport_get_next(seq_file_net(seq), &iter->hti); } /* Display sctp associations (/proc/net/sctp/assocs). */ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { struct sctp_transport *transport; struct sctp_association *assoc; struct sctp_ep_common *epb; struct sock *sk; if (v == SEQ_START_TOKEN) { seq_printf(seq, " ASSOC SOCK STY SST ST HBKT " "ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " "RPORT LADDRS <-> RADDRS " "HBINT INS OUTS MAXRT T1X T2X RTXC " "wmema wmemq sndbuf rcvbuf\n"); return 0; } transport = (struct sctp_transport *)v; assoc = transport->asoc; epb = &assoc->base; sk = epb->sk; seq_printf(seq, "%8pK %8pK %-3d %-3d %-2d %-4d " "%4d %8d %8d %7u %5lu %-5d %5d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, 0, assoc->assoc_id, assoc->sndbuf_used, atomic_read(&assoc->rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); seq_printf(seq, " "); sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "<-> "); sctp_seq_dump_remote_addrs(seq, assoc); seq_printf(seq, "\t%8lu %5d %5d %4d %4d %4d %8d " "%8d %8d %8d %8d", assoc->hbinterval, assoc->stream.incnt, assoc->stream.outcnt, assoc->max_retrans, assoc->init_retries, assoc->shutdown_retries, assoc->rtx_data_chunks, refcount_read(&sk->sk_wmem_alloc), READ_ONCE(sk->sk_wmem_queued), sk->sk_sndbuf, sk->sk_rcvbuf); seq_printf(seq, "\n"); return 0; } static const struct seq_operations sctp_assoc_ops = { .start = sctp_transport_seq_start, .next = sctp_transport_seq_next, .stop = sctp_transport_seq_stop, .show = sctp_assocs_seq_show, }; static int sctp_remaddr_seq_show(struct seq_file *seq, void *v) { struct sctp_association *assoc; struct sctp_transport *transport, *tsp; if (v == SEQ_START_TOKEN) { seq_printf(seq, "ADDR ASSOC_ID HB_ACT RTO MAX_PATH_RTX " "REM_ADDR_RTX START STATE\n"); return 0; } transport = (struct sctp_transport *)v; assoc = transport->asoc; list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list, transports) { /* * The remote address (ADDR) */ tsp->af_specific->seq_dump_addr(seq, &tsp->ipaddr); seq_printf(seq, " "); /* * The association ID (ASSOC_ID) */ seq_printf(seq, "%d ", tsp->asoc->assoc_id); /* * If the Heartbeat is active (HB_ACT) * Note: 1 = Active, 0 = Inactive */ seq_printf(seq, "%d ", timer_pending(&tsp->hb_timer)); /* * Retransmit time out (RTO) */ seq_printf(seq, "%lu ", tsp->rto); /* * Maximum path retransmit count (PATH_MAX_RTX) */ seq_printf(seq, "%d ", tsp->pathmaxrxt); /* * remote address retransmit count (REM_ADDR_RTX) * Note: We don't have a way to tally this at the moment * so lets just leave it as zero for the moment */ seq_puts(seq, "0 "); /* * remote address start time (START). This is also not * currently implemented, but we can record it with a * jiffies marker in a subsequent patch */ seq_puts(seq, "0 "); /* * The current state of this destination. I.e. * SCTP_ACTIVE, SCTP_INACTIVE, ... */ seq_printf(seq, "%d", tsp->state); seq_printf(seq, "\n"); } return 0; } static const struct seq_operations sctp_remaddr_ops = { .start = sctp_transport_seq_start, .next = sctp_transport_seq_next, .stop = sctp_transport_seq_stop, .show = sctp_remaddr_seq_show, }; /* Set up the proc fs entry for the SCTP protocol. */ int __net_init sctp_proc_init(struct net *net) { net->sctp.proc_net_sctp = proc_net_mkdir(net, "sctp", net->proc_net); if (!net->sctp.proc_net_sctp) return -ENOMEM; if (!proc_create_net_single("snmp", 0444, net->sctp.proc_net_sctp, sctp_snmp_seq_show, NULL)) goto cleanup; if (!proc_create_net("eps", 0444, net->sctp.proc_net_sctp, &sctp_eps_ops, sizeof(struct seq_net_private))) goto cleanup; if (!proc_create_net("assocs", 0444, net->sctp.proc_net_sctp, &sctp_assoc_ops, sizeof(struct sctp_ht_iter))) goto cleanup; if (!proc_create_net("remaddr", 0444, net->sctp.proc_net_sctp, &sctp_remaddr_ops, sizeof(struct sctp_ht_iter))) goto cleanup; return 0; cleanup: remove_proc_subtree("sctp", net->proc_net); net->sctp.proc_net_sctp = NULL; return -ENOMEM; }
8 4 4 4 4 4 4 3 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 8 342 340 342 342 337 341 339 342 342 341 50 91 116 91 50 692 692 277 88 88 66 61 118 2 342 246 176 176 177 178 178 179 175 250 84 215 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 /* * Copyright (c) 2015, Mellanox Technologies inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "core_priv.h" #include <linux/in.h> #include <linux/in6.h> /* For in6_dev_get/in6_dev_put */ #include <net/addrconf.h> #include <net/bonding.h> #include <rdma/ib_cache.h> #include <rdma/ib_addr.h> static struct workqueue_struct *gid_cache_wq; enum gid_op_type { GID_DEL = 0, GID_ADD }; struct update_gid_event_work { struct work_struct work; union ib_gid gid; struct ib_gid_attr gid_attr; enum gid_op_type gid_op; }; #define ROCE_NETDEV_CALLBACK_SZ 3 struct netdev_event_work_cmd { roce_netdev_callback cb; roce_netdev_filter filter; struct net_device *ndev; struct net_device *filter_ndev; }; struct netdev_event_work { struct work_struct work; struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ]; }; static const struct { bool (*is_supported)(const struct ib_device *device, u32 port_num); enum ib_gid_type gid_type; } PORT_CAP_TO_GID_TYPE[] = { {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE}, {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP}, }; #define CAP_TO_GID_TABLE_SIZE ARRAY_SIZE(PORT_CAP_TO_GID_TYPE) unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u32 port) { int i; unsigned int ret_flags = 0; if (!rdma_protocol_roce(ib_dev, port)) return 1UL << IB_GID_TYPE_IB; for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++) if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port)) ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type; return ret_flags; } EXPORT_SYMBOL(roce_gid_type_mask_support); static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev, u32 port, union ib_gid *gid, struct ib_gid_attr *gid_attr) { int i; unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port); for (i = 0; i < IB_GID_TYPE_SIZE; i++) { if ((1UL << i) & gid_type_mask) { gid_attr->gid_type = i; switch (gid_op) { case GID_ADD: ib_cache_gid_add(ib_dev, port, gid, gid_attr); break; case GID_DEL: ib_cache_gid_del(ib_dev, port, gid, gid_attr); break; } } } } enum bonding_slave_state { BONDING_SLAVE_STATE_ACTIVE = 1UL << 0, BONDING_SLAVE_STATE_INACTIVE = 1UL << 1, /* No primary slave or the device isn't a slave in bonding */ BONDING_SLAVE_STATE_NA = 1UL << 2, }; static enum bonding_slave_state is_eth_active_slave_of_bonding_rcu(struct net_device *dev, struct net_device *upper) { if (upper && netif_is_bond_master(upper)) { struct net_device *pdev = bond_option_active_slave_get_rcu(netdev_priv(upper)); if (pdev) return dev == pdev ? BONDING_SLAVE_STATE_ACTIVE : BONDING_SLAVE_STATE_INACTIVE; } return BONDING_SLAVE_STATE_NA; } #define REQUIRED_BOND_STATES (BONDING_SLAVE_STATE_ACTIVE | \ BONDING_SLAVE_STATE_NA) static bool is_eth_port_of_netdev_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *real_dev; bool res; if (!rdma_ndev) return false; rcu_read_lock(); real_dev = rdma_vlan_dev_real_dev(cookie); if (!real_dev) real_dev = cookie; res = ((rdma_is_upper_dev_rcu(rdma_ndev, cookie) && (is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) & REQUIRED_BOND_STATES)) || real_dev == rdma_ndev); rcu_read_unlock(); return res; } static bool is_eth_port_inactive_slave_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *master_dev; bool res; if (!rdma_ndev) return false; rcu_read_lock(); master_dev = netdev_master_upper_dev_get_rcu(rdma_ndev); res = is_eth_active_slave_of_bonding_rcu(rdma_ndev, master_dev) == BONDING_SLAVE_STATE_INACTIVE; rcu_read_unlock(); return res; } /** * is_ndev_for_default_gid_filter - Check if a given netdevice * can be considered for default GIDs or not. * @ib_dev: IB device to check * @port: Port to consider for adding default GID * @rdma_ndev: rdma netdevice pointer * @cookie: Netdevice to consider to form a default GID * * is_ndev_for_default_gid_filter() returns true if a given netdevice can be * considered for deriving default RoCE GID, returns false otherwise. */ static bool is_ndev_for_default_gid_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *cookie_ndev = cookie; bool res; if (!rdma_ndev) return false; rcu_read_lock(); /* * When rdma netdevice is used in bonding, bonding master netdevice * should be considered for default GIDs. Therefore, ignore slave rdma * netdevices when bonding is considered. * Additionally when event(cookie) netdevice is bond master device, * make sure that it the upper netdevice of rdma netdevice. */ res = ((cookie_ndev == rdma_ndev && !netif_is_bond_slave(rdma_ndev)) || (netif_is_bond_master(cookie_ndev) && rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev))); rcu_read_unlock(); return res; } static bool pass_all_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { return true; } static bool upper_device_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { bool res; if (!rdma_ndev) return false; if (rdma_ndev == cookie) return true; rcu_read_lock(); res = rdma_is_upper_dev_rcu(rdma_ndev, cookie); rcu_read_unlock(); return res; } /** * is_upper_ndev_bond_master_filter - Check if a given netdevice * is bond master device of netdevice of the RDMA device of port. * @ib_dev: IB device to check * @port: Port to consider for adding default GID * @rdma_ndev: Pointer to rdma netdevice * @cookie: Netdevice to consider to form a default GID * * is_upper_ndev_bond_master_filter() returns true if a cookie_netdev * is bond master device and rdma_ndev is its lower netdevice. It might * not have been established as slave device yet. */ static bool is_upper_ndev_bond_master_filter(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *cookie_ndev = cookie; bool match = false; if (!rdma_ndev) return false; rcu_read_lock(); if (netif_is_bond_master(cookie_ndev) && rdma_is_upper_dev_rcu(rdma_ndev, cookie_ndev)) match = true; rcu_read_unlock(); return match; } static void update_gid_ip(enum gid_op_type gid_op, struct ib_device *ib_dev, u32 port, struct net_device *ndev, struct sockaddr *addr) { union ib_gid gid; struct ib_gid_attr gid_attr; rdma_ip2gid(addr, &gid); memset(&gid_attr, 0, sizeof(gid_attr)); gid_attr.ndev = ndev; update_gid(gid_op, ib_dev, port, &gid, &gid_attr); } static void bond_delete_netdev_default_gids(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, struct net_device *event_ndev) { struct net_device *real_dev = rdma_vlan_dev_real_dev(event_ndev); unsigned long gid_type_mask; if (!rdma_ndev) return; if (!real_dev) real_dev = event_ndev; rcu_read_lock(); if (((rdma_ndev != event_ndev && !rdma_is_upper_dev_rcu(rdma_ndev, event_ndev)) || is_eth_active_slave_of_bonding_rcu(rdma_ndev, real_dev) == BONDING_SLAVE_STATE_INACTIVE)) { rcu_read_unlock(); return; } rcu_read_unlock(); gid_type_mask = roce_gid_type_mask_support(ib_dev, port); ib_cache_gid_set_default_gid(ib_dev, port, rdma_ndev, gid_type_mask, IB_CACHE_GID_DEFAULT_MODE_DELETE); } static void enum_netdev_ipv4_ips(struct ib_device *ib_dev, u32 port, struct net_device *ndev) { const struct in_ifaddr *ifa; struct in_device *in_dev; struct sin_list { struct list_head list; struct sockaddr_in ip; }; struct sin_list *sin_iter; struct sin_list *sin_temp; LIST_HEAD(sin_list); if (ndev->reg_state >= NETREG_UNREGISTERING) return; rcu_read_lock(); in_dev = __in_dev_get_rcu(ndev); if (!in_dev) { rcu_read_unlock(); return; } in_dev_for_each_ifa_rcu(ifa, in_dev) { struct sin_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) continue; entry->ip.sin_family = AF_INET; entry->ip.sin_addr.s_addr = ifa->ifa_address; list_add_tail(&entry->list, &sin_list); } rcu_read_unlock(); list_for_each_entry_safe(sin_iter, sin_temp, &sin_list, list) { update_gid_ip(GID_ADD, ib_dev, port, ndev, (struct sockaddr *)&sin_iter->ip); list_del(&sin_iter->list); kfree(sin_iter); } } static void enum_netdev_ipv6_ips(struct ib_device *ib_dev, u32 port, struct net_device *ndev) { struct inet6_ifaddr *ifp; struct inet6_dev *in6_dev; struct sin6_list { struct list_head list; struct sockaddr_in6 sin6; }; struct sin6_list *sin6_iter; struct sin6_list *sin6_temp; struct ib_gid_attr gid_attr = {.ndev = ndev}; LIST_HEAD(sin6_list); if (ndev->reg_state >= NETREG_UNREGISTERING) return; in6_dev = in6_dev_get(ndev); if (!in6_dev) return; read_lock_bh(&in6_dev->lock); list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { struct sin6_list *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) continue; entry->sin6.sin6_family = AF_INET6; entry->sin6.sin6_addr = ifp->addr; list_add_tail(&entry->list, &sin6_list); } read_unlock_bh(&in6_dev->lock); in6_dev_put(in6_dev); list_for_each_entry_safe(sin6_iter, sin6_temp, &sin6_list, list) { union ib_gid gid; rdma_ip2gid((struct sockaddr *)&sin6_iter->sin6, &gid); update_gid(GID_ADD, ib_dev, port, &gid, &gid_attr); list_del(&sin6_iter->list); kfree(sin6_iter); } } static void _add_netdev_ips(struct ib_device *ib_dev, u32 port, struct net_device *ndev) { enum_netdev_ipv4_ips(ib_dev, port, ndev); if (IS_ENABLED(CONFIG_IPV6)) enum_netdev_ipv6_ips(ib_dev, port, ndev); } static void add_netdev_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { _add_netdev_ips(ib_dev, port, cookie); } static void del_netdev_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { ib_cache_gid_del_all_netdev_gids(ib_dev, port, cookie); } /** * del_default_gids - Delete default GIDs of the event/cookie netdevice * @ib_dev: RDMA device pointer * @port: Port of the RDMA device whose GID table to consider * @rdma_ndev: Unused rdma netdevice * @cookie: Pointer to event netdevice * * del_default_gids() deletes the default GIDs of the event/cookie netdevice. */ static void del_default_gids(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *cookie_ndev = cookie; unsigned long gid_type_mask; gid_type_mask = roce_gid_type_mask_support(ib_dev, port); ib_cache_gid_set_default_gid(ib_dev, port, cookie_ndev, gid_type_mask, IB_CACHE_GID_DEFAULT_MODE_DELETE); } static void add_default_gids(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *event_ndev = cookie; unsigned long gid_type_mask; gid_type_mask = roce_gid_type_mask_support(ib_dev, port); ib_cache_gid_set_default_gid(ib_dev, port, event_ndev, gid_type_mask, IB_CACHE_GID_DEFAULT_MODE_SET); } static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net *net; struct net_device *ndev; /* Lock the rtnl to make sure the netdevs does not move under * our feet */ rtnl_lock(); down_read(&net_rwsem); for_each_net(net) for_each_netdev(net, ndev) { /* * Filter and add default GIDs of the primary netdevice * when not in bonding mode, or add default GIDs * of bond master device, when in bonding mode. */ if (is_ndev_for_default_gid_filter(ib_dev, port, rdma_ndev, ndev)) add_default_gids(ib_dev, port, rdma_ndev, ndev); if (is_eth_port_of_netdev_filter(ib_dev, port, rdma_ndev, ndev)) _add_netdev_ips(ib_dev, port, ndev); } up_read(&net_rwsem); rtnl_unlock(); } /** * rdma_roce_rescan_device - Rescan all of the network devices in the system * and add their gids, as needed, to the relevant RoCE devices. * * @ib_dev: the rdma device */ void rdma_roce_rescan_device(struct ib_device *ib_dev) { ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL, enum_all_gids_of_dev_cb, NULL); } EXPORT_SYMBOL(rdma_roce_rescan_device); /** * rdma_roce_rescan_port - Rescan all of the network devices in the system * and add their gids if relevant to the port of the RoCE device. * * @ib_dev: IB device * @port: Port number */ void rdma_roce_rescan_port(struct ib_device *ib_dev, u32 port) { struct net_device *ndev = NULL; if (rdma_protocol_roce(ib_dev, port)) { ndev = ib_device_get_netdev(ib_dev, port); if (!ndev) return; enum_all_gids_of_dev_cb(ib_dev, port, ndev, ndev); dev_put(ndev); } } EXPORT_SYMBOL(rdma_roce_rescan_port); static void callback_for_addr_gid_device_scan(struct ib_device *device, u32 port, struct net_device *rdma_ndev, void *cookie) { struct update_gid_event_work *parsed = cookie; return update_gid(parsed->gid_op, device, port, &parsed->gid, &parsed->gid_attr); } struct upper_list { struct list_head list; struct net_device *upper; }; static int netdev_upper_walk(struct net_device *upper, struct netdev_nested_priv *priv) { struct upper_list *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); struct list_head *upper_list = (struct list_head *)priv->data; if (!entry) return 0; list_add_tail(&entry->list, upper_list); dev_hold(upper); entry->upper = upper; return 0; } static void handle_netdev_upper(struct ib_device *ib_dev, u32 port, void *cookie, void (*handle_netdev)(struct ib_device *ib_dev, u32 port, struct net_device *ndev)) { struct net_device *ndev = cookie; struct netdev_nested_priv priv; struct upper_list *upper_iter; struct upper_list *upper_temp; LIST_HEAD(upper_list); priv.data = &upper_list; rcu_read_lock(); netdev_walk_all_upper_dev_rcu(ndev, netdev_upper_walk, &priv); rcu_read_unlock(); handle_netdev(ib_dev, port, ndev); list_for_each_entry_safe(upper_iter, upper_temp, &upper_list, list) { handle_netdev(ib_dev, port, upper_iter->upper); dev_put(upper_iter->upper); list_del(&upper_iter->list); kfree(upper_iter); } } void roce_del_all_netdev_gids(struct ib_device *ib_dev, u32 port, struct net_device *ndev) { ib_cache_gid_del_all_netdev_gids(ib_dev, port, ndev); } EXPORT_SYMBOL(roce_del_all_netdev_gids); static void del_netdev_upper_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { handle_netdev_upper(ib_dev, port, cookie, roce_del_all_netdev_gids); } static void add_netdev_upper_ips(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { handle_netdev_upper(ib_dev, port, cookie, _add_netdev_ips); } static void del_netdev_default_ips_join(struct ib_device *ib_dev, u32 port, struct net_device *rdma_ndev, void *cookie) { struct net_device *master_ndev; rcu_read_lock(); master_ndev = netdev_master_upper_dev_get_rcu(rdma_ndev); dev_hold(master_ndev); rcu_read_unlock(); if (master_ndev) { bond_delete_netdev_default_gids(ib_dev, port, rdma_ndev, master_ndev); dev_put(master_ndev); } } /* The following functions operate on all IB devices. netdevice_event and * addr_event execute ib_enum_all_roce_netdevs through a work. * ib_enum_all_roce_netdevs iterates through all IB devices. */ static void netdevice_event_work_handler(struct work_struct *_work) { struct netdev_event_work *work = container_of(_work, struct netdev_event_work, work); unsigned int i; for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) { ib_enum_all_roce_netdevs(work->cmds[i].filter, work->cmds[i].filter_ndev, work->cmds[i].cb, work->cmds[i].ndev); dev_put(work->cmds[i].ndev); dev_put(work->cmds[i].filter_ndev); } kfree(work); } static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, struct net_device *ndev) { unsigned int i; struct netdev_event_work *ndev_work = kmalloc(sizeof(*ndev_work), GFP_KERNEL); if (!ndev_work) return NOTIFY_DONE; memcpy(ndev_work->cmds, cmds, sizeof(ndev_work->cmds)); for (i = 0; i < ARRAY_SIZE(ndev_work->cmds) && ndev_work->cmds[i].cb; i++) { if (!ndev_work->cmds[i].ndev) ndev_work->cmds[i].ndev = ndev; if (!ndev_work->cmds[i].filter_ndev) ndev_work->cmds[i].filter_ndev = ndev; dev_hold(ndev_work->cmds[i].ndev); dev_hold(ndev_work->cmds[i].filter_ndev); } INIT_WORK(&ndev_work->work, netdevice_event_work_handler); queue_work(gid_cache_wq, &ndev_work->work); return NOTIFY_DONE; } static const struct netdev_event_work_cmd add_cmd = { .cb = add_netdev_ips, .filter = is_eth_port_of_netdev_filter }; static const struct netdev_event_work_cmd add_cmd_upper_ips = { .cb = add_netdev_upper_ips, .filter = is_eth_port_of_netdev_filter }; static void ndev_event_unlink(struct netdev_notifier_changeupper_info *changeupper_info, struct netdev_event_work_cmd *cmds) { static const struct netdev_event_work_cmd upper_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter }; cmds[0] = upper_ips_del_cmd; cmds[0].ndev = changeupper_info->upper_dev; cmds[1] = add_cmd; } static const struct netdev_event_work_cmd bonding_default_add_cmd = { .cb = add_default_gids, .filter = is_upper_ndev_bond_master_filter }; static void ndev_event_link(struct net_device *event_ndev, struct netdev_notifier_changeupper_info *changeupper_info, struct netdev_event_work_cmd *cmds) { static const struct netdev_event_work_cmd bonding_default_del_cmd = { .cb = del_default_gids, .filter = is_upper_ndev_bond_master_filter }; /* * When a lower netdev is linked to its upper bonding * netdev, delete lower slave netdev's default GIDs. */ cmds[0] = bonding_default_del_cmd; cmds[0].ndev = event_ndev; cmds[0].filter_ndev = changeupper_info->upper_dev; /* Now add bonding upper device default GIDs */ cmds[1] = bonding_default_add_cmd; cmds[1].ndev = changeupper_info->upper_dev; cmds[1].filter_ndev = changeupper_info->upper_dev; /* Now add bonding upper device IP based GIDs */ cmds[2] = add_cmd_upper_ips; cmds[2].ndev = changeupper_info->upper_dev; cmds[2].filter_ndev = changeupper_info->upper_dev; } static void netdevice_event_changeupper(struct net_device *event_ndev, struct netdev_notifier_changeupper_info *changeupper_info, struct netdev_event_work_cmd *cmds) { if (changeupper_info->linking) ndev_event_link(event_ndev, changeupper_info, cmds); else ndev_event_unlink(changeupper_info, cmds); } static const struct netdev_event_work_cmd add_default_gid_cmd = { .cb = add_default_gids, .filter = is_ndev_for_default_gid_filter, }; static int netdevice_event(struct notifier_block *this, unsigned long event, void *ptr) { static const struct netdev_event_work_cmd del_cmd = { .cb = del_netdev_ips, .filter = pass_all_filter}; static const struct netdev_event_work_cmd bonding_default_del_cmd_join = { .cb = del_netdev_default_ips_join, .filter = is_eth_port_inactive_slave_filter }; static const struct netdev_event_work_cmd netdev_del_cmd = { .cb = del_netdev_ips, .filter = is_eth_port_of_netdev_filter }; static const struct netdev_event_work_cmd bonding_event_ips_del_cmd = { .cb = del_netdev_upper_ips, .filter = upper_device_filter}; struct net_device *ndev = netdev_notifier_info_to_dev(ptr); struct netdev_event_work_cmd cmds[ROCE_NETDEV_CALLBACK_SZ] = { {NULL} }; if (ndev->type != ARPHRD_ETHER) return NOTIFY_DONE; switch (event) { case NETDEV_REGISTER: case NETDEV_UP: cmds[0] = bonding_default_del_cmd_join; cmds[1] = add_default_gid_cmd; cmds[2] = add_cmd; break; case NETDEV_UNREGISTER: if (ndev->reg_state < NETREG_UNREGISTERED) cmds[0] = del_cmd; else return NOTIFY_DONE; break; case NETDEV_CHANGEADDR: cmds[0] = netdev_del_cmd; if (ndev->reg_state == NETREG_REGISTERED) { cmds[1] = add_default_gid_cmd; cmds[2] = add_cmd; } break; case NETDEV_CHANGEUPPER: netdevice_event_changeupper(ndev, container_of(ptr, struct netdev_notifier_changeupper_info, info), cmds); break; case NETDEV_BONDING_FAILOVER: cmds[0] = bonding_event_ips_del_cmd; /* Add default GIDs of the bond device */ cmds[1] = bonding_default_add_cmd; /* Add IP based GIDs of the bond device */ cmds[2] = add_cmd_upper_ips; break; default: return NOTIFY_DONE; } return netdevice_queue_work(cmds, ndev); } static void update_gid_event_work_handler(struct work_struct *_work) { struct update_gid_event_work *work = container_of(_work, struct update_gid_event_work, work); ib_enum_all_roce_netdevs(is_eth_port_of_netdev_filter, work->gid_attr.ndev, callback_for_addr_gid_device_scan, work); dev_put(work->gid_attr.ndev); kfree(work); } static int addr_event(struct notifier_block *this, unsigned long event, struct sockaddr *sa, struct net_device *ndev) { struct update_gid_event_work *work; enum gid_op_type gid_op; if (ndev->type != ARPHRD_ETHER) return NOTIFY_DONE; switch (event) { case NETDEV_UP: gid_op = GID_ADD; break; case NETDEV_DOWN: gid_op = GID_DEL; break; default: return NOTIFY_DONE; } work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) return NOTIFY_DONE; INIT_WORK(&work->work, update_gid_event_work_handler); rdma_ip2gid(sa, &work->gid); work->gid_op = gid_op; memset(&work->gid_attr, 0, sizeof(work->gid_attr)); dev_hold(ndev); work->gid_attr.ndev = ndev; queue_work(gid_cache_wq, &work->work); return NOTIFY_DONE; } static int inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct sockaddr_in in; struct net_device *ndev; struct in_ifaddr *ifa = ptr; in.sin_family = AF_INET; in.sin_addr.s_addr = ifa->ifa_address; ndev = ifa->ifa_dev->dev; return addr_event(this, event, (struct sockaddr *)&in, ndev); } static int inet6addr_event(struct notifier_block *this, unsigned long event, void *ptr) { struct sockaddr_in6 in6; struct net_device *ndev; struct inet6_ifaddr *ifa6 = ptr; in6.sin6_family = AF_INET6; in6.sin6_addr = ifa6->addr; ndev = ifa6->idev->dev; return addr_event(this, event, (struct sockaddr *)&in6, ndev); } static struct notifier_block nb_netdevice = { .notifier_call = netdevice_event }; static struct notifier_block nb_inetaddr = { .notifier_call = inetaddr_event }; static struct notifier_block nb_inet6addr = { .notifier_call = inet6addr_event }; int __init roce_gid_mgmt_init(void) { gid_cache_wq = alloc_ordered_workqueue("gid-cache-wq", 0); if (!gid_cache_wq) return -ENOMEM; register_inetaddr_notifier(&nb_inetaddr); if (IS_ENABLED(CONFIG_IPV6)) register_inet6addr_notifier(&nb_inet6addr); /* We relay on the netdevice notifier to enumerate all * existing devices in the system. Register to this notifier * last to make sure we will not miss any IP add/del * callbacks. */ register_netdevice_notifier(&nb_netdevice); return 0; } void __exit roce_gid_mgmt_cleanup(void) { if (IS_ENABLED(CONFIG_IPV6)) unregister_inet6addr_notifier(&nb_inet6addr); unregister_inetaddr_notifier(&nb_inetaddr); unregister_netdevice_notifier(&nb_netdevice); /* Ensure all gid deletion tasks complete before we go down, * to avoid any reference to free'd memory. By the time * ib-core is removed, all physical devices have been removed, * so no issue with remaining hardware contexts. */ destroy_workqueue(gid_cache_wq); }
4 4 4 4 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 /* * Copyright (c) 2004 Topspin Communications. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2006 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/init.h> #include <linux/err.h> #include <linux/random.h> #include <linux/spinlock.h> #include <linux/slab.h> #include <linux/dma-mapping.h> #include <linux/kref.h> #include <linux/xarray.h> #include <linux/workqueue.h> #include <uapi/linux/if_ether.h> #include <rdma/ib_pack.h> #include <rdma/ib_cache.h> #include <rdma/rdma_netlink.h> #include <net/netlink.h> #include <uapi/rdma/ib_user_sa.h> #include <rdma/ib_marshall.h> #include <rdma/ib_addr.h> #include <rdma/opa_addr.h> #include <rdma/rdma_cm.h> #include "sa.h" #include "core_priv.h" #define IB_SA_LOCAL_SVC_TIMEOUT_MIN 100 #define IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT 2000 #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 #define IB_SA_CPI_MAX_RETRY_CNT 3 #define IB_SA_CPI_RETRY_WAIT 1000 /*msecs */ static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT; struct ib_sa_sm_ah { struct ib_ah *ah; struct kref ref; u16 pkey_index; u8 src_path_mask; }; enum rdma_class_port_info_type { RDMA_CLASS_PORT_INFO_IB, RDMA_CLASS_PORT_INFO_OPA }; struct rdma_class_port_info { enum rdma_class_port_info_type type; union { struct ib_class_port_info ib; struct opa_class_port_info opa; }; }; struct ib_sa_classport_cache { bool valid; int retry_cnt; struct rdma_class_port_info data; }; struct ib_sa_port { struct ib_mad_agent *agent; struct ib_sa_sm_ah *sm_ah; struct work_struct update_task; struct ib_sa_classport_cache classport_info; struct delayed_work ib_cpi_work; spinlock_t classport_lock; /* protects class port info set */ spinlock_t ah_lock; u32 port_num; }; struct ib_sa_device { int start_port, end_port; struct ib_event_handler event_handler; struct ib_sa_port port[]; }; struct ib_sa_query { void (*callback)(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; struct ib_mad_send_buf *mad_buf; struct ib_sa_sm_ah *sm_ah; int id; u32 flags; struct list_head list; /* Local svc request list */ u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 #define IB_SA_CANCEL 0x00000002 #define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_path_query { void (*callback)(int status, struct sa_path_rec *rec, unsigned int num_paths, void *context); void *context; struct ib_sa_query sa_query; struct sa_path_rec *conv_pr; }; struct ib_sa_guidinfo_query { void (*callback)(int, struct ib_sa_guidinfo_rec *, void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_classport_info_query { void (*callback)(void *); void *context; struct ib_sa_query sa_query; }; struct ib_sa_mcmember_query { void (*callback)(int, struct ib_sa_mcmember_rec *, void *); void *context; struct ib_sa_query sa_query; }; static LIST_HEAD(ib_nl_request_list); static DEFINE_SPINLOCK(ib_nl_request_lock); static atomic_t ib_nl_sa_request_seq; static struct workqueue_struct *ib_nl_wq; static struct delayed_work ib_nl_timed_work; static const struct nla_policy ib_nl_policy[LS_NLA_TYPE_MAX] = { [LS_NLA_TYPE_PATH_RECORD] = {.type = NLA_BINARY, .len = sizeof(struct ib_path_rec_data)}, [LS_NLA_TYPE_TIMEOUT] = {.type = NLA_U32}, [LS_NLA_TYPE_SERVICE_ID] = {.type = NLA_U64}, [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid)}, [LS_NLA_TYPE_SGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid)}, [LS_NLA_TYPE_TCLASS] = {.type = NLA_U8}, [LS_NLA_TYPE_PKEY] = {.type = NLA_U16}, [LS_NLA_TYPE_QOS_CLASS] = {.type = NLA_U16}, }; static int ib_sa_add_one(struct ib_device *device); static void ib_sa_remove_one(struct ib_device *device, void *client_data); static struct ib_client sa_client = { .name = "sa", .add = ib_sa_add_one, .remove = ib_sa_remove_one }; static DEFINE_XARRAY_FLAGS(queries, XA_FLAGS_ALLOC | XA_FLAGS_LOCK_IRQ); static DEFINE_SPINLOCK(tid_lock); static u32 tid; #define PATH_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct sa_path_rec, field), \ .struct_size_bytes = sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field path_rec_table[] = { { PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(sgid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, { PATH_REC_FIELD(ib.dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { PATH_REC_FIELD(ib.slid), .offset_words = 10, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(ib.raw_traffic), .offset_words = 11, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 11, .offset_bits = 1, .size_bits = 3 }, { PATH_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { PATH_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { PATH_REC_FIELD(traffic_class), .offset_words = 12, .offset_bits = 0, .size_bits = 8 }, { PATH_REC_FIELD(reversible), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { PATH_REC_FIELD(numb_path), .offset_words = 12, .offset_bits = 9, .size_bits = 7 }, { PATH_REC_FIELD(pkey), .offset_words = 12, .offset_bits = 16, .size_bits = 16 }, { PATH_REC_FIELD(qos_class), .offset_words = 13, .offset_bits = 0, .size_bits = 12 }, { PATH_REC_FIELD(sl), .offset_words = 13, .offset_bits = 12, .size_bits = 4 }, { PATH_REC_FIELD(mtu_selector), .offset_words = 13, .offset_bits = 16, .size_bits = 2 }, { PATH_REC_FIELD(mtu), .offset_words = 13, .offset_bits = 18, .size_bits = 6 }, { PATH_REC_FIELD(rate_selector), .offset_words = 13, .offset_bits = 24, .size_bits = 2 }, { PATH_REC_FIELD(rate), .offset_words = 13, .offset_bits = 26, .size_bits = 6 }, { PATH_REC_FIELD(packet_life_time_selector), .offset_words = 14, .offset_bits = 0, .size_bits = 2 }, { PATH_REC_FIELD(packet_life_time), .offset_words = 14, .offset_bits = 2, .size_bits = 6 }, { PATH_REC_FIELD(preference), .offset_words = 14, .offset_bits = 8, .size_bits = 8 }, { RESERVED, .offset_words = 14, .offset_bits = 16, .size_bits = 48 }, }; #define OPA_PATH_REC_FIELD(field) \ .struct_offset_bytes = \ offsetof(struct sa_path_rec, field), \ .struct_size_bytes = \ sizeof_field(struct sa_path_rec, field), \ .field_name = "sa_path_rec:" #field static const struct ib_field opa_path_rec_table[] = { { OPA_PATH_REC_FIELD(service_id), .offset_words = 0, .offset_bits = 0, .size_bits = 64 }, { OPA_PATH_REC_FIELD(dgid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { OPA_PATH_REC_FIELD(sgid), .offset_words = 6, .offset_bits = 0, .size_bits = 128 }, { OPA_PATH_REC_FIELD(opa.dlid), .offset_words = 10, .offset_bits = 0, .size_bits = 32 }, { OPA_PATH_REC_FIELD(opa.slid), .offset_words = 11, .offset_bits = 0, .size_bits = 32 }, { OPA_PATH_REC_FIELD(opa.raw_traffic), .offset_words = 12, .offset_bits = 0, .size_bits = 1 }, { RESERVED, .offset_words = 12, .offset_bits = 1, .size_bits = 3 }, { OPA_PATH_REC_FIELD(flow_label), .offset_words = 12, .offset_bits = 4, .size_bits = 20 }, { OPA_PATH_REC_FIELD(hop_limit), .offset_words = 12, .offset_bits = 24, .size_bits = 8 }, { OPA_PATH_REC_FIELD(traffic_class), .offset_words = 13, .offset_bits = 0, .size_bits = 8 }, { OPA_PATH_REC_FIELD(reversible), .offset_words = 13, .offset_bits = 8, .size_bits = 1 }, { OPA_PATH_REC_FIELD(numb_path), .offset_words = 13, .offset_bits = 9, .size_bits = 7 }, { OPA_PATH_REC_FIELD(pkey), .offset_words = 13, .offset_bits = 16, .size_bits = 16 }, { OPA_PATH_REC_FIELD(opa.l2_8B), .offset_words = 14, .offset_bits = 0, .size_bits = 1 }, { OPA_PATH_REC_FIELD(opa.l2_10B), .offset_words = 14, .offset_bits = 1, .size_bits = 1 }, { OPA_PATH_REC_FIELD(opa.l2_9B), .offset_words = 14, .offset_bits = 2, .size_bits = 1 }, { OPA_PATH_REC_FIELD(opa.l2_16B), .offset_words = 14, .offset_bits = 3, .size_bits = 1 }, { RESERVED, .offset_words = 14, .offset_bits = 4, .size_bits = 2 }, { OPA_PATH_REC_FIELD(opa.qos_type), .offset_words = 14, .offset_bits = 6, .size_bits = 2 }, { OPA_PATH_REC_FIELD(opa.qos_priority), .offset_words = 14, .offset_bits = 8, .size_bits = 8 }, { RESERVED, .offset_words = 14, .offset_bits = 16, .size_bits = 3 }, { OPA_PATH_REC_FIELD(sl), .offset_words = 14, .offset_bits = 19, .size_bits = 5 }, { RESERVED, .offset_words = 14, .offset_bits = 24, .size_bits = 8 }, { OPA_PATH_REC_FIELD(mtu_selector), .offset_words = 15, .offset_bits = 0, .size_bits = 2 }, { OPA_PATH_REC_FIELD(mtu), .offset_words = 15, .offset_bits = 2, .size_bits = 6 }, { OPA_PATH_REC_FIELD(rate_selector), .offset_words = 15, .offset_bits = 8, .size_bits = 2 }, { OPA_PATH_REC_FIELD(rate), .offset_words = 15, .offset_bits = 10, .size_bits = 6 }, { OPA_PATH_REC_FIELD(packet_life_time_selector), .offset_words = 15, .offset_bits = 16, .size_bits = 2 }, { OPA_PATH_REC_FIELD(packet_life_time), .offset_words = 15, .offset_bits = 18, .size_bits = 6 }, { OPA_PATH_REC_FIELD(preference), .offset_words = 15, .offset_bits = 24, .size_bits = 8 }, }; #define MCMEMBER_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field), \ .struct_size_bytes = sizeof_field(struct ib_sa_mcmember_rec, field), \ .field_name = "sa_mcmember_rec:" #field static const struct ib_field mcmember_rec_table[] = { { MCMEMBER_REC_FIELD(mgid), .offset_words = 0, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(port_gid), .offset_words = 4, .offset_bits = 0, .size_bits = 128 }, { MCMEMBER_REC_FIELD(qkey), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { MCMEMBER_REC_FIELD(mlid), .offset_words = 9, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(mtu_selector), .offset_words = 9, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(mtu), .offset_words = 9, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(traffic_class), .offset_words = 9, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(pkey), .offset_words = 10, .offset_bits = 0, .size_bits = 16 }, { MCMEMBER_REC_FIELD(rate_selector), .offset_words = 10, .offset_bits = 16, .size_bits = 2 }, { MCMEMBER_REC_FIELD(rate), .offset_words = 10, .offset_bits = 18, .size_bits = 6 }, { MCMEMBER_REC_FIELD(packet_life_time_selector), .offset_words = 10, .offset_bits = 24, .size_bits = 2 }, { MCMEMBER_REC_FIELD(packet_life_time), .offset_words = 10, .offset_bits = 26, .size_bits = 6 }, { MCMEMBER_REC_FIELD(sl), .offset_words = 11, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(flow_label), .offset_words = 11, .offset_bits = 4, .size_bits = 20 }, { MCMEMBER_REC_FIELD(hop_limit), .offset_words = 11, .offset_bits = 24, .size_bits = 8 }, { MCMEMBER_REC_FIELD(scope), .offset_words = 12, .offset_bits = 0, .size_bits = 4 }, { MCMEMBER_REC_FIELD(join_state), .offset_words = 12, .offset_bits = 4, .size_bits = 4 }, { MCMEMBER_REC_FIELD(proxy_join), .offset_words = 12, .offset_bits = 8, .size_bits = 1 }, { RESERVED, .offset_words = 12, .offset_bits = 9, .size_bits = 23 }, }; #define CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_class_port_info, field), \ .struct_size_bytes = sizeof_field(struct ib_class_port_info, field), \ .field_name = "ib_class_port_info:" #field static const struct ib_field ib_classport_info_rec_table[] = { { CLASSPORTINFO_REC_FIELD(base_version), .offset_words = 0, .offset_bits = 0, .size_bits = 8 }, { CLASSPORTINFO_REC_FIELD(class_version), .offset_words = 0, .offset_bits = 8, .size_bits = 8 }, { CLASSPORTINFO_REC_FIELD(capability_mask), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { CLASSPORTINFO_REC_FIELD(redirect_tcslfl), .offset_words = 6, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_lid), .offset_words = 7, .offset_bits = 0, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(redirect_pkey), .offset_words = 7, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(redirect_qp), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(redirect_qkey), .offset_words = 9, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_gid), .offset_words = 10, .offset_bits = 0, .size_bits = 128 }, { CLASSPORTINFO_REC_FIELD(trap_tcslfl), .offset_words = 14, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_lid), .offset_words = 15, .offset_bits = 0, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(trap_pkey), .offset_words = 15, .offset_bits = 16, .size_bits = 16 }, { CLASSPORTINFO_REC_FIELD(trap_hlqp), .offset_words = 16, .offset_bits = 0, .size_bits = 32 }, { CLASSPORTINFO_REC_FIELD(trap_qkey), .offset_words = 17, .offset_bits = 0, .size_bits = 32 }, }; #define OPA_CLASSPORTINFO_REC_FIELD(field) \ .struct_offset_bytes =\ offsetof(struct opa_class_port_info, field), \ .struct_size_bytes = \ sizeof_field(struct opa_class_port_info, field), \ .field_name = "opa_class_port_info:" #field static const struct ib_field opa_classport_info_rec_table[] = { { OPA_CLASSPORTINFO_REC_FIELD(base_version), .offset_words = 0, .offset_bits = 0, .size_bits = 8 }, { OPA_CLASSPORTINFO_REC_FIELD(class_version), .offset_words = 0, .offset_bits = 8, .size_bits = 8 }, { OPA_CLASSPORTINFO_REC_FIELD(cap_mask), .offset_words = 0, .offset_bits = 16, .size_bits = 16 }, { OPA_CLASSPORTINFO_REC_FIELD(cap_mask2_resp_time), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_gid), .offset_words = 2, .offset_bits = 0, .size_bits = 128 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_tc_fl), .offset_words = 6, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_lid), .offset_words = 7, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_sl_qp), .offset_words = 8, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_qkey), .offset_words = 9, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_gid), .offset_words = 10, .offset_bits = 0, .size_bits = 128 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_tc_fl), .offset_words = 14, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_lid), .offset_words = 15, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_hl_qp), .offset_words = 16, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_qkey), .offset_words = 17, .offset_bits = 0, .size_bits = 32 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_pkey), .offset_words = 18, .offset_bits = 0, .size_bits = 16 }, { OPA_CLASSPORTINFO_REC_FIELD(redirect_pkey), .offset_words = 18, .offset_bits = 16, .size_bits = 16 }, { OPA_CLASSPORTINFO_REC_FIELD(trap_sl_rsvd), .offset_words = 19, .offset_bits = 0, .size_bits = 8 }, { RESERVED, .offset_words = 19, .offset_bits = 8, .size_bits = 24 }, }; #define GUIDINFO_REC_FIELD(field) \ .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \ .struct_size_bytes = sizeof_field(struct ib_sa_guidinfo_rec, field), \ .field_name = "sa_guidinfo_rec:" #field static const struct ib_field guidinfo_rec_table[] = { { GUIDINFO_REC_FIELD(lid), .offset_words = 0, .offset_bits = 0, .size_bits = 16 }, { GUIDINFO_REC_FIELD(block_num), .offset_words = 0, .offset_bits = 16, .size_bits = 8 }, { GUIDINFO_REC_FIELD(res1), .offset_words = 0, .offset_bits = 24, .size_bits = 8 }, { GUIDINFO_REC_FIELD(res2), .offset_words = 1, .offset_bits = 0, .size_bits = 32 }, { GUIDINFO_REC_FIELD(guid_info_list), .offset_words = 2, .offset_bits = 0, .size_bits = 512 }, }; #define RDMA_PRIMARY_PATH_MAX_REC_NUM 3 static inline void ib_sa_disable_local_svc(struct ib_sa_query *query) { query->flags &= ~IB_SA_ENABLE_LOCAL_SERVICE; } static inline int ib_sa_query_cancelled(struct ib_sa_query *query) { return (query->flags & IB_SA_CANCEL); } static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, struct ib_sa_query *query) { struct sa_path_rec *sa_rec = query->mad_buf->context[1]; struct ib_sa_mad *mad = query->mad_buf->mad; ib_sa_comp_mask comp_mask = mad->sa_hdr.comp_mask; u16 val16; u64 val64; struct rdma_ls_resolve_header *header; query->mad_buf->context[1] = NULL; /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); strscpy_pad(header->device_name, dev_name(&query->port->agent->device->dev), LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && sa_rec->reversible != 0) query->path_use = LS_RESOLVE_PATH_USE_ALL; else query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL; header->path_use = query->path_use; /* Now build the attributes */ if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) { val64 = be64_to_cpu(sa_rec->service_id); nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SERVICE_ID, sizeof(val64), &val64); } if (comp_mask & IB_SA_PATH_REC_DGID) nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_DGID, sizeof(sa_rec->dgid), &sa_rec->dgid); if (comp_mask & IB_SA_PATH_REC_SGID) nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_SGID, sizeof(sa_rec->sgid), &sa_rec->sgid); if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS) nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_TCLASS, sizeof(sa_rec->traffic_class), &sa_rec->traffic_class); if (comp_mask & IB_SA_PATH_REC_PKEY) { val16 = be16_to_cpu(sa_rec->pkey); nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_PKEY, sizeof(val16), &val16); } if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) { val16 = be16_to_cpu(sa_rec->qos_class); nla_put(skb, RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_QOS_CLASS, sizeof(val16), &val16); } } static int ib_nl_get_path_rec_attrs_len(ib_sa_comp_mask comp_mask) { int len = 0; if (comp_mask & IB_SA_PATH_REC_SERVICE_ID) len += nla_total_size(sizeof(u64)); if (comp_mask & IB_SA_PATH_REC_DGID) len += nla_total_size(sizeof(struct rdma_nla_ls_gid)); if (comp_mask & IB_SA_PATH_REC_SGID) len += nla_total_size(sizeof(struct rdma_nla_ls_gid)); if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS) len += nla_total_size(sizeof(u8)); if (comp_mask & IB_SA_PATH_REC_PKEY) len += nla_total_size(sizeof(u16)); if (comp_mask & IB_SA_PATH_REC_QOS_CLASS) len += nla_total_size(sizeof(u16)); /* * Make sure that at least some of the required comp_mask bits are * set. */ if (WARN_ON(len == 0)) return len; /* Add the family header */ len += NLMSG_ALIGN(sizeof(struct rdma_ls_resolve_header)); return len; } static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; void *data; struct ib_sa_mad *mad; int len; unsigned long flags; unsigned long delay; gfp_t gfp_flag; int ret; INIT_LIST_HEAD(&query->list); query->seq = (u32)atomic_inc_return(&ib_nl_sa_request_seq); mad = query->mad_buf->mad; len = ib_nl_get_path_rec_attrs_len(mad->sa_hdr.comp_mask); if (len <= 0) return -EMSGSIZE; skb = nlmsg_new(len, gfp_mask); if (!skb) return -ENOMEM; /* Put nlmsg header only for now */ data = ibnl_put_msg(skb, &nlh, query->seq, 0, RDMA_NL_LS, RDMA_NL_LS_OP_RESOLVE, NLM_F_REQUEST); if (!data) { nlmsg_free(skb); return -EMSGSIZE; } /* Add attributes */ ib_nl_set_path_rec_attrs(skb, query); /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); gfp_flag = ((gfp_mask & GFP_ATOMIC) == GFP_ATOMIC) ? GFP_ATOMIC : GFP_NOWAIT; spin_lock_irqsave(&ib_nl_request_lock, flags); ret = rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, gfp_flag); if (ret) goto out; /* Put the request on the list.*/ delay = msecs_to_jiffies(sa_local_svc_timeout_ms); query->timeout = delay + jiffies; list_add_tail(&query->list, &ib_nl_request_list); /* Start the timeout if this is the only request */ if (ib_nl_request_list.next == &query->list) queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); out: spin_unlock_irqrestore(&ib_nl_request_lock, flags); return ret; } static int ib_nl_cancel_request(struct ib_sa_query *query) { unsigned long flags; struct ib_sa_query *wait_query; int found = 0; spin_lock_irqsave(&ib_nl_request_lock, flags); list_for_each_entry(wait_query, &ib_nl_request_list, list) { /* Let the timeout to take care of the callback */ if (query == wait_query) { query->flags |= IB_SA_CANCEL; query->timeout = jiffies; list_move(&query->list, &ib_nl_request_list); found = 1; mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, 1); break; } } spin_unlock_irqrestore(&ib_nl_request_lock, flags); return found; } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc); static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, const struct nlmsghdr *nlh) { struct sa_path_rec recs[RDMA_PRIMARY_PATH_MAX_REC_NUM]; struct ib_sa_path_query *path_query; struct ib_path_rec_data *rec_data; struct ib_mad_send_wc mad_send_wc; const struct nlattr *head, *curr; struct ib_sa_mad *mad = NULL; int len, rem, status = -EIO; unsigned int num_prs = 0; u32 mask = 0; if (!query->callback) goto out; path_query = container_of(query, struct ib_sa_path_query, sa_query); mad = query->mad_buf->mad; head = (const struct nlattr *) nlmsg_data(nlh); len = nlmsg_len(nlh); switch (query->path_use) { case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; break; case LS_RESOLVE_PATH_USE_ALL: mask = IB_PATH_PRIMARY; break; case LS_RESOLVE_PATH_USE_GMP: default: mask = IB_PATH_PRIMARY | IB_PATH_GMP | IB_PATH_BIDIRECTIONAL; break; } nla_for_each_attr(curr, head, len, rem) { if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD) continue; rec_data = nla_data(curr); if ((rec_data->flags & mask) != mask) continue; if ((query->flags & IB_SA_QUERY_OPA) || path_query->conv_pr) { mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; memcpy(mad->data, rec_data->path_rec, sizeof(rec_data->path_rec)); query->callback(query, 0, mad); goto out; } status = 0; ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), rec_data->path_rec, &recs[num_prs]); recs[num_prs].flags = rec_data->flags; recs[num_prs].rec_type = SA_PATH_REC_TYPE_IB; sa_path_set_dmac_zero(&recs[num_prs]); num_prs++; if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM) break; } if (!status) { mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; path_query->callback(status, recs, num_prs, path_query->context); } else query->callback(query, status, mad); out: mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_SUCCESS; send_handler(query->mad_buf->mad_agent, &mad_send_wc); } static void ib_nl_request_timeout(struct work_struct *work) { unsigned long flags; struct ib_sa_query *query; unsigned long delay; struct ib_mad_send_wc mad_send_wc; int ret; spin_lock_irqsave(&ib_nl_request_lock, flags); while (!list_empty(&ib_nl_request_list)) { query = list_entry(ib_nl_request_list.next, struct ib_sa_query, list); if (time_after(query->timeout, jiffies)) { delay = query->timeout - jiffies; if ((long)delay <= 0) delay = 1; queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, delay); break; } list_del(&query->list); ib_sa_disable_local_svc(query); /* Hold the lock to protect against query cancellation */ if (ib_sa_query_cancelled(query)) ret = -1; else ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_WR_FLUSH_ERR; spin_unlock_irqrestore(&ib_nl_request_lock, flags); send_handler(query->port->agent, &mad_send_wc); spin_lock_irqsave(&ib_nl_request_lock, flags); } } spin_unlock_irqrestore(&ib_nl_request_lock, flags); } int ib_nl_handle_set_timeout(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { int timeout, delta, abs_delta; const struct nlattr *attr; unsigned long flags; struct ib_sa_query *query; long delay = 0; struct nlattr *tb[LS_NLA_TYPE_MAX]; int ret; if (!(nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk)) return -EPERM; ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_policy, NULL); attr = (const struct nlattr *)tb[LS_NLA_TYPE_TIMEOUT]; if (ret || !attr) goto settimeout_out; timeout = *(int *) nla_data(attr); if (timeout < IB_SA_LOCAL_SVC_TIMEOUT_MIN) timeout = IB_SA_LOCAL_SVC_TIMEOUT_MIN; if (timeout > IB_SA_LOCAL_SVC_TIMEOUT_MAX) timeout = IB_SA_LOCAL_SVC_TIMEOUT_MAX; delta = timeout - sa_local_svc_timeout_ms; if (delta < 0) abs_delta = -delta; else abs_delta = delta; if (delta != 0) { spin_lock_irqsave(&ib_nl_request_lock, flags); sa_local_svc_timeout_ms = timeout; list_for_each_entry(query, &ib_nl_request_list, list) { if (delta < 0 && abs_delta > query->timeout) query->timeout = 0; else query->timeout += delta; /* Get the new delay from the first entry */ if (!delay) { delay = query->timeout - jiffies; if (delay <= 0) delay = 1; } } if (delay) mod_delayed_work(ib_nl_wq, &ib_nl_timed_work, (unsigned long)delay); spin_unlock_irqrestore(&ib_nl_request_lock, flags); } settimeout_out: return 0; } static inline int ib_nl_is_good_resolve_resp(const struct nlmsghdr *nlh) { struct nlattr *tb[LS_NLA_TYPE_MAX]; int ret; if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return 0; ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_policy, NULL); if (ret) return 0; return 1; } int ib_nl_handle_resolve_resp(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { unsigned long flags; struct ib_sa_query *query = NULL, *iter; struct ib_mad_send_buf *send_buf; struct ib_mad_send_wc mad_send_wc; int ret; if ((nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk)) return -EPERM; spin_lock_irqsave(&ib_nl_request_lock, flags); list_for_each_entry(iter, &ib_nl_request_list, list) { /* * If the query is cancelled, let the timeout routine * take care of it. */ if (nlh->nlmsg_seq == iter->seq) { if (!ib_sa_query_cancelled(iter)) { list_del(&iter->list); query = iter; } break; } } if (!query) { spin_unlock_irqrestore(&ib_nl_request_lock, flags); goto resp_out; } send_buf = query->mad_buf; if (!ib_nl_is_good_resolve_resp(nlh)) { /* if the result is a failure, send out the packet via IB */ ib_sa_disable_local_svc(query); ret = ib_post_send_mad(query->mad_buf, NULL); spin_unlock_irqrestore(&ib_nl_request_lock, flags); if (ret) { mad_send_wc.send_buf = send_buf; mad_send_wc.status = IB_WC_GENERAL_ERR; send_handler(query->port->agent, &mad_send_wc); } } else { spin_unlock_irqrestore(&ib_nl_request_lock, flags); ib_nl_process_good_resolve_rsp(query, nlh); } resp_out: return 0; } static void free_sm_ah(struct kref *kref) { struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); rdma_destroy_ah(sm_ah->ah, 0); kfree(sm_ah); } void ib_sa_register_client(struct ib_sa_client *client) { atomic_set(&client->users, 1); init_completion(&client->comp); } EXPORT_SYMBOL(ib_sa_register_client); void ib_sa_unregister_client(struct ib_sa_client *client) { ib_sa_client_put(client); wait_for_completion(&client->comp); } EXPORT_SYMBOL(ib_sa_unregister_client); /** * ib_sa_cancel_query - try to cancel an SA query * @id:ID of query to cancel * @query:query pointer to cancel * * Try to cancel an SA query. If the id and query don't match up or * the query has already completed, nothing is done. Otherwise the * query is canceled and will complete with a status of -EINTR. */ void ib_sa_cancel_query(int id, struct ib_sa_query *query) { unsigned long flags; struct ib_mad_send_buf *mad_buf; xa_lock_irqsave(&queries, flags); if (xa_load(&queries, id) != query) { xa_unlock_irqrestore(&queries, flags); return; } mad_buf = query->mad_buf; xa_unlock_irqrestore(&queries, flags); /* * If the query is still on the netlink request list, schedule * it to be cancelled by the timeout routine. Otherwise, it has been * sent to the MAD layer and has to be cancelled from there. */ if (!ib_nl_cancel_request(query)) ib_cancel_mad(mad_buf); } EXPORT_SYMBOL(ib_sa_cancel_query); static u8 get_src_path_mask(struct ib_device *device, u32 port_num) { struct ib_sa_device *sa_dev; struct ib_sa_port *port; unsigned long flags; u8 src_path_mask; sa_dev = ib_get_client_data(device, &sa_client); if (!sa_dev) return 0x7f; port = &sa_dev->port[port_num - sa_dev->start_port]; spin_lock_irqsave(&port->ah_lock, flags); src_path_mask = port->sm_ah ? port->sm_ah->src_path_mask : 0x7f; spin_unlock_irqrestore(&port->ah_lock, flags); return src_path_mask; } static int init_ah_attr_grh_fields(struct ib_device *device, u32 port_num, struct sa_path_rec *rec, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr) { enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec); if (!gid_attr) { gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type, port_num, NULL); if (IS_ERR(gid_attr)) return PTR_ERR(gid_attr); } else rdma_hold_gid_attr(gid_attr); rdma_move_grh_sgid_attr(ah_attr, &rec->dgid, be32_to_cpu(rec->flow_label), rec->hop_limit, rec->traffic_class, gid_attr); return 0; } /** * ib_init_ah_attr_from_path - Initialize address handle attributes based on * an SA path record. * @device: Device associated ah attributes initialization. * @port_num: Port on the specified device. * @rec: path record entry to use for ah attributes initialization. * @ah_attr: address handle attributes to initialization from path record. * @gid_attr: SGID attribute to consider during initialization. * * When ib_init_ah_attr_from_path() returns success, * (a) for IB link layer it optionally contains a reference to SGID attribute * when GRH is present for IB link layer. * (b) for RoCE link layer it contains a reference to SGID attribute. * User must invoke rdma_destroy_ah_attr() to release reference to SGID * attributes which are initialized using ib_init_ah_attr_from_path(). */ int ib_init_ah_attr_from_path(struct ib_device *device, u32 port_num, struct sa_path_rec *rec, struct rdma_ah_attr *ah_attr, const struct ib_gid_attr *gid_attr) { int ret = 0; memset(ah_attr, 0, sizeof(*ah_attr)); ah_attr->type = rdma_ah_find_type(device, port_num); rdma_ah_set_sl(ah_attr, rec->sl); rdma_ah_set_port_num(ah_attr, port_num); rdma_ah_set_static_rate(ah_attr, rec->rate); if (sa_path_is_roce(rec)) { ret = roce_resolve_route_from_path(rec, gid_attr); if (ret) return ret; memcpy(ah_attr->roce.dmac, sa_path_get_dmac(rec), ETH_ALEN); } else { rdma_ah_set_dlid(ah_attr, be32_to_cpu(sa_path_get_dlid(rec))); if (sa_path_is_opa(rec) && rdma_ah_get_dlid(ah_attr) == be16_to_cpu(IB_LID_PERMISSIVE)) rdma_ah_set_make_grd(ah_attr, true); rdma_ah_set_path_bits(ah_attr, be32_to_cpu(sa_path_get_slid(rec)) & get_src_path_mask(device, port_num)); } if (rec->hop_limit > 0 || sa_path_is_roce(rec)) ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr, gid_attr); return ret; } EXPORT_SYMBOL(ib_init_ah_attr_from_path); static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask) { struct rdma_ah_attr ah_attr; unsigned long flags; spin_lock_irqsave(&query->port->ah_lock, flags); if (!query->port->sm_ah) { spin_unlock_irqrestore(&query->port->ah_lock, flags); return -EAGAIN; } kref_get(&query->port->sm_ah->ref); query->sm_ah = query->port->sm_ah; spin_unlock_irqrestore(&query->port->ah_lock, flags); /* * Always check if sm_ah has valid dlid assigned, * before querying for class port info */ if ((rdma_query_ah(query->sm_ah->ah, &ah_attr) < 0) || !rdma_is_valid_unicast_lid(&ah_attr)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -EAGAIN; } query->mad_buf = ib_create_send_mad(query->port->agent, 1, query->sm_ah->pkey_index, 0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA, gfp_mask, ((query->flags & IB_SA_QUERY_OPA) ? OPA_MGMT_BASE_VERSION : IB_MGMT_BASE_VERSION)); if (IS_ERR(query->mad_buf)) { kref_put(&query->sm_ah->ref, free_sm_ah); return -ENOMEM; } query->mad_buf->ah = query->sm_ah->ah; return 0; } static void free_mad(struct ib_sa_query *query) { ib_free_send_mad(query->mad_buf); kref_put(&query->sm_ah->ref, free_sm_ah); } static void init_mad(struct ib_sa_query *query, struct ib_mad_agent *agent) { struct ib_sa_mad *mad = query->mad_buf->mad; unsigned long flags; memset(mad, 0, sizeof *mad); if (query->flags & IB_SA_QUERY_OPA) { mad->mad_hdr.base_version = OPA_MGMT_BASE_VERSION; mad->mad_hdr.class_version = OPA_SA_CLASS_VERSION; } else { mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION; mad->mad_hdr.class_version = IB_SA_CLASS_VERSION; } mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM; spin_lock_irqsave(&tid_lock, flags); mad->mad_hdr.tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++); spin_unlock_irqrestore(&tid_lock, flags); } static int send_mad(struct ib_sa_query *query, unsigned long timeout_ms, gfp_t gfp_mask) { unsigned long flags; int ret, id; const int nmbr_sa_query_retries = 10; xa_lock_irqsave(&queries, flags); ret = __xa_alloc(&queries, &id, query, xa_limit_32b, gfp_mask); xa_unlock_irqrestore(&queries, flags); if (ret < 0) return ret; query->mad_buf->timeout_ms = timeout_ms / nmbr_sa_query_retries; query->mad_buf->retries = nmbr_sa_query_retries; if (!query->mad_buf->timeout_ms) { /* Special case, very small timeout_ms */ query->mad_buf->timeout_ms = 1; query->mad_buf->retries = timeout_ms; } query->mad_buf->context[0] = query; query->id = id; if ((query->flags & IB_SA_ENABLE_LOCAL_SERVICE) && (!(query->flags & IB_SA_QUERY_OPA))) { if (rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) { if (!ib_nl_make_request(query, gfp_mask)) return id; } ib_sa_disable_local_svc(query); } ret = ib_post_send_mad(query->mad_buf, NULL); if (ret) { xa_lock_irqsave(&queries, flags); __xa_erase(&queries, id); xa_unlock_irqrestore(&queries, flags); } /* * It's not safe to dereference query any more, because the * send may already have completed and freed the query in * another context. */ return ret ? ret : id; } void ib_sa_unpack_path(void *attribute, struct sa_path_rec *rec) { ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), attribute, rec); } EXPORT_SYMBOL(ib_sa_unpack_path); void ib_sa_pack_path(struct sa_path_rec *rec, void *attribute) { ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, attribute); } EXPORT_SYMBOL(ib_sa_pack_path); static bool ib_sa_opa_pathrecord_support(struct ib_sa_client *client, struct ib_sa_device *sa_dev, u32 port_num) { struct ib_sa_port *port; unsigned long flags; bool ret = false; port = &sa_dev->port[port_num - sa_dev->start_port]; spin_lock_irqsave(&port->classport_lock, flags); if (!port->classport_info.valid) goto ret; if (port->classport_info.data.type == RDMA_CLASS_PORT_INFO_OPA) ret = opa_get_cpi_capmask2(&port->classport_info.data.opa) & OPA_CLASS_PORT_INFO_PR_SUPPORT; ret: spin_unlock_irqrestore(&port->classport_lock, flags); return ret; } enum opa_pr_supported { PR_NOT_SUPPORTED, PR_OPA_SUPPORTED, PR_IB_SUPPORTED }; /* * opa_pr_query_possible - Check if current PR query can be an OPA query. * * Returns PR_NOT_SUPPORTED if a path record query is not * possible, PR_OPA_SUPPORTED if an OPA path record query * is possible and PR_IB_SUPPORTED if an IB path record * query is possible. */ static int opa_pr_query_possible(struct ib_sa_client *client, struct ib_sa_device *sa_dev, struct ib_device *device, u32 port_num) { struct ib_port_attr port_attr; if (ib_query_port(device, port_num, &port_attr)) return PR_NOT_SUPPORTED; if (ib_sa_opa_pathrecord_support(client, sa_dev, port_num)) return PR_OPA_SUPPORTED; if (port_attr.lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) return PR_NOT_SUPPORTED; else return PR_IB_SUPPORTED; } static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); struct sa_path_rec rec = {}; if (!mad) { query->callback(status, NULL, 0, query->context); return; } if (sa_query->flags & IB_SA_QUERY_OPA) { ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_OPA; query->callback(status, &rec, 1, query->context); return; } ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), mad->data, &rec); rec.rec_type = SA_PATH_REC_TYPE_IB; sa_path_set_dmac_zero(&rec); if (query->conv_pr) { struct sa_path_rec opa; memset(&opa, 0, sizeof(struct sa_path_rec)); sa_convert_path_ib_to_opa(&opa, &rec); query->callback(status, &opa, 1, query->context); } else { query->callback(status, &rec, 1, query->context); } } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); kfree(query->conv_pr); kfree(query); } /** * ib_sa_path_rec_get - Start a Path get query * @client:SA client * @device:device to send query on * @port_num: port number to send query on * @rec:Path Record to send in query * @comp_mask:component mask to send in query * @timeout_ms:time to wait for response * @gfp_mask:GFP mask to use for internal allocations * @callback:function called when query completes, times out or is * canceled * @context:opaque user context passed to callback * @sa_query:query context, used to cancel query * * Send a Path Record Get query to the SA to look up a path. The * callback function will be called when the query completes (or * fails); status is 0 for a successful response, -EINTR if the query * is canceled, -ETIMEDOUT is the query timed out, or -EIO if an error * occurred sending the query. The resp parameter of the callback is * only valid if status is 0. * * If the return value of ib_sa_path_rec_get() is negative, it is an * error code. Otherwise it is a query ID that can be used to cancel * the query. */ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct sa_path_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, unsigned int num_paths, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_path_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; enum opa_pr_supported status; int ret; if (!sa_dev) return -ENODEV; if ((rec->rec_type != SA_PATH_REC_TYPE_IB) && (rec->rec_type != SA_PATH_REC_TYPE_OPA)) return -EINVAL; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; if (rec->rec_type == SA_PATH_REC_TYPE_OPA) { status = opa_pr_query_possible(client, sa_dev, device, port_num); if (status == PR_NOT_SUPPORTED) { ret = -EINVAL; goto err1; } else if (status == PR_OPA_SUPPORTED) { query->sa_query.flags |= IB_SA_QUERY_OPA; } else { query->conv_pr = kmalloc(sizeof(*query->conv_pr), gfp_mask); if (!query->conv_pr) { ret = -ENOMEM; goto err1; } } } ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err2; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL; query->sa_query.release = ib_sa_path_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC); mad->sa_hdr.comp_mask = comp_mask; if (query->sa_query.flags & IB_SA_QUERY_OPA) { ib_pack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), rec, mad->data); } else if (query->conv_pr) { sa_convert_path_opa_to_ib(query->conv_pr, rec); ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), query->conv_pr, mad->data); } else { ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data); } *sa_query = &query->sa_query; query->sa_query.flags |= IB_SA_ENABLE_LOCAL_SERVICE; query->sa_query.mad_buf->context[1] = (query->conv_pr) ? query->conv_pr : rec; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err3; return ret; err3: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err2: kfree(query->conv_pr); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = container_of(sa_query, struct ib_sa_mcmember_query, sa_query); if (mad) { struct ib_sa_mcmember_rec rec; ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query)); } int ib_sa_mcmember_rec_query(struct ib_sa_client *client, struct ib_device *device, u32 port_num, u8 method, struct ib_sa_mcmember_rec *rec, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_mcmember_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_mcmember_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL; query->sa_query.release = ib_sa_mcmember_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = container_of(sa_query, struct ib_sa_guidinfo_query, sa_query); if (mad) { struct ib_sa_guidinfo_rec rec; ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), mad->data, &rec); query->callback(status, &rec, query->context); } else query->callback(status, NULL, query->context); } static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query)); } int ib_sa_guid_info_rec_query(struct ib_sa_client *client, struct ib_device *device, u32 port_num, struct ib_sa_guidinfo_rec *rec, ib_sa_comp_mask comp_mask, u8 method, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct ib_sa_guidinfo_rec *resp, void *context), void *context, struct ib_sa_query **sa_query) { struct ib_sa_guidinfo_query *query; struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client); struct ib_sa_port *port; struct ib_mad_agent *agent; struct ib_sa_mad *mad; int ret; if (!sa_dev) return -ENODEV; if (method != IB_MGMT_METHOD_GET && method != IB_MGMT_METHOD_SET && method != IB_SA_METHOD_DELETE) { return -EINVAL; } port = &sa_dev->port[port_num - sa_dev->start_port]; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err1; ib_sa_client_get(client); query->sa_query.client = client; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL; query->sa_query.release = ib_sa_guidinfo_rec_release; mad->mad_hdr.method = method; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC); mad->sa_hdr.comp_mask = comp_mask; ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec, mad->data); *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err2; return ret; err2: *sa_query = NULL; ib_sa_client_put(query->sa_query.client); free_mad(&query->sa_query); err1: kfree(query); return ret; } EXPORT_SYMBOL(ib_sa_guid_info_rec_query); struct ib_classport_info_context { struct completion done; struct ib_sa_query *sa_query; }; static void ib_classportinfo_cb(void *context) { struct ib_classport_info_context *cb_ctx = context; complete(&cb_ctx->done); } static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, int status, struct ib_sa_mad *mad) { unsigned long flags; struct ib_sa_classport_info_query *query = container_of(sa_query, struct ib_sa_classport_info_query, sa_query); struct ib_sa_classport_cache *info = &sa_query->port->classport_info; if (mad) { if (sa_query->flags & IB_SA_QUERY_OPA) { struct opa_class_port_info rec; ib_unpack(opa_classport_info_rec_table, ARRAY_SIZE(opa_classport_info_rec_table), mad->data, &rec); spin_lock_irqsave(&sa_query->port->classport_lock, flags); if (!status && !info->valid) { memcpy(&info->data.opa, &rec, sizeof(info->data.opa)); info->valid = true; info->data.type = RDMA_CLASS_PORT_INFO_OPA; } spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); } else { struct ib_class_port_info rec; ib_unpack(ib_classport_info_rec_table, ARRAY_SIZE(ib_classport_info_rec_table), mad->data, &rec); spin_lock_irqsave(&sa_query->port->classport_lock, flags); if (!status && !info->valid) { memcpy(&info->data.ib, &rec, sizeof(info->data.ib)); info->valid = true; info->data.type = RDMA_CLASS_PORT_INFO_IB; } spin_unlock_irqrestore(&sa_query->port->classport_lock, flags); } } query->callback(query->context); } static void ib_sa_classport_info_rec_release(struct ib_sa_query *sa_query) { kfree(container_of(sa_query, struct ib_sa_classport_info_query, sa_query)); } static int ib_sa_classport_info_rec_query(struct ib_sa_port *port, unsigned long timeout_ms, void (*callback)(void *context), void *context, struct ib_sa_query **sa_query) { struct ib_mad_agent *agent; struct ib_sa_classport_info_query *query; struct ib_sa_mad *mad; gfp_t gfp_mask = GFP_KERNEL; int ret; agent = port->agent; query = kzalloc(sizeof(*query), gfp_mask); if (!query) return -ENOMEM; query->sa_query.port = port; query->sa_query.flags |= rdma_cap_opa_ah(port->agent->device, port->port_num) ? IB_SA_QUERY_OPA : 0; ret = alloc_mad(&query->sa_query, gfp_mask); if (ret) goto err_free; query->callback = callback; query->context = context; mad = query->sa_query.mad_buf->mad; init_mad(&query->sa_query, agent); query->sa_query.callback = ib_sa_classport_info_rec_callback; query->sa_query.release = ib_sa_classport_info_rec_release; mad->mad_hdr.method = IB_MGMT_METHOD_GET; mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_CLASS_PORTINFO); mad->sa_hdr.comp_mask = 0; *sa_query = &query->sa_query; ret = send_mad(&query->sa_query, timeout_ms, gfp_mask); if (ret < 0) goto err_free_mad; return ret; err_free_mad: *sa_query = NULL; free_mad(&query->sa_query); err_free: kfree(query); return ret; } static void update_ib_cpi(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, ib_cpi_work.work); struct ib_classport_info_context *cb_context; unsigned long flags; int ret; /* If the classport info is valid, nothing * to do here. */ spin_lock_irqsave(&port->classport_lock, flags); if (port->classport_info.valid) { spin_unlock_irqrestore(&port->classport_lock, flags); return; } spin_unlock_irqrestore(&port->classport_lock, flags); cb_context = kmalloc(sizeof(*cb_context), GFP_KERNEL); if (!cb_context) goto err_nomem; init_completion(&cb_context->done); ret = ib_sa_classport_info_rec_query(port, 3000, ib_classportinfo_cb, cb_context, &cb_context->sa_query); if (ret < 0) goto free_cb_err; wait_for_completion(&cb_context->done); free_cb_err: kfree(cb_context); spin_lock_irqsave(&port->classport_lock, flags); /* If the classport info is still not valid, the query should have * failed for some reason. Retry issuing the query */ if (!port->classport_info.valid) { port->classport_info.retry_cnt++; if (port->classport_info.retry_cnt <= IB_SA_CPI_MAX_RETRY_CNT) { unsigned long delay = msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT); queue_delayed_work(ib_wq, &port->ib_cpi_work, delay); } } spin_unlock_irqrestore(&port->classport_lock, flags); err_nomem: return; } static void send_handler(struct ib_mad_agent *agent, struct ib_mad_send_wc *mad_send_wc) { struct ib_sa_query *query = mad_send_wc->send_buf->context[0]; unsigned long flags; if (query->callback) switch (mad_send_wc->status) { case IB_WC_SUCCESS: /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: query->callback(query, -ETIMEDOUT, NULL); break; case IB_WC_WR_FLUSH_ERR: query->callback(query, -EINTR, NULL); break; default: query->callback(query, -EIO, NULL); break; } xa_lock_irqsave(&queries, flags); __xa_erase(&queries, query->id); xa_unlock_irqrestore(&queries, flags); free_mad(query); if (query->client) ib_sa_client_put(query->client); query->release(query); } static void recv_handler(struct ib_mad_agent *mad_agent, struct ib_mad_send_buf *send_buf, struct ib_mad_recv_wc *mad_recv_wc) { struct ib_sa_query *query; if (!send_buf) return; query = send_buf->context[0]; if (query->callback) { if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? -EINVAL : 0, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else query->callback(query, -EIO, NULL); } ib_free_recv_mad(mad_recv_wc); } static void update_sm_ah(struct work_struct *work) { struct ib_sa_port *port = container_of(work, struct ib_sa_port, update_task); struct ib_sa_sm_ah *new_ah; struct ib_port_attr port_attr; struct rdma_ah_attr ah_attr; bool grh_required; if (ib_query_port(port->agent->device, port->port_num, &port_attr)) { pr_warn("Couldn't query port\n"); return; } new_ah = kmalloc(sizeof(*new_ah), GFP_KERNEL); if (!new_ah) return; kref_init(&new_ah->ref); new_ah->src_path_mask = (1 << port_attr.lmc) - 1; new_ah->pkey_index = 0; if (ib_find_pkey(port->agent->device, port->port_num, IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index)) pr_err("Couldn't find index for default PKey\n"); memset(&ah_attr, 0, sizeof(ah_attr)); ah_attr.type = rdma_ah_find_type(port->agent->device, port->port_num); rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid); rdma_ah_set_sl(&ah_attr, port_attr.sm_sl); rdma_ah_set_port_num(&ah_attr, port->port_num); grh_required = rdma_is_grh_required(port->agent->device, port->port_num); /* * The OPA sm_lid of 0xFFFF needs special handling so that it can be * differentiated from a permissive LID of 0xFFFF. We set the * grh_required flag here so the SA can program the DGID in the * address handle appropriately */ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA && (grh_required || port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))) rdma_ah_set_make_grd(&ah_attr, true); if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) { rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH); rdma_ah_set_subnet_prefix(&ah_attr, cpu_to_be64(port_attr.subnet_prefix)); rdma_ah_set_interface_id(&ah_attr, cpu_to_be64(IB_SA_WELL_KNOWN_GUID)); } new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr, RDMA_CREATE_AH_SLEEPABLE); if (IS_ERR(new_ah->ah)) { pr_warn("Couldn't create new SM AH\n"); kfree(new_ah); return; } spin_lock_irq(&port->ah_lock); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = new_ah; spin_unlock_irq(&port->ah_lock); } static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event) { if (event->event == IB_EVENT_PORT_ERR || event->event == IB_EVENT_PORT_ACTIVE || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PKEY_CHANGE || event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER) { unsigned long flags; struct ib_sa_device *sa_dev = container_of(handler, typeof(*sa_dev), event_handler); u32 port_num = event->element.port_num - sa_dev->start_port; struct ib_sa_port *port = &sa_dev->port[port_num]; if (!rdma_cap_ib_sa(handler->device, port->port_num)) return; spin_lock_irqsave(&port->ah_lock, flags); if (port->sm_ah) kref_put(&port->sm_ah->ref, free_sm_ah); port->sm_ah = NULL; spin_unlock_irqrestore(&port->ah_lock, flags); if (event->event == IB_EVENT_SM_CHANGE || event->event == IB_EVENT_CLIENT_REREGISTER || event->event == IB_EVENT_LID_CHANGE || event->event == IB_EVENT_PORT_ACTIVE) { unsigned long delay = msecs_to_jiffies(IB_SA_CPI_RETRY_WAIT); spin_lock_irqsave(&port->classport_lock, flags); port->classport_info.valid = false; port->classport_info.retry_cnt = 0; spin_unlock_irqrestore(&port->classport_lock, flags); queue_delayed_work(ib_wq, &port->ib_cpi_work, delay); } queue_work(ib_wq, &sa_dev->port[port_num].update_task); } } static int ib_sa_add_one(struct ib_device *device) { struct ib_sa_device *sa_dev; int s, e, i; int count = 0; int ret; s = rdma_start_port(device); e = rdma_end_port(device); sa_dev = kzalloc(struct_size(sa_dev, port, size_add(size_sub(e, s), 1)), GFP_KERNEL); if (!sa_dev) return -ENOMEM; sa_dev->start_port = s; sa_dev->end_port = e; for (i = 0; i <= e - s; ++i) { spin_lock_init(&sa_dev->port[i].ah_lock); if (!rdma_cap_ib_sa(device, i + 1)) continue; sa_dev->port[i].sm_ah = NULL; sa_dev->port[i].port_num = i + s; spin_lock_init(&sa_dev->port[i].classport_lock); sa_dev->port[i].classport_info.valid = false; sa_dev->port[i].agent = ib_register_mad_agent(device, i + s, IB_QPT_GSI, NULL, 0, send_handler, recv_handler, sa_dev, 0); if (IS_ERR(sa_dev->port[i].agent)) { ret = PTR_ERR(sa_dev->port[i].agent); goto err; } INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah); INIT_DELAYED_WORK(&sa_dev->port[i].ib_cpi_work, update_ib_cpi); count++; } if (!count) { ret = -EOPNOTSUPP; goto free; } ib_set_client_data(device, &sa_client, sa_dev); /* * We register our event handler after everything is set up, * and then update our cached info after the event handler is * registered to avoid any problems if a port changes state * during our initialization. */ INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event); ib_register_event_handler(&sa_dev->event_handler); for (i = 0; i <= e - s; ++i) { if (rdma_cap_ib_sa(device, i + 1)) update_sm_ah(&sa_dev->port[i].update_task); } return 0; err: while (--i >= 0) { if (rdma_cap_ib_sa(device, i + 1)) ib_unregister_mad_agent(sa_dev->port[i].agent); } free: kfree(sa_dev); return ret; } static void ib_sa_remove_one(struct ib_device *device, void *client_data) { struct ib_sa_device *sa_dev = client_data; int i; ib_unregister_event_handler(&sa_dev->event_handler); flush_workqueue(ib_wq); for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) { if (rdma_cap_ib_sa(device, i + 1)) { cancel_delayed_work_sync(&sa_dev->port[i].ib_cpi_work); ib_unregister_mad_agent(sa_dev->port[i].agent); if (sa_dev->port[i].sm_ah) kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah); } } kfree(sa_dev); } int ib_sa_init(void) { int ret; get_random_bytes(&tid, sizeof tid); atomic_set(&ib_nl_sa_request_seq, 0); ret = ib_register_client(&sa_client); if (ret) { pr_err("Couldn't register ib_sa client\n"); goto err1; } ret = mcast_init(); if (ret) { pr_err("Couldn't initialize multicast handling\n"); goto err2; } ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM); if (!ib_nl_wq) { ret = -ENOMEM; goto err3; } INIT_DELAYED_WORK(&ib_nl_timed_work, ib_nl_request_timeout); return 0; err3: mcast_cleanup(); err2: ib_unregister_client(&sa_client); err1: return ret; } void ib_sa_cleanup(void) { cancel_delayed_work(&ib_nl_timed_work); destroy_workqueue(ib_nl_wq); mcast_cleanup(); ib_unregister_client(&sa_client); WARN_ON(!xa_empty(&queries)); }
3 5 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_KSTRTOX_H #define _LINUX_KSTRTOX_H #include <linux/compiler.h> #include <linux/types.h> /* Internal, do not use. */ int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); int __must_check _kstrtol(const char *s, unsigned int base, long *res); int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); int __must_check kstrtoll(const char *s, unsigned int base, long long *res); /** * kstrtoul - convert a string to an unsigned long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign, but not a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtoul(). Return code must be checked. */ static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) { /* * We want to shortcut function call, but * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. */ if (sizeof(unsigned long) == sizeof(unsigned long long) && __alignof__(unsigned long) == __alignof__(unsigned long long)) return kstrtoull(s, base, (unsigned long long *)res); else return _kstrtoul(s, base, res); } /** * kstrtol - convert a string to a long * @s: The start of the string. The string must be null-terminated, and may also * include a single newline before its terminating null. The first character * may also be a plus sign or a minus sign. * @base: The number base to use. The maximum supported base is 16. If base is * given as 0, then the base of the string is automatically detected with the * conventional semantics - If it begins with 0x the number will be parsed as a * hexadecimal (case insensitive), if it otherwise begins with 0, it will be * parsed as an octal number. Otherwise it will be parsed as a decimal. * @res: Where to write the result of the conversion on success. * * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. * Preferred over simple_strtol(). Return code must be checked. */ static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) { /* * We want to shortcut function call, but * __builtin_types_compatible_p(long, long long) = 0. */ if (sizeof(long) == sizeof(long long) && __alignof__(long) == __alignof__(long long)) return kstrtoll(s, base, (long long *)res); else return _kstrtol(s, base, res); } int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); int __must_check kstrtoint(const char *s, unsigned int base, int *res); static inline int __must_check kstrtou64(const char *s, unsigned int base, u64 *res) { return kstrtoull(s, base, res); } static inline int __must_check kstrtos64(const char *s, unsigned int base, s64 *res) { return kstrtoll(s, base, res); } static inline int __must_check kstrtou32(const char *s, unsigned int base, u32 *res) { return kstrtouint(s, base, res); } static inline int __must_check kstrtos32(const char *s, unsigned int base, s32 *res) { return kstrtoint(s, base, res); } int __must_check kstrtou16(const char *s, unsigned int base, u16 *res); int __must_check kstrtos16(const char *s, unsigned int base, s16 *res); int __must_check kstrtou8(const char *s, unsigned int base, u8 *res); int __must_check kstrtos8(const char *s, unsigned int base, s8 *res); int __must_check kstrtobool(const char *s, bool *res); int __must_check kstrtoull_from_user(const char __user *s, size_t count, unsigned int base, unsigned long long *res); int __must_check kstrtoll_from_user(const char __user *s, size_t count, unsigned int base, long long *res); int __must_check kstrtoul_from_user(const char __user *s, size_t count, unsigned int base, unsigned long *res); int __must_check kstrtol_from_user(const char __user *s, size_t count, unsigned int base, long *res); int __must_check kstrtouint_from_user(const char __user *s, size_t count, unsigned int base, unsigned int *res); int __must_check kstrtoint_from_user(const char __user *s, size_t count, unsigned int base, int *res); int __must_check kstrtou16_from_user(const char __user *s, size_t count, unsigned int base, u16 *res); int __must_check kstrtos16_from_user(const char __user *s, size_t count, unsigned int base, s16 *res); int __must_check kstrtou8_from_user(const char __user *s, size_t count, unsigned int base, u8 *res); int __must_check kstrtos8_from_user(const char __user *s, size_t count, unsigned int base, s8 *res); int __must_check kstrtobool_from_user(const char __user *s, size_t count, bool *res); static inline int __must_check kstrtou64_from_user(const char __user *s, size_t count, unsigned int base, u64 *res) { return kstrtoull_from_user(s, count, base, res); } static inline int __must_check kstrtos64_from_user(const char __user *s, size_t count, unsigned int base, s64 *res) { return kstrtoll_from_user(s, count, base, res); } static inline int __must_check kstrtou32_from_user(const char __user *s, size_t count, unsigned int base, u32 *res) { return kstrtouint_from_user(s, count, base, res); } static inline int __must_check kstrtos32_from_user(const char __user *s, size_t count, unsigned int base, s32 *res) { return kstrtoint_from_user(s, count, base, res); } /* * Use kstrto<foo> instead. * * NOTE: simple_strto<foo> does not check for the range overflow and, * depending on the input, may give interesting results. * * Use these functions if and only if you cannot use kstrto<foo>, because * the conversion ends on the first non-digit character, which may be far * beyond the supported range. It might be useful to parse the strings like * 10x50 or 12:21 without altering original string or temporary buffer in use. * Keep in mind above caveat. */ extern unsigned long simple_strtoul(const char *,char **,unsigned int); extern unsigned long simple_strntoul(const char *,char **,unsigned int,size_t); extern long simple_strtol(const char *,char **,unsigned int); extern unsigned long long simple_strtoull(const char *,char **,unsigned int); extern long long simple_strtoll(const char *,char **,unsigned int); #endif /* _LINUX_KSTRTOX_H */
18 18 18 17 18 18 18 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 // SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 /******************************************************************************* * * Module Name: utmutex - local mutex support * ******************************************************************************/ #include <acpi/acpi.h> #include "accommon.h" #define _COMPONENT ACPI_UTILITIES ACPI_MODULE_NAME("utmutex") /* Local prototypes */ static acpi_status acpi_ut_create_mutex(acpi_mutex_handle mutex_id); static void acpi_ut_delete_mutex(acpi_mutex_handle mutex_id); /******************************************************************************* * * FUNCTION: acpi_ut_mutex_initialize * * PARAMETERS: None. * * RETURN: Status * * DESCRIPTION: Create the system mutex objects. This includes mutexes, * spin locks, and reader/writer locks. * ******************************************************************************/ acpi_status acpi_ut_mutex_initialize(void) { u32 i; acpi_status status; ACPI_FUNCTION_TRACE(ut_mutex_initialize); /* Create each of the predefined mutex objects */ for (i = 0; i < ACPI_NUM_MUTEX; i++) { status = acpi_ut_create_mutex(i); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } } /* Create the spinlocks for use at interrupt level or for speed */ status = acpi_os_create_lock (&acpi_gbl_gpe_lock); if (ACPI_FAILURE (status)) { return_ACPI_STATUS (status); } status = acpi_os_create_raw_lock(&acpi_gbl_hardware_lock); if (ACPI_FAILURE (status)) { return_ACPI_STATUS (status); } status = acpi_os_create_lock(&acpi_gbl_reference_count_lock); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } /* Mutex for _OSI support */ status = acpi_os_create_mutex(&acpi_gbl_osi_mutex); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } /* Create the reader/writer lock for namespace access */ status = acpi_ut_create_rw_lock(&acpi_gbl_namespace_rw_lock); if (ACPI_FAILURE(status)) { return_ACPI_STATUS(status); } return_ACPI_STATUS(status); } /******************************************************************************* * * FUNCTION: acpi_ut_mutex_terminate * * PARAMETERS: None. * * RETURN: None. * * DESCRIPTION: Delete all of the system mutex objects. This includes mutexes, * spin locks, and reader/writer locks. * ******************************************************************************/ void acpi_ut_mutex_terminate(void) { u32 i; ACPI_FUNCTION_TRACE(ut_mutex_terminate); /* Delete each predefined mutex object */ for (i = 0; i < ACPI_NUM_MUTEX; i++) { acpi_ut_delete_mutex(i); } acpi_os_delete_mutex(acpi_gbl_osi_mutex); /* Delete the spinlocks */ acpi_os_delete_lock(acpi_gbl_gpe_lock); acpi_os_delete_raw_lock(acpi_gbl_hardware_lock); acpi_os_delete_lock(acpi_gbl_reference_count_lock); /* Delete the reader/writer lock */ acpi_ut_delete_rw_lock(&acpi_gbl_namespace_rw_lock); return_VOID; } /******************************************************************************* * * FUNCTION: acpi_ut_create_mutex * * PARAMETERS: mutex_ID - ID of the mutex to be created * * RETURN: Status * * DESCRIPTION: Create a mutex object. * ******************************************************************************/ static acpi_status acpi_ut_create_mutex(acpi_mutex_handle mutex_id) { acpi_status status = AE_OK; ACPI_FUNCTION_TRACE_U32(ut_create_mutex, mutex_id); if (!acpi_gbl_mutex_info[mutex_id].mutex) { status = acpi_os_create_mutex(&acpi_gbl_mutex_info[mutex_id].mutex); acpi_gbl_mutex_info[mutex_id].thread_id = ACPI_MUTEX_NOT_ACQUIRED; acpi_gbl_mutex_info[mutex_id].use_count = 0; } return_ACPI_STATUS(status); } /******************************************************************************* * * FUNCTION: acpi_ut_delete_mutex * * PARAMETERS: mutex_ID - ID of the mutex to be deleted * * RETURN: Status * * DESCRIPTION: Delete a mutex object. * ******************************************************************************/ static void acpi_ut_delete_mutex(acpi_mutex_handle mutex_id) { ACPI_FUNCTION_TRACE_U32(ut_delete_mutex, mutex_id); acpi_os_delete_mutex(acpi_gbl_mutex_info[mutex_id].mutex); acpi_gbl_mutex_info[mutex_id].mutex = NULL; acpi_gbl_mutex_info[mutex_id].thread_id = ACPI_MUTEX_NOT_ACQUIRED; return_VOID; } /******************************************************************************* * * FUNCTION: acpi_ut_acquire_mutex * * PARAMETERS: mutex_ID - ID of the mutex to be acquired * * RETURN: Status * * DESCRIPTION: Acquire a mutex object. * ******************************************************************************/ acpi_status acpi_ut_acquire_mutex(acpi_mutex_handle mutex_id) { acpi_status status; acpi_thread_id this_thread_id; ACPI_FUNCTION_NAME(ut_acquire_mutex); if (mutex_id > ACPI_MAX_MUTEX) { return (AE_BAD_PARAMETER); } this_thread_id = acpi_os_get_thread_id(); #ifdef ACPI_MUTEX_DEBUG { u32 i; /* * Mutex debug code, for internal debugging only. * * Deadlock prevention. Check if this thread owns any mutexes of value * greater than or equal to this one. If so, the thread has violated * the mutex ordering rule. This indicates a coding error somewhere in * the ACPI subsystem code. */ for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) { if (acpi_gbl_mutex_info[i].thread_id == this_thread_id) { if (i == mutex_id) { ACPI_ERROR((AE_INFO, "Mutex [%s] already acquired by this thread [%u]", acpi_ut_get_mutex_name (mutex_id), (u32)this_thread_id)); return (AE_ALREADY_ACQUIRED); } ACPI_ERROR((AE_INFO, "Invalid acquire order: Thread %u owns [%s], wants [%s]", (u32)this_thread_id, acpi_ut_get_mutex_name(i), acpi_ut_get_mutex_name(mutex_id))); return (AE_ACQUIRE_DEADLOCK); } } } #endif ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Thread %u attempting to acquire Mutex [%s]\n", (u32)this_thread_id, acpi_ut_get_mutex_name(mutex_id))); status = acpi_os_acquire_mutex(acpi_gbl_mutex_info[mutex_id].mutex, ACPI_WAIT_FOREVER); if (ACPI_SUCCESS(status)) { ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Thread %u acquired Mutex [%s]\n", (u32)this_thread_id, acpi_ut_get_mutex_name(mutex_id))); acpi_gbl_mutex_info[mutex_id].use_count++; acpi_gbl_mutex_info[mutex_id].thread_id = this_thread_id; } else { ACPI_EXCEPTION((AE_INFO, status, "Thread %u could not acquire Mutex [%s] (0x%X)", (u32)this_thread_id, acpi_ut_get_mutex_name(mutex_id), mutex_id)); } return (status); } /******************************************************************************* * * FUNCTION: acpi_ut_release_mutex * * PARAMETERS: mutex_ID - ID of the mutex to be released * * RETURN: Status * * DESCRIPTION: Release a mutex object. * ******************************************************************************/ acpi_status acpi_ut_release_mutex(acpi_mutex_handle mutex_id) { ACPI_FUNCTION_NAME(ut_release_mutex); ACPI_DEBUG_PRINT((ACPI_DB_MUTEX, "Thread %u releasing Mutex [%s]\n", (u32)acpi_os_get_thread_id(), acpi_ut_get_mutex_name(mutex_id))); if (mutex_id > ACPI_MAX_MUTEX) { return (AE_BAD_PARAMETER); } /* * Mutex must be acquired in order to release it! */ if (acpi_gbl_mutex_info[mutex_id].thread_id == ACPI_MUTEX_NOT_ACQUIRED) { ACPI_ERROR((AE_INFO, "Mutex [%s] (0x%X) is not acquired, cannot release", acpi_ut_get_mutex_name(mutex_id), mutex_id)); return (AE_NOT_ACQUIRED); } #ifdef ACPI_MUTEX_DEBUG { u32 i; /* * Mutex debug code, for internal debugging only. * * Deadlock prevention. Check if this thread owns any mutexes of value * greater than this one. If so, the thread has violated the mutex * ordering rule. This indicates a coding error somewhere in * the ACPI subsystem code. */ for (i = mutex_id; i < ACPI_NUM_MUTEX; i++) { if (acpi_gbl_mutex_info[i].thread_id == acpi_os_get_thread_id()) { if (i == mutex_id) { continue; } ACPI_ERROR((AE_INFO, "Invalid release order: owns [%s], releasing [%s]", acpi_ut_get_mutex_name(i), acpi_ut_get_mutex_name(mutex_id))); return (AE_RELEASE_DEADLOCK); } } } #endif /* Mark unlocked FIRST */ acpi_gbl_mutex_info[mutex_id].thread_id = ACPI_MUTEX_NOT_ACQUIRED; acpi_os_release_mutex(acpi_gbl_mutex_info[mutex_id].mutex); return (AE_OK); }
29 14 1 1 1 1 154 156 156 19 153 5 2 5 154 153 1 152 1 1 1 1 10 10 10 5 5 5 5 10 1 1 1 1 2 1 1 1 2 2 5 5 5 5 5 5 5 2 2 2 2 5 8 8 8 3 3 3 8 153 153 152 154 153 688 320 154 154 687 7 7 7 1 1 1 1 1 1 1 1 8 9 9 7 7 7 7 7 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 // SPDX-License-Identifier: GPL-2.0-only /* * File: pn_dev.c * * Phonet network device * * Copyright (C) 2008 Nokia Corporation. * * Authors: Sakari Ailus <sakari.ailus@nokia.com> * Rémi Denis-Courmont */ #include <linux/kernel.h> #include <linux/net.h> #include <linux/slab.h> #include <linux/netdevice.h> #include <linux/phonet.h> #include <linux/proc_fs.h> #include <linux/if_arp.h> #include <net/sock.h> #include <net/netns/generic.h> #include <net/phonet/pn_dev.h> struct phonet_routes { spinlock_t lock; struct net_device __rcu *table[64]; }; struct phonet_net { struct phonet_device_list pndevs; struct phonet_routes routes; }; static unsigned int phonet_net_id __read_mostly; static struct phonet_net *phonet_pernet(struct net *net) { return net_generic(net, phonet_net_id); } struct phonet_device_list *phonet_device_list(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); return &pnn->pndevs; } /* Allocate new Phonet device. */ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); if (pnd == NULL) return NULL; pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); lockdep_assert_held(&pndevs->lock); list_add_rcu(&pnd->list, &pndevs->list); return pnd; } static struct phonet_device *__phonet_get(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; lockdep_assert_held(&pndevs->lock); list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } return NULL; } static struct phonet_device *__phonet_get_rcu(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; list_for_each_entry_rcu(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } return NULL; } static void phonet_device_destroy(struct net_device *dev) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; ASSERT_RTNL(); spin_lock(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) list_del_rcu(&pnd->list); spin_unlock(&pndevs->lock); if (pnd) { struct net *net = dev_net(dev); u32 ifindex = dev->ifindex; u8 addr; for_each_set_bit(addr, pnd->addrs, 64) phonet_address_notify(net, RTM_DELADDR, ifindex, addr); kfree(pnd); } } struct net_device *phonet_device_get(struct net *net) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; struct net_device *dev = NULL; rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { dev = pnd->netdev; BUG_ON(!dev); if ((dev->reg_state == NETREG_REGISTERED) && ((pnd->netdev->flags & IFF_UP)) == IFF_UP) break; dev = NULL; } dev_hold(dev); rcu_read_unlock(); return dev; } int phonet_address_add(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; spin_lock(&pndevs->lock); /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) pnd = __phonet_device_alloc(dev); if (unlikely(pnd == NULL)) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; spin_unlock(&pndevs->lock); return err; } int phonet_address_del(struct net_device *dev, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; spin_lock(&pndevs->lock); pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) { err = -EADDRNOTAVAIL; pnd = NULL; } else if (bitmap_empty(pnd->addrs, 64)) list_del_rcu(&pnd->list); else pnd = NULL; spin_unlock(&pndevs->lock); if (pnd) kfree_rcu(pnd, rcu); return err; } /* Gets a source address toward a destination, through a interface. */ u8 phonet_address_get(struct net_device *dev, u8 daddr) { struct phonet_device *pnd; u8 saddr; rcu_read_lock(); pnd = __phonet_get_rcu(dev); if (pnd) { BUG_ON(bitmap_empty(pnd->addrs, 64)); /* Use same source address as destination, if possible */ if (test_bit(daddr >> 2, pnd->addrs)) saddr = daddr; else saddr = find_first_bit(pnd->addrs, 64) << 2; } else saddr = PN_NO_ADDR; rcu_read_unlock(); if (saddr == PN_NO_ADDR) { /* Fallback to another device */ struct net_device *def_dev; def_dev = phonet_device_get(dev_net(dev)); if (def_dev) { if (def_dev != dev) saddr = phonet_address_get(def_dev, daddr); dev_put(def_dev); } } return saddr; } int phonet_address_lookup(struct net *net, u8 addr) { struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; int err = -EADDRNOTAVAIL; rcu_read_lock(); list_for_each_entry_rcu(pnd, &pndevs->list, list) { /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) continue; if (test_bit(addr >> 2, pnd->addrs)) { err = 0; goto found; } } found: rcu_read_unlock(); return err; } /* automatically configure a Phonet device, if supported */ static int phonet_device_autoconf(struct net_device *dev) { struct if_phonet_req req; int ret; if (!dev->netdev_ops->ndo_siocdevprivate) return -EOPNOTSUPP; ret = dev->netdev_ops->ndo_siocdevprivate(dev, (struct ifreq *)&req, NULL, SIOCPNGAUTOCONF); if (ret < 0) return ret; ASSERT_RTNL(); ret = phonet_address_add(dev, req.ifr_phonet_autoconf.device); if (ret) return ret; phonet_address_notify(dev_net(dev), RTM_NEWADDR, dev->ifindex, req.ifr_phonet_autoconf.device); return 0; } static void phonet_route_autodel(struct net_device *dev) { struct net *net = dev_net(dev); DECLARE_BITMAP(deleted, 64); u32 ifindex = dev->ifindex; struct phonet_net *pnn; unsigned int i; pnn = phonet_pernet(net); /* Remove left-over Phonet routes */ bitmap_zero(deleted, 64); spin_lock(&pnn->routes.lock); for (i = 0; i < 64; i++) { if (rcu_access_pointer(pnn->routes.table[i]) == dev) { RCU_INIT_POINTER(pnn->routes.table[i], NULL); set_bit(i, deleted); } } spin_unlock(&pnn->routes.lock); if (bitmap_empty(deleted, 64)) return; /* short-circuit RCU */ synchronize_rcu(); for_each_set_bit(i, deleted, 64) { rtm_phonet_notify(net, RTM_DELROUTE, ifindex, i); dev_put(dev); } } /* notify Phonet of device events */ static int phonet_device_notify(struct notifier_block *me, unsigned long what, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (what) { case NETDEV_REGISTER: if (dev->type == ARPHRD_PHONET) phonet_device_autoconf(dev); break; case NETDEV_UNREGISTER: phonet_device_destroy(dev); phonet_route_autodel(dev); break; } return 0; } static struct notifier_block phonet_device_notifier = { .notifier_call = phonet_device_notify, .priority = 0, }; /* Per-namespace Phonet devices handling */ static int __net_init phonet_init_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); if (!proc_create_net("phonet", 0, net->proc_net, &pn_sock_seq_ops, sizeof(struct seq_net_private))) return -ENOMEM; INIT_LIST_HEAD(&pnn->pndevs.list); spin_lock_init(&pnn->pndevs.lock); spin_lock_init(&pnn->routes.lock); return 0; } static void __net_exit phonet_exit_net(struct net *net) { struct phonet_net *pnn = phonet_pernet(net); remove_proc_entry("phonet", net->proc_net); WARN_ON_ONCE(!list_empty(&pnn->pndevs.list)); } static struct pernet_operations phonet_net_ops = { .init = phonet_init_net, .exit = phonet_exit_net, .id = &phonet_net_id, .size = sizeof(struct phonet_net), }; /* Initialize Phonet devices list */ int __init phonet_device_init(void) { int err = register_pernet_subsys(&phonet_net_ops); if (err) return err; proc_create_net("pnresource", 0, init_net.proc_net, &pn_res_seq_ops, sizeof(struct seq_net_private)); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) phonet_device_exit(); return err; } void phonet_device_exit(void) { rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_subsys(&phonet_net_ops); remove_proc_entry("pnresource", init_net.proc_net); } int phonet_route_add(struct net_device *dev, u8 daddr) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; int err = -EEXIST; daddr = daddr >> 2; spin_lock(&routes->lock); if (routes->table[daddr] == NULL) { rcu_assign_pointer(routes->table[daddr], dev); dev_hold(dev); err = 0; } spin_unlock(&routes->lock); return err; } int phonet_route_del(struct net_device *dev, u8 daddr) { struct phonet_net *pnn = phonet_pernet(dev_net(dev)); struct phonet_routes *routes = &pnn->routes; daddr = daddr >> 2; spin_lock(&routes->lock); if (rcu_access_pointer(routes->table[daddr]) == dev) RCU_INIT_POINTER(routes->table[daddr], NULL); else dev = NULL; spin_unlock(&routes->lock); if (!dev) return -ENOENT; /* Note : our caller must call synchronize_rcu() and dev_put(dev) */ return 0; } struct net_device *phonet_route_get_rcu(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; daddr >>= 2; dev = rcu_dereference(routes->table[daddr]); return dev; } struct net_device *phonet_route_output(struct net *net, u8 daddr) { struct phonet_net *pnn = phonet_pernet(net); struct phonet_routes *routes = &pnn->routes; struct net_device *dev; daddr >>= 2; rcu_read_lock(); dev = rcu_dereference(routes->table[daddr]); dev_hold(dev); rcu_read_unlock(); if (!dev) dev = phonet_device_get(net); /* Default route */ return dev; }
46 45 46 46 46 19 19 19 19 19 19 16 3 2 2 4 18 8 2 6 6 2 4 6 8 1 8 1 8 50 49 47 3 3 2 4 4 47 47 1 1 1 1 46 53 53 53 52 53 52 52 50 52 52 2 51 5 47 50 3 6 7 5 20 20 17 20 20 20 19 1 20 19 19 19 19 19 18 18 19 20 20 20 20 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> #include <linux/errno.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/io_uring.h> #include <linux/io_uring_types.h> #include <asm/shmparam.h> #include "memmap.h" #include "kbuf.h" #include "rsrc.h" static void *io_mem_alloc_compound(struct page **pages, int nr_pages, size_t size, gfp_t gfp) { struct page *page; int i, order; order = get_order(size); if (order > MAX_PAGE_ORDER) return ERR_PTR(-ENOMEM); else if (order) gfp |= __GFP_COMP; page = alloc_pages(gfp, order); if (!page) return ERR_PTR(-ENOMEM); for (i = 0; i < nr_pages; i++) pages[i] = page + i; return page_address(page); } struct page **io_pin_pages(unsigned long uaddr, unsigned long len, int *npages) { unsigned long start, end, nr_pages; struct page **pages; int ret; if (check_add_overflow(uaddr, len, &end)) return ERR_PTR(-EOVERFLOW); if (check_add_overflow(end, PAGE_SIZE - 1, &end)) return ERR_PTR(-EOVERFLOW); end = end >> PAGE_SHIFT; start = uaddr >> PAGE_SHIFT; nr_pages = end - start; if (WARN_ON_ONCE(!nr_pages)) return ERR_PTR(-EINVAL); if (WARN_ON_ONCE(nr_pages > INT_MAX)) return ERR_PTR(-EOVERFLOW); pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); if (!pages) return ERR_PTR(-ENOMEM); ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM, pages); /* success, mapped all pages */ if (ret == nr_pages) { *npages = nr_pages; return pages; } /* partial map, or didn't map anything */ if (ret >= 0) { /* if we did partial map, release any pages we did get */ if (ret) unpin_user_pages(pages, ret); ret = -EFAULT; } kvfree(pages); return ERR_PTR(ret); } enum { /* memory was vmap'ed for the kernel, freeing the region vunmap's it */ IO_REGION_F_VMAP = 1, /* memory is provided by user and pinned by the kernel */ IO_REGION_F_USER_PROVIDED = 2, /* only the first page in the array is ref'ed */ IO_REGION_F_SINGLE_REF = 4, }; void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr) { if (mr->pages) { long nr_refs = mr->nr_pages; if (mr->flags & IO_REGION_F_SINGLE_REF) nr_refs = 1; if (mr->flags & IO_REGION_F_USER_PROVIDED) unpin_user_pages(mr->pages, nr_refs); else release_pages(mr->pages, nr_refs); kvfree(mr->pages); } if ((mr->flags & IO_REGION_F_VMAP) && mr->ptr) vunmap(mr->ptr); if (mr->nr_pages && ctx->user) __io_unaccount_mem(ctx->user, mr->nr_pages); memset(mr, 0, sizeof(*mr)); } static int io_region_init_ptr(struct io_mapped_region *mr) { struct io_imu_folio_data ifd; void *ptr; if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) { if (ifd.nr_folios == 1) { mr->ptr = page_address(mr->pages[0]); return 0; } } ptr = vmap(mr->pages, mr->nr_pages, VM_MAP, PAGE_KERNEL); if (!ptr) return -ENOMEM; mr->ptr = ptr; mr->flags |= IO_REGION_F_VMAP; return 0; } static int io_region_pin_pages(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg) { unsigned long size = mr->nr_pages << PAGE_SHIFT; struct page **pages; int nr_pages; pages = io_pin_pages(reg->user_addr, size, &nr_pages); if (IS_ERR(pages)) return PTR_ERR(pages); if (WARN_ON_ONCE(nr_pages != mr->nr_pages)) return -EFAULT; mr->pages = pages; mr->flags |= IO_REGION_F_USER_PROVIDED; return 0; } static int io_region_allocate_pages(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset) { gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN; unsigned long size = mr->nr_pages << PAGE_SHIFT; unsigned long nr_allocated; struct page **pages; void *p; pages = kvmalloc_array(mr->nr_pages, sizeof(*pages), gfp); if (!pages) return -ENOMEM; p = io_mem_alloc_compound(pages, mr->nr_pages, size, gfp); if (!IS_ERR(p)) { mr->flags |= IO_REGION_F_SINGLE_REF; goto done; } nr_allocated = alloc_pages_bulk_node(gfp, NUMA_NO_NODE, mr->nr_pages, pages); if (nr_allocated != mr->nr_pages) { if (nr_allocated) release_pages(pages, nr_allocated); kvfree(pages); return -ENOMEM; } done: reg->mmap_offset = mmap_offset; mr->pages = pages; return 0; } int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset) { int nr_pages, ret; u64 end; if (WARN_ON_ONCE(mr->pages || mr->ptr || mr->nr_pages)) return -EFAULT; if (memchr_inv(&reg->__resv, 0, sizeof(reg->__resv))) return -EINVAL; if (reg->flags & ~IORING_MEM_REGION_TYPE_USER) return -EINVAL; /* user_addr should be set IFF it's a user memory backed region */ if ((reg->flags & IORING_MEM_REGION_TYPE_USER) != !!reg->user_addr) return -EFAULT; if (!reg->size || reg->mmap_offset || reg->id) return -EINVAL; if ((reg->size >> PAGE_SHIFT) > INT_MAX) return -E2BIG; if ((reg->user_addr | reg->size) & ~PAGE_MASK) return -EINVAL; if (check_add_overflow(reg->user_addr, reg->size, &end)) return -EOVERFLOW; nr_pages = reg->size >> PAGE_SHIFT; if (ctx->user) { ret = __io_account_mem(ctx->user, nr_pages); if (ret) return ret; } mr->nr_pages = nr_pages; if (reg->flags & IORING_MEM_REGION_TYPE_USER) ret = io_region_pin_pages(ctx, mr, reg); else ret = io_region_allocate_pages(ctx, mr, reg, mmap_offset); if (ret) goto out_free; ret = io_region_init_ptr(mr); if (ret) goto out_free; return 0; out_free: io_free_region(ctx, mr); return ret; } int io_create_region_mmap_safe(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset) { struct io_mapped_region tmp_mr; int ret; memcpy(&tmp_mr, mr, sizeof(tmp_mr)); ret = io_create_region(ctx, &tmp_mr, reg, mmap_offset); if (ret) return ret; /* * Once published mmap can find it without holding only the ->mmap_lock * and not ->uring_lock. */ guard(mutex)(&ctx->mmap_lock); memcpy(mr, &tmp_mr, sizeof(tmp_mr)); return 0; } static struct io_mapped_region *io_mmap_get_region(struct io_ring_ctx *ctx, loff_t pgoff) { loff_t offset = pgoff << PAGE_SHIFT; unsigned int bgid; switch (offset & IORING_OFF_MMAP_MASK) { case IORING_OFF_SQ_RING: case IORING_OFF_CQ_RING: return &ctx->ring_region; case IORING_OFF_SQES: return &ctx->sq_region; case IORING_OFF_PBUF_RING: bgid = (offset & ~IORING_OFF_MMAP_MASK) >> IORING_OFF_PBUF_SHIFT; return io_pbuf_get_region(ctx, bgid); case IORING_MAP_OFF_PARAM_REGION: return &ctx->param_region; case IORING_MAP_OFF_ZCRX_REGION: return &ctx->zcrx_region; } return NULL; } static void *io_region_validate_mmap(struct io_ring_ctx *ctx, struct io_mapped_region *mr) { lockdep_assert_held(&ctx->mmap_lock); if (!io_region_is_set(mr)) return ERR_PTR(-EINVAL); if (mr->flags & IO_REGION_F_USER_PROVIDED) return ERR_PTR(-EINVAL); return io_region_get_ptr(mr); } static void *io_uring_validate_mmap_request(struct file *file, loff_t pgoff, size_t sz) { struct io_ring_ctx *ctx = file->private_data; struct io_mapped_region *region; region = io_mmap_get_region(ctx, pgoff); if (!region) return ERR_PTR(-EINVAL); return io_region_validate_mmap(ctx, region); } #ifdef CONFIG_MMU static int io_region_mmap(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct vm_area_struct *vma, unsigned max_pages) { unsigned long nr_pages = min(mr->nr_pages, max_pages); vm_flags_set(vma, VM_DONTEXPAND); return vm_insert_pages(vma, vma->vm_start, mr->pages, &nr_pages); } __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma) { struct io_ring_ctx *ctx = file->private_data; size_t sz = vma->vm_end - vma->vm_start; long offset = vma->vm_pgoff << PAGE_SHIFT; unsigned int page_limit = UINT_MAX; struct io_mapped_region *region; void *ptr; guard(mutex)(&ctx->mmap_lock); ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); if (IS_ERR(ptr)) return PTR_ERR(ptr); switch (offset & IORING_OFF_MMAP_MASK) { case IORING_OFF_SQ_RING: case IORING_OFF_CQ_RING: page_limit = (sz + PAGE_SIZE - 1) >> PAGE_SHIFT; break; } region = io_mmap_get_region(ctx, vma->vm_pgoff); return io_region_mmap(ctx, region, vma, page_limit); } unsigned long io_uring_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct io_ring_ctx *ctx = filp->private_data; void *ptr; /* * Do not allow to map to user-provided address to avoid breaking the * aliasing rules. Userspace is not able to guess the offset address of * kernel kmalloc()ed memory area. */ if (addr) return -EINVAL; guard(mutex)(&ctx->mmap_lock); ptr = io_uring_validate_mmap_request(filp, pgoff, len); if (IS_ERR(ptr)) return -ENOMEM; /* * Some architectures have strong cache aliasing requirements. * For such architectures we need a coherent mapping which aliases * kernel memory *and* userspace memory. To achieve that: * - use a NULL file pointer to reference physical memory, and * - use the kernel virtual address of the shared io_uring context * (instead of the userspace-provided address, which has to be 0UL * anyway). * - use the same pgoff which the get_unmapped_area() uses to * calculate the page colouring. * For architectures without such aliasing requirements, the * architecture will return any suitable mapping because addr is 0. */ filp = NULL; flags |= MAP_SHARED; pgoff = 0; /* has been translated to ptr above */ #ifdef SHM_COLOUR addr = (uintptr_t) ptr; pgoff = addr >> PAGE_SHIFT; #else addr = 0UL; #endif return mm_get_unmapped_area(current->mm, filp, addr, len, pgoff, flags); } #else /* !CONFIG_MMU */ int io_uring_mmap(struct file *file, struct vm_area_struct *vma) { return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -EINVAL; } unsigned int io_uring_nommu_mmap_capabilities(struct file *file) { return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; } unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct io_ring_ctx *ctx = file->private_data; void *ptr; guard(mutex)(&ctx->mmap_lock); ptr = io_uring_validate_mmap_request(file, pgoff, len); if (IS_ERR(ptr)) return PTR_ERR(ptr); return (unsigned long) ptr; } #endif /* !CONFIG_MMU */
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 // SPDX-License-Identifier: GPL-2.0 /* * consolidates trace point definitions * * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> */ #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> #include <linux/export.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/rcupdate.h> #include <linux/types.h> #include <linux/workqueue.h> #include <linux/netlink.h> #include <linux/net_dropmon.h> #include <linux/slab.h> #include <linux/unaligned.h> #include <asm/bitops.h> #define CREATE_TRACE_POINTS #include <trace/events/skb.h> #include <trace/events/net.h> #include <trace/events/napi.h> #include <trace/events/sock.h> #include <trace/events/udp.h> #include <trace/events/tcp.h> #include <trace/events/fib.h> #include <trace/events/qdisc.h> #if IS_ENABLED(CONFIG_BRIDGE) #include <trace/events/bridge.h> EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full); #endif #if IS_ENABLED(CONFIG_PAGE_POOL) #include <trace/events/page_pool.h> #endif #include <trace/events/neigh.h> EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_timer_handler); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_done); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_event_send_dead); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_cleanup_and_release); EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum); EXPORT_TRACEPOINT_SYMBOL_GPL(udp_fail_queue_rcv_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
50 20 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 #ifndef IO_URING_MEMMAP_H #define IO_URING_MEMMAP_H #define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL #define IORING_MAP_OFF_ZCRX_REGION 0x30000000ULL struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); #ifndef CONFIG_MMU unsigned int io_uring_nommu_mmap_capabilities(struct file *file); #endif unsigned long io_uring_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); int io_uring_mmap(struct file *file, struct vm_area_struct *vma); void io_free_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr); int io_create_region(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset); int io_create_region_mmap_safe(struct io_ring_ctx *ctx, struct io_mapped_region *mr, struct io_uring_region_desc *reg, unsigned long mmap_offset); static inline void *io_region_get_ptr(struct io_mapped_region *mr) { return mr->ptr; } static inline bool io_region_is_set(struct io_mapped_region *mr) { return !!mr->nr_pages; } #endif
3 3 3 3 3 3 2 3 3 2 3 3 3 3 3 3 2 2 2 3 3 3 3 3 3 1 1 1 3 1 3 2 3 3 3 2 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 // SPDX-License-Identifier: GPL-2.0+ OR BSD-2-Clause /* * Streebog hash function as specified by GOST R 34.11-2012 and * described at https://tools.ietf.org/html/rfc6986 * * Copyright (c) 2013 Alexey Degtyarev <alexey@renatasystems.org> * Copyright (c) 2018 Vitaly Chikunov <vt@altlinux.org> * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. */ #include <crypto/internal/hash.h> #include <linux/module.h> #include <linux/crypto.h> #include <crypto/streebog.h> static const struct streebog_uint512 buffer0 = { { 0, 0, 0, 0, 0, 0, 0, 0 } }; static const struct streebog_uint512 buffer512 = { { cpu_to_le64(0x200), 0, 0, 0, 0, 0, 0, 0 } }; static const struct streebog_uint512 C[12] = { { { cpu_to_le64(0xdd806559f2a64507ULL), cpu_to_le64(0x05767436cc744d23ULL), cpu_to_le64(0xa2422a08a460d315ULL), cpu_to_le64(0x4b7ce09192676901ULL), cpu_to_le64(0x714eb88d7585c4fcULL), cpu_to_le64(0x2f6a76432e45d016ULL), cpu_to_le64(0xebcb2f81c0657c1fULL), cpu_to_le64(0xb1085bda1ecadae9ULL) } }, { { cpu_to_le64(0xe679047021b19bb7ULL), cpu_to_le64(0x55dda21bd7cbcd56ULL), cpu_to_le64(0x5cb561c2db0aa7caULL), cpu_to_le64(0x9ab5176b12d69958ULL), cpu_to_le64(0x61d55e0f16b50131ULL), cpu_to_le64(0xf3feea720a232b98ULL), cpu_to_le64(0x4fe39d460f70b5d7ULL), cpu_to_le64(0x6fa3b58aa99d2f1aULL) } }, { { cpu_to_le64(0x991e96f50aba0ab2ULL), cpu_to_le64(0xc2b6f443867adb31ULL), cpu_to_le64(0xc1c93a376062db09ULL), cpu_to_le64(0xd3e20fe490359eb1ULL), cpu_to_le64(0xf2ea7514b1297b7bULL), cpu_to_le64(0x06f15e5f529c1f8bULL), cpu_to_le64(0x0a39fc286a3d8435ULL), cpu_to_le64(0xf574dcac2bce2fc7ULL) } }, { { cpu_to_le64(0x220cbebc84e3d12eULL), cpu_to_le64(0x3453eaa193e837f1ULL), cpu_to_le64(0xd8b71333935203beULL), cpu_to_le64(0xa9d72c82ed03d675ULL), cpu_to_le64(0x9d721cad685e353fULL), cpu_to_le64(0x488e857e335c3c7dULL), cpu_to_le64(0xf948e1a05d71e4ddULL), cpu_to_le64(0xef1fdfb3e81566d2ULL) } }, { { cpu_to_le64(0x601758fd7c6cfe57ULL), cpu_to_le64(0x7a56a27ea9ea63f5ULL), cpu_to_le64(0xdfff00b723271a16ULL), cpu_to_le64(0xbfcd1747253af5a3ULL), cpu_to_le64(0x359e35d7800fffbdULL), cpu_to_le64(0x7f151c1f1686104aULL), cpu_to_le64(0x9a3f410c6ca92363ULL), cpu_to_le64(0x4bea6bacad474799ULL) } }, { { cpu_to_le64(0xfa68407a46647d6eULL), cpu_to_le64(0xbf71c57236904f35ULL), cpu_to_le64(0x0af21f66c2bec6b6ULL), cpu_to_le64(0xcffaa6b71c9ab7b4ULL), cpu_to_le64(0x187f9ab49af08ec6ULL), cpu_to_le64(0x2d66c4f95142a46cULL), cpu_to_le64(0x6fa4c33b7a3039c0ULL), cpu_to_le64(0xae4faeae1d3ad3d9ULL) } }, { { cpu_to_le64(0x8886564d3a14d493ULL), cpu_to_le64(0x3517454ca23c4af3ULL), cpu_to_le64(0x06476983284a0504ULL), cpu_to_le64(0x0992abc52d822c37ULL), cpu_to_le64(0xd3473e33197a93c9ULL), cpu_to_le64(0x399ec6c7e6bf87c9ULL), cpu_to_le64(0x51ac86febf240954ULL), cpu_to_le64(0xf4c70e16eeaac5ecULL) } }, { { cpu_to_le64(0xa47f0dd4bf02e71eULL), cpu_to_le64(0x36acc2355951a8d9ULL), cpu_to_le64(0x69d18d2bd1a5c42fULL), cpu_to_le64(0xf4892bcb929b0690ULL), cpu_to_le64(0x89b4443b4ddbc49aULL), cpu_to_le64(0x4eb7f8719c36de1eULL), cpu_to_le64(0x03e7aa020c6e4141ULL), cpu_to_le64(0x9b1f5b424d93c9a7ULL) } }, { { cpu_to_le64(0x7261445183235adbULL), cpu_to_le64(0x0e38dc92cb1f2a60ULL), cpu_to_le64(0x7b2b8a9aa6079c54ULL), cpu_to_le64(0x800a440bdbb2ceb1ULL), cpu_to_le64(0x3cd955b7e00d0984ULL), cpu_to_le64(0x3a7d3a1b25894224ULL), cpu_to_le64(0x944c9ad8ec165fdeULL), cpu_to_le64(0x378f5a541631229bULL) } }, { { cpu_to_le64(0x74b4c7fb98459cedULL), cpu_to_le64(0x3698fad1153bb6c3ULL), cpu_to_le64(0x7a1e6c303b7652f4ULL), cpu_to_le64(0x9fe76702af69334bULL), cpu_to_le64(0x1fffe18a1b336103ULL), cpu_to_le64(0x8941e71cff8a78dbULL), cpu_to_le64(0x382ae548b2e4f3f3ULL), cpu_to_le64(0xabbedea680056f52ULL) } }, { { cpu_to_le64(0x6bcaa4cd81f32d1bULL), cpu_to_le64(0xdea2594ac06fd85dULL), cpu_to_le64(0xefbacd1d7d476e98ULL), cpu_to_le64(0x8a1d71efea48b9caULL), cpu_to_le64(0x2001802114846679ULL), cpu_to_le64(0xd8fa6bbbebab0761ULL), cpu_to_le64(0x3002c6cd635afe94ULL), cpu_to_le64(0x7bcd9ed0efc889fbULL) } }, { { cpu_to_le64(0x48bc924af11bd720ULL), cpu_to_le64(0xfaf417d5d9b21b99ULL), cpu_to_le64(0xe71da4aa88e12852ULL), cpu_to_le64(0x5d80ef9d1891cc86ULL), cpu_to_le64(0xf82012d430219f9bULL), cpu_to_le64(0xcda43c32bcdf1d77ULL), cpu_to_le64(0xd21380b00449b17aULL), cpu_to_le64(0x378ee767f11631baULL) } } }; static const unsigned long long Ax[8][256] = { { 0xd01f715b5c7ef8e6ULL, 0x16fa240980778325ULL, 0xa8a42e857ee049c8ULL, 0x6ac1068fa186465bULL, 0x6e417bd7a2e9320bULL, 0x665c8167a437daabULL, 0x7666681aa89617f6ULL, 0x4b959163700bdcf5ULL, 0xf14be6b78df36248ULL, 0xc585bd689a625cffULL, 0x9557d7fca67d82cbULL, 0x89f0b969af6dd366ULL, 0xb0833d48749f6c35ULL, 0xa1998c23b1ecbc7cULL, 0x8d70c431ac02a736ULL, 0xd6dfbc2fd0a8b69eULL, 0x37aeb3e551fa198bULL, 0x0b7d128a40b5cf9cULL, 0x5a8f2008b5780cbcULL, 0xedec882284e333e5ULL, 0xd25fc177d3c7c2ceULL, 0x5e0f5d50b61778ecULL, 0x1d873683c0c24cb9ULL, 0xad040bcbb45d208cULL, 0x2f89a0285b853c76ULL, 0x5732fff6791b8d58ULL, 0x3e9311439ef6ec3fULL, 0xc9183a809fd3c00fULL, 0x83adf3f5260a01eeULL, 0xa6791941f4e8ef10ULL, 0x103ae97d0ca1cd5dULL, 0x2ce948121dee1b4aULL, 0x39738421dbf2bf53ULL, 0x093da2a6cf0cf5b4ULL, 0xcd9847d89cbcb45fULL, 0xf9561c078b2d8ae8ULL, 0x9c6a755a6971777fULL, 0xbc1ebaa0712ef0c5ULL, 0x72e61542abf963a6ULL, 0x78bb5fde229eb12eULL, 0x14ba94250fceb90dULL, 0x844d6697630e5282ULL, 0x98ea08026a1e032fULL, 0xf06bbea144217f5cULL, 0xdb6263d11ccb377aULL, 0x641c314b2b8ee083ULL, 0x320e96ab9b4770cfULL, 0x1ee7deb986a96b85ULL, 0xe96cf57a878c47b5ULL, 0xfdd6615f8842feb8ULL, 0xc83862965601dd1bULL, 0x2ea9f83e92572162ULL, 0xf876441142ff97fcULL, 0xeb2c455608357d9dULL, 0x5612a7e0b0c9904cULL, 0x6c01cbfb2d500823ULL, 0x4548a6a7fa037a2dULL, 0xabc4c6bf388b6ef4ULL, 0xbade77d4fdf8bebdULL, 0x799b07c8eb4cac3aULL, 0x0c9d87e805b19cf0ULL, 0xcb588aac106afa27ULL, 0xea0c1d40c1e76089ULL, 0x2869354a1e816f1aULL, 0xff96d17307fbc490ULL, 0x9f0a9d602f1a5043ULL, 0x96373fc6e016a5f7ULL, 0x5292dab8b3a6e41cULL, 0x9b8ae0382c752413ULL, 0x4f15ec3b7364a8a5ULL, 0x3fb349555724f12bULL, 0xc7c50d4415db66d7ULL, 0x92b7429ee379d1a7ULL, 0xd37f99611a15dfdaULL, 0x231427c05e34a086ULL, 0xa439a96d7b51d538ULL, 0xb403401077f01865ULL, 0xdda2aea5901d7902ULL, 0x0a5d4a9c8967d288ULL, 0xc265280adf660f93ULL, 0x8bb0094520d4e94eULL, 0x2a29856691385532ULL, 0x42a833c5bf072941ULL, 0x73c64d54622b7eb2ULL, 0x07e095624504536cULL, 0x8a905153e906f45aULL, 0x6f6123c16b3b2f1fULL, 0xc6e55552dc097bc3ULL, 0x4468feb133d16739ULL, 0xe211e7f0c7398829ULL, 0xa2f96419f7879b40ULL, 0x19074bdbc3ad38e9ULL, 0xf4ebc3f9474e0b0cULL, 0x43886bd376d53455ULL, 0xd8028beb5aa01046ULL, 0x51f23282f5cdc320ULL, 0xe7b1c2be0d84e16dULL, 0x081dfab006dee8a0ULL, 0x3b33340d544b857bULL, 0x7f5bcabc679ae242ULL, 0x0edd37c48a08a6d8ULL, 0x81ed43d9a9b33bc6ULL, 0xb1a3655ebd4d7121ULL, 0x69a1eeb5e7ed6167ULL, 0xf6ab73d5c8f73124ULL, 0x1a67a3e185c61fd5ULL, 0x2dc91004d43c065eULL, 0x0240b02c8fb93a28ULL, 0x90f7f2b26cc0eb8fULL, 0x3cd3a16f114fd617ULL, 0xaae49ea9f15973e0ULL, 0x06c0cd748cd64e78ULL, 0xda423bc7d5192a6eULL, 0xc345701c16b41287ULL, 0x6d2193ede4821537ULL, 0xfcf639494190e3acULL, 0x7c3b228621f1c57eULL, 0xfb16ac2b0494b0c0ULL, 0xbf7e529a3745d7f9ULL, 0x6881b6a32e3f7c73ULL, 0xca78d2bad9b8e733ULL, 0xbbfe2fc2342aa3a9ULL, 0x0dbddffecc6381e4ULL, 0x70a6a56e2440598eULL, 0xe4d12a844befc651ULL, 0x8c509c2765d0ba22ULL, 0xee8c6018c28814d9ULL, 0x17da7c1f49a59e31ULL, 0x609c4c1328e194d3ULL, 0xb3e3d57232f44b09ULL, 0x91d7aaa4a512f69bULL, 0x0ffd6fd243dabbccULL, 0x50d26a943c1fde34ULL, 0x6be15e9968545b4fULL, 0x94778fea6faf9fdfULL, 0x2b09dd7058ea4826ULL, 0x677cd9716de5c7bfULL, 0x49d5214fffb2e6ddULL, 0x0360e83a466b273cULL, 0x1fc786af4f7b7691ULL, 0xa0b9d435783ea168ULL, 0xd49f0c035f118cb6ULL, 0x01205816c9d21d14ULL, 0xac2453dd7d8f3d98ULL, 0x545217cc3f70aa64ULL, 0x26b4028e9489c9c2ULL, 0xdec2469fd6765e3eULL, 0x04807d58036f7450ULL, 0xe5f17292823ddb45ULL, 0xf30b569b024a5860ULL, 0x62dcfc3fa758aefbULL, 0xe84cad6c4e5e5aa1ULL, 0xccb81fce556ea94bULL, 0x53b282ae7a74f908ULL, 0x1b47fbf74c1402c1ULL, 0x368eebf39828049fULL, 0x7afbeff2ad278b06ULL, 0xbe5e0a8cfe97caedULL, 0xcfd8f7f413058e77ULL, 0xf78b2bc301252c30ULL, 0x4d555c17fcdd928dULL, 0x5f2f05467fc565f8ULL, 0x24f4b2a21b30f3eaULL, 0x860dd6bbecb768aaULL, 0x4c750401350f8f99ULL, 0x0000000000000000ULL, 0xecccd0344d312ef1ULL, 0xb5231806be220571ULL, 0xc105c030990d28afULL, 0x653c695de25cfd97ULL, 0x159acc33c61ca419ULL, 0xb89ec7f872418495ULL, 0xa9847693b73254dcULL, 0x58cf90243ac13694ULL, 0x59efc832f3132b80ULL, 0x5c4fed7c39ae42c4ULL, 0x828dabe3efd81cfaULL, 0xd13f294d95ace5f2ULL, 0x7d1b7a90e823d86aULL, 0xb643f03cf849224dULL, 0x3df3f979d89dcb03ULL, 0x7426d836272f2ddeULL, 0xdfe21e891fa4432aULL, 0x3a136c1b9d99986fULL, 0xfa36f43dcd46add4ULL, 0xc025982650df35bbULL, 0x856d3e81aadc4f96ULL, 0xc4a5e57e53b041ebULL, 0x4708168b75ba4005ULL, 0xaf44bbe73be41aa4ULL, 0x971767d029c4b8e3ULL, 0xb9be9feebb939981ULL, 0x215497ecd18d9aaeULL, 0x316e7e91dd2c57f3ULL, 0xcef8afe2dad79363ULL, 0x3853dc371220a247ULL, 0x35ee03c9de4323a3ULL, 0xe6919aa8c456fc79ULL, 0xe05157dc4880b201ULL, 0x7bdbb7e464f59612ULL, 0x127a59518318f775ULL, 0x332ecebd52956ddbULL, 0x8f30741d23bb9d1eULL, 0xd922d3fd93720d52ULL, 0x7746300c61440ae2ULL, 0x25d4eab4d2e2eefeULL, 0x75068020eefd30caULL, 0x135a01474acaea61ULL, 0x304e268714fe4ae7ULL, 0xa519f17bb283c82cULL, 0xdc82f6b359cf6416ULL, 0x5baf781e7caa11a8ULL, 0xb2c38d64fb26561dULL, 0x34ce5bdf17913eb7ULL, 0x5d6fb56af07c5fd0ULL, 0x182713cd0a7f25fdULL, 0x9e2ac576e6c84d57ULL, 0x9aaab82ee5a73907ULL, 0xa3d93c0f3e558654ULL, 0x7e7b92aaae48ff56ULL, 0x872d8ead256575beULL, 0x41c8dbfff96c0e7dULL, 0x99ca5014a3cc1e3bULL, 0x40e883e930be1369ULL, 0x1ca76e95091051adULL, 0x4e35b42dbab6b5b1ULL, 0x05a0254ecabd6944ULL, 0xe1710fca8152af15ULL, 0xf22b0e8dcb984574ULL, 0xb763a82a319b3f59ULL, 0x63fca4296e8ab3efULL, 0x9d4a2d4ca0a36a6bULL, 0xe331bfe60eeb953dULL, 0xd5bf541596c391a2ULL, 0xf5cb9bef8e9c1618ULL, 0x46284e9dbc685d11ULL, 0x2074cffa185f87baULL, 0xbd3ee2b6b8fcedd1ULL, 0xae64e3f1f23607b0ULL, 0xfeb68965ce29d984ULL, 0x55724fdaf6a2b770ULL, 0x29496d5cd753720eULL, 0xa75941573d3af204ULL, 0x8e102c0bea69800aULL, 0x111ab16bc573d049ULL, 0xd7ffe439197aab8aULL, 0xefac380e0b5a09cdULL, 0x48f579593660fbc9ULL, 0x22347fd697e6bd92ULL, 0x61bc1405e13389c7ULL, 0x4ab5c975b9d9c1e1ULL, 0x80cd1bcf606126d2ULL, 0x7186fd78ed92449aULL, 0x93971a882aabccb3ULL, 0x88d0e17f66bfce72ULL, 0x27945a985d5bd4d6ULL }, { 0xde553f8c05a811c8ULL, 0x1906b59631b4f565ULL, 0x436e70d6b1964ff7ULL, 0x36d343cb8b1e9d85ULL, 0x843dfacc858aab5aULL, 0xfdfc95c299bfc7f9ULL, 0x0f634bdea1d51fa2ULL, 0x6d458b3b76efb3cdULL, 0x85c3f77cf8593f80ULL, 0x3c91315fbe737cb2ULL, 0x2148b03366ace398ULL, 0x18f8b8264c6761bfULL, 0xc830c1c495c9fb0fULL, 0x981a76102086a0aaULL, 0xaa16012142f35760ULL, 0x35cc54060c763cf6ULL, 0x42907d66cc45db2dULL, 0x8203d44b965af4bcULL, 0x3d6f3cefc3a0e868ULL, 0xbc73ff69d292bda7ULL, 0x8722ed0102e20a29ULL, 0x8f8185e8cd34deb7ULL, 0x9b0561dda7ee01d9ULL, 0x5335a0193227fad6ULL, 0xc9cecc74e81a6fd5ULL, 0x54f5832e5c2431eaULL, 0x99e47ba05d553470ULL, 0xf7bee756acd226ceULL, 0x384e05a5571816fdULL, 0xd1367452a47d0e6aULL, 0xf29fde1c386ad85bULL, 0x320c77316275f7caULL, 0xd0c879e2d9ae9ab0ULL, 0xdb7406c69110ef5dULL, 0x45505e51a2461011ULL, 0xfc029872e46c5323ULL, 0xfa3cb6f5f7bc0cc5ULL, 0x031f17cd8768a173ULL, 0xbd8df2d9af41297dULL, 0x9d3b4f5ab43e5e3fULL, 0x4071671b36feee84ULL, 0x716207e7d3e3b83dULL, 0x48d20ff2f9283a1aULL, 0x27769eb4757cbc7eULL, 0x5c56ebc793f2e574ULL, 0xa48b474f9ef5dc18ULL, 0x52cbada94ff46e0cULL, 0x60c7da982d8199c6ULL, 0x0e9d466edc068b78ULL, 0x4eec2175eaf865fcULL, 0x550b8e9e21f7a530ULL, 0x6b7ba5bc653fec2bULL, 0x5eb7f1ba6949d0ddULL, 0x57ea94e3db4c9099ULL, 0xf640eae6d101b214ULL, 0xdd4a284182c0b0bbULL, 0xff1d8fbf6304f250ULL, 0xb8accb933bf9d7e8ULL, 0xe8867c478eb68c4dULL, 0x3f8e2692391bddc1ULL, 0xcb2fd60912a15a7cULL, 0xaec935dbab983d2fULL, 0xf55ffd2b56691367ULL, 0x80e2ce366ce1c115ULL, 0x179bf3f8edb27e1dULL, 0x01fe0db07dd394daULL, 0xda8a0b76ecc37b87ULL, 0x44ae53e1df9584cbULL, 0xb310b4b77347a205ULL, 0xdfab323c787b8512ULL, 0x3b511268d070b78eULL, 0x65e6e3d2b9396753ULL, 0x6864b271e2574d58ULL, 0x259784c98fc789d7ULL, 0x02e11a7dfabb35a9ULL, 0x8841a6dfa337158bULL, 0x7ade78c39b5dcdd0ULL, 0xb7cf804d9a2cc84aULL, 0x20b6bd831b7f7742ULL, 0x75bd331d3a88d272ULL, 0x418f6aab4b2d7a5eULL, 0xd9951cbb6babdaf4ULL, 0xb6318dfde7ff5c90ULL, 0x1f389b112264aa83ULL, 0x492c024284fbaec0ULL, 0xe33a0363c608f9a0ULL, 0x2688930408af28a4ULL, 0xc7538a1a341ce4adULL, 0x5da8e677ee2171aeULL, 0x8c9e92254a5c7fc4ULL, 0x63d8cd55aae938b5ULL, 0x29ebd8daa97a3706ULL, 0x959827b37be88aa1ULL, 0x1484e4356adadf6eULL, 0xa7945082199d7d6bULL, 0xbf6ce8a455fa1cd4ULL, 0x9cc542eac9edcae5ULL, 0x79c16f0e1c356ca3ULL, 0x89bfab6fdee48151ULL, 0xd4174d1830c5f0ffULL, 0x9258048415eb419dULL, 0x6139d72850520d1cULL, 0x6a85a80c18ec78f1ULL, 0xcd11f88e0171059aULL, 0xcceff53e7ca29140ULL, 0xd229639f2315af19ULL, 0x90b91ef9ef507434ULL, 0x5977d28d074a1be1ULL, 0x311360fce51d56b9ULL, 0xc093a92d5a1f2f91ULL, 0x1a19a25bb6dc5416ULL, 0xeb996b8a09de2d3eULL, 0xfee3820f1ed7668aULL, 0xd7085ad5b7ad518cULL, 0x7fff41890fe53345ULL, 0xec5948bd67dde602ULL, 0x2fd5f65dbaaa68e0ULL, 0xa5754affe32648c2ULL, 0xf8ddac880d07396cULL, 0x6fa491468c548664ULL, 0x0c7c5c1326bdbed1ULL, 0x4a33158f03930fb3ULL, 0x699abfc19f84d982ULL, 0xe4fa2054a80b329cULL, 0x6707f9af438252faULL, 0x08a368e9cfd6d49eULL, 0x47b1442c58fd25b8ULL, 0xbbb3dc5ebc91769bULL, 0x1665fe489061eac7ULL, 0x33f27a811fa66310ULL, 0x93a609346838d547ULL, 0x30ed6d4c98cec263ULL, 0x1dd9816cd8df9f2aULL, 0x94662a03063b1e7bULL, 0x83fdd9fbeb896066ULL, 0x7b207573e68e590aULL, 0x5f49fc0a149a4407ULL, 0x343259b671a5a82cULL, 0xfbc2bb458a6f981fULL, 0xc272b350a0a41a38ULL, 0x3aaf1fd8ada32354ULL, 0x6cbb868b0b3c2717ULL, 0xa2b569c88d2583feULL, 0xf180c9d1bf027928ULL, 0xaf37386bd64ba9f5ULL, 0x12bacab2790a8088ULL, 0x4c0d3b0810435055ULL, 0xb2eeb9070e9436dfULL, 0xc5b29067cea7d104ULL, 0xdcb425f1ff132461ULL, 0x4f122cc5972bf126ULL, 0xac282fa651230886ULL, 0xe7e537992f6393efULL, 0xe61b3a2952b00735ULL, 0x709c0a57ae302ce7ULL, 0xe02514ae416058d3ULL, 0xc44c9dd7b37445deULL, 0x5a68c5408022ba92ULL, 0x1c278cdca50c0bf0ULL, 0x6e5a9cf6f18712beULL, 0x86dce0b17f319ef3ULL, 0x2d34ec2040115d49ULL, 0x4bcd183f7e409b69ULL, 0x2815d56ad4a9a3dcULL, 0x24698979f2141d0dULL, 0x0000000000000000ULL, 0x1ec696a15fb73e59ULL, 0xd86b110b16784e2eULL, 0x8e7f8858b0e74a6dULL, 0x063e2e8713d05fe6ULL, 0xe2c40ed3bbdb6d7aULL, 0xb1f1aeca89fc97acULL, 0xe1db191e3cb3cc09ULL, 0x6418ee62c4eaf389ULL, 0xc6ad87aa49cf7077ULL, 0xd6f65765ca7ec556ULL, 0x9afb6c6dda3d9503ULL, 0x7ce05644888d9236ULL, 0x8d609f95378feb1eULL, 0x23a9aa4e9c17d631ULL, 0x6226c0e5d73aac6fULL, 0x56149953a69f0443ULL, 0xeeb852c09d66d3abULL, 0x2b0ac2a753c102afULL, 0x07c023376e03cb3cULL, 0x2ccae1903dc2c993ULL, 0xd3d76e2f5ec63bc3ULL, 0x9e2458973356ff4cULL, 0xa66a5d32644ee9b1ULL, 0x0a427294356de137ULL, 0x783f62be61e6f879ULL, 0x1344c70204d91452ULL, 0x5b96c8f0fdf12e48ULL, 0xa90916ecc59bf613ULL, 0xbe92e5142829880eULL, 0x727d102a548b194eULL, 0x1be7afebcb0fc0ccULL, 0x3e702b2244c8491bULL, 0xd5e940a84d166425ULL, 0x66f9f41f3e51c620ULL, 0xabe80c913f20c3baULL, 0xf07ec461c2d1edf2ULL, 0xf361d3ac45b94c81ULL, 0x0521394a94b8fe95ULL, 0xadd622162cf09c5cULL, 0xe97871f7f3651897ULL, 0xf4a1f09b2bba87bdULL, 0x095d6559b2054044ULL, 0x0bbc7f2448be75edULL, 0x2af4cf172e129675ULL, 0x157ae98517094bb4ULL, 0x9fda55274e856b96ULL, 0x914713499283e0eeULL, 0xb952c623462a4332ULL, 0x74433ead475b46a8ULL, 0x8b5eb112245fb4f8ULL, 0xa34b6478f0f61724ULL, 0x11a5dd7ffe6221fbULL, 0xc16da49d27ccbb4bULL, 0x76a224d0bde07301ULL, 0x8aa0bca2598c2022ULL, 0x4df336b86d90c48fULL, 0xea67663a740db9e4ULL, 0xef465f70e0b54771ULL, 0x39b008152acb8227ULL, 0x7d1e5bf4f55e06ecULL, 0x105bd0cf83b1b521ULL, 0x775c2960c033e7dbULL, 0x7e014c397236a79fULL, 0x811cc386113255cfULL, 0xeda7450d1a0e72d8ULL, 0x5889df3d7a998f3bULL, 0x2e2bfbedc779fc3aULL, 0xce0eef438619a4e9ULL, 0x372d4e7bf6cd095fULL, 0x04df34fae96b6a4fULL, 0xf923a13870d4adb6ULL, 0xa1aa7e050a4d228dULL, 0xa8f71b5cb84862c9ULL, 0xb52e9a306097fde3ULL, 0x0d8251a35b6e2a0bULL, 0x2257a7fee1c442ebULL, 0x73831d9a29588d94ULL, 0x51d4ba64c89ccf7fULL, 0x502ab7d4b54f5ba5ULL, 0x97793dce8153bf08ULL, 0xe5042de4d5d8a646ULL, 0x9687307efc802bd2ULL, 0xa05473b5779eb657ULL, 0xb4d097801d446939ULL, 0xcff0e2f3fbca3033ULL, 0xc38cbee0dd778ee2ULL, 0x464f499c252eb162ULL, 0xcad1dbb96f72cea6ULL, 0xba4dd1eec142e241ULL, 0xb00fa37af42f0376ULL }, { 0xcce4cd3aa968b245ULL, 0x089d5484e80b7fafULL, 0x638246c1b3548304ULL, 0xd2fe0ec8c2355492ULL, 0xa7fbdf7ff2374eeeULL, 0x4df1600c92337a16ULL, 0x84e503ea523b12fbULL, 0x0790bbfd53ab0c4aULL, 0x198a780f38f6ea9dULL, 0x2ab30c8f55ec48cbULL, 0xe0f7fed6b2c49db5ULL, 0xb6ecf3f422cadbdcULL, 0x409c9a541358df11ULL, 0xd3ce8a56dfde3fe3ULL, 0xc3e9224312c8c1a0ULL, 0x0d6dfa58816ba507ULL, 0xddf3e1b179952777ULL, 0x04c02a42748bb1d9ULL, 0x94c2abff9f2decb8ULL, 0x4f91752da8f8acf4ULL, 0x78682befb169bf7bULL, 0xe1c77a48af2ff6c4ULL, 0x0c5d7ec69c80ce76ULL, 0x4cc1e4928fd81167ULL, 0xfeed3d24d9997b62ULL, 0x518bb6dfc3a54a23ULL, 0x6dbf2d26151f9b90ULL, 0xb5bc624b05ea664fULL, 0xe86aaa525acfe21aULL, 0x4801ced0fb53a0beULL, 0xc91463e6c00868edULL, 0x1027a815cd16fe43ULL, 0xf67069a0319204cdULL, 0xb04ccc976c8abce7ULL, 0xc0b9b3fc35e87c33ULL, 0xf380c77c58f2de65ULL, 0x50bb3241de4e2152ULL, 0xdf93f490435ef195ULL, 0xf1e0d25d62390887ULL, 0xaf668bfb1a3c3141ULL, 0xbc11b251f00a7291ULL, 0x73a5eed47e427d47ULL, 0x25bee3f6ee4c3b2eULL, 0x43cc0beb34786282ULL, 0xc824e778dde3039cULL, 0xf97d86d98a327728ULL, 0xf2b043e24519b514ULL, 0xe297ebf7880f4b57ULL, 0x3a94a49a98fab688ULL, 0x868516cb68f0c419ULL, 0xeffa11af0964ee50ULL, 0xa4ab4ec0d517f37dULL, 0xa9c6b498547c567aULL, 0x8e18424f80fbbbb6ULL, 0x0bcdc53bcf2bc23cULL, 0x137739aaea3643d0ULL, 0x2c1333ec1bac2ff0ULL, 0x8d48d3f0a7db0625ULL, 0x1e1ac3f26b5de6d7ULL, 0xf520f81f16b2b95eULL, 0x9f0f6ec450062e84ULL, 0x0130849e1deb6b71ULL, 0xd45e31ab8c7533a9ULL, 0x652279a2fd14e43fULL, 0x3209f01e70f1c927ULL, 0xbe71a770cac1a473ULL, 0x0e3d6be7a64b1894ULL, 0x7ec8148cff29d840ULL, 0xcb7476c7fac3be0fULL, 0x72956a4a63a91636ULL, 0x37f95ec21991138fULL, 0x9e3fea5a4ded45f5ULL, 0x7b38ba50964902e8ULL, 0x222e580bbde73764ULL, 0x61e253e0899f55e6ULL, 0xfc8d2805e352ad80ULL, 0x35994be3235ac56dULL, 0x09add01af5e014deULL, 0x5e8659a6780539c6ULL, 0xb17c48097161d796ULL, 0x026015213acbd6e2ULL, 0xd1ae9f77e515e901ULL, 0xb7dc776a3f21b0adULL, 0xaba6a1b96eb78098ULL, 0x9bcf4486248d9f5dULL, 0x582666c536455efdULL, 0xfdbdac9bfeb9c6f1ULL, 0xc47999be4163cdeaULL, 0x765540081722a7efULL, 0x3e548ed8ec710751ULL, 0x3d041f67cb51bac2ULL, 0x7958af71ac82d40aULL, 0x36c9da5c047a78feULL, 0xed9a048e33af38b2ULL, 0x26ee7249c96c86bdULL, 0x900281bdeba65d61ULL, 0x11172c8bd0fd9532ULL, 0xea0abf73600434f8ULL, 0x42fc8f75299309f3ULL, 0x34a9cf7d3eb1ae1cULL, 0x2b838811480723baULL, 0x5ce64c8742ceef24ULL, 0x1adae9b01fd6570eULL, 0x3c349bf9d6bad1b3ULL, 0x82453c891c7b75c0ULL, 0x97923a40b80d512bULL, 0x4a61dbf1c198765cULL, 0xb48ce6d518010d3eULL, 0xcfb45c858e480fd6ULL, 0xd933cbf30d1e96aeULL, 0xd70ea014ab558e3aULL, 0xc189376228031742ULL, 0x9262949cd16d8b83ULL, 0xeb3a3bed7def5f89ULL, 0x49314a4ee6b8cbcfULL, 0xdcc3652f647e4c06ULL, 0xda635a4c2a3e2b3dULL, 0x470c21a940f3d35bULL, 0x315961a157d174b4ULL, 0x6672e81dda3459acULL, 0x5b76f77a1165e36eULL, 0x445cb01667d36ec8ULL, 0xc5491d205c88a69bULL, 0x456c34887a3805b9ULL, 0xffddb9bac4721013ULL, 0x99af51a71e4649bfULL, 0xa15be01cbc7729d5ULL, 0x52db2760e485f7b0ULL, 0x8c78576eba306d54ULL, 0xae560f6507d75a30ULL, 0x95f22f6182c687c9ULL, 0x71c5fbf54489aba5ULL, 0xca44f259e728d57eULL, 0x88b87d2ccebbdc8dULL, 0xbab18d32be4a15aaULL, 0x8be8ec93e99b611eULL, 0x17b713e89ebdf209ULL, 0xb31c5d284baa0174ULL, 0xeeca9531148f8521ULL, 0xb8d198138481c348ULL, 0x8988f9b2d350b7fcULL, 0xb9e11c8d996aa839ULL, 0x5a4673e40c8e881fULL, 0x1687977683569978ULL, 0xbf4123eed72acf02ULL, 0x4ea1f1b3b513c785ULL, 0xe767452be16f91ffULL, 0x7505d1b730021a7cULL, 0xa59bca5ec8fc980cULL, 0xad069eda20f7e7a3ULL, 0x38f4b1bba231606aULL, 0x60d2d77e94743e97ULL, 0x9affc0183966f42cULL, 0x248e6768f3a7505fULL, 0xcdd449a4b483d934ULL, 0x87b59255751baf68ULL, 0x1bea6d2e023d3c7fULL, 0x6b1f12455b5ffcabULL, 0x743555292de9710dULL, 0xd8034f6d10f5fddfULL, 0xc6198c9f7ba81b08ULL, 0xbb8109aca3a17edbULL, 0xfa2d1766ad12cabbULL, 0xc729080166437079ULL, 0x9c5fff7b77269317ULL, 0x0000000000000000ULL, 0x15d706c9a47624ebULL, 0x6fdf38072fd44d72ULL, 0x5fb6dd3865ee52b7ULL, 0xa33bf53d86bcff37ULL, 0xe657c1b5fc84fa8eULL, 0xaa962527735cebe9ULL, 0x39c43525bfda0b1bULL, 0x204e4d2a872ce186ULL, 0x7a083ece8ba26999ULL, 0x554b9c9db72efbfaULL, 0xb22cd9b656416a05ULL, 0x96a2bedea5e63a5aULL, 0x802529a826b0a322ULL, 0x8115ad363b5bc853ULL, 0x8375b81701901eb1ULL, 0x3069e53f4a3a1fc5ULL, 0xbd2136cfede119e0ULL, 0x18bafc91251d81ecULL, 0x1d4a524d4c7d5b44ULL, 0x05f0aedc6960daa8ULL, 0x29e39d3072ccf558ULL, 0x70f57f6b5962c0d4ULL, 0x989fd53903ad22ceULL, 0xf84d024797d91c59ULL, 0x547b1803aac5908bULL, 0xf0d056c37fd263f6ULL, 0xd56eb535919e58d8ULL, 0x1c7ad6d351963035ULL, 0x2e7326cd2167f912ULL, 0xac361a443d1c8cd2ULL, 0x697f076461942a49ULL, 0x4b515f6fdc731d2dULL, 0x8ad8680df4700a6fULL, 0x41ac1eca0eb3b460ULL, 0x7d988533d80965d3ULL, 0xa8f6300649973d0bULL, 0x7765c4960ac9cc9eULL, 0x7ca801adc5e20ea2ULL, 0xdea3700e5eb59ae4ULL, 0xa06b6482a19c42a4ULL, 0x6a2f96db46b497daULL, 0x27def6d7d487edccULL, 0x463ca5375d18b82aULL, 0xa6cb5be1efdc259fULL, 0x53eba3fef96e9cc1ULL, 0xce84d81b93a364a7ULL, 0xf4107c810b59d22fULL, 0x333974806d1aa256ULL, 0x0f0def79bba073e5ULL, 0x231edc95a00c5c15ULL, 0xe437d494c64f2c6cULL, 0x91320523f64d3610ULL, 0x67426c83c7df32ddULL, 0x6eefbc99323f2603ULL, 0x9d6f7be56acdf866ULL, 0x5916e25b2bae358cULL, 0x7ff89012e2c2b331ULL, 0x035091bf2720bd93ULL, 0x561b0d22900e4669ULL, 0x28d319ae6f279e29ULL, 0x2f43a2533c8c9263ULL, 0xd09e1be9f8fe8270ULL, 0xf740ed3e2c796fbcULL, 0xdb53ded237d5404cULL, 0x62b2c25faebfe875ULL, 0x0afd41a5d2c0a94dULL, 0x6412fd3ce0ff8f4eULL, 0xe3a76f6995e42026ULL, 0x6c8fa9b808f4f0e1ULL, 0xc2d9a6dd0f23aad1ULL, 0x8f28c6d19d10d0c7ULL, 0x85d587744fd0798aULL, 0xa20b71a39b579446ULL, 0x684f83fa7c7f4138ULL, 0xe507500adba4471dULL, 0x3f640a46f19a6c20ULL, 0x1247bd34f7dd28a1ULL, 0x2d23b77206474481ULL, 0x93521002cc86e0f2ULL, 0x572b89bc8de52d18ULL, 0xfb1d93f8b0f9a1caULL, 0xe95a2ecc4724896bULL, 0x3ba420048511ddf9ULL, 0xd63e248ab6bee54bULL, 0x5dd6c8195f258455ULL, 0x06a03f634e40673bULL, 0x1f2a476c76b68da6ULL, 0x217ec9b49ac78af7ULL, 0xecaa80102e4453c3ULL, 0x14e78257b99d4f9aULL }, { 0x20329b2cc87bba05ULL, 0x4f5eb6f86546a531ULL, 0xd4f44775f751b6b1ULL, 0x8266a47b850dfa8bULL, 0xbb986aa15a6ca985ULL, 0xc979eb08f9ae0f99ULL, 0x2da6f447a2375ea1ULL, 0x1e74275dcd7d8576ULL, 0xbc20180a800bc5f8ULL, 0xb4a2f701b2dc65beULL, 0xe726946f981b6d66ULL, 0x48e6c453bf21c94cULL, 0x42cad9930f0a4195ULL, 0xefa47b64aacccd20ULL, 0x71180a8960409a42ULL, 0x8bb3329bf6a44e0cULL, 0xd34c35de2d36daccULL, 0xa92f5b7cbc23dc96ULL, 0xb31a85aa68bb09c3ULL, 0x13e04836a73161d2ULL, 0xb24dfc4129c51d02ULL, 0x8ae44b70b7da5acdULL, 0xe671ed84d96579a7ULL, 0xa4bb3417d66f3832ULL, 0x4572ab38d56d2de8ULL, 0xb1b47761ea47215cULL, 0xe81c09cf70aba15dULL, 0xffbdb872ce7f90acULL, 0xa8782297fd5dc857ULL, 0x0d946f6b6a4ce4a4ULL, 0xe4df1f4f5b995138ULL, 0x9ebc71edca8c5762ULL, 0x0a2c1dc0b02b88d9ULL, 0x3b503c115d9d7b91ULL, 0xc64376a8111ec3a2ULL, 0xcec199a323c963e4ULL, 0xdc76a87ec58616f7ULL, 0x09d596e073a9b487ULL, 0x14583a9d7d560dafULL, 0xf4c6dc593f2a0cb4ULL, 0xdd21d19584f80236ULL, 0x4a4836983ddde1d3ULL, 0xe58866a41ae745f9ULL, 0xf591a5b27e541875ULL, 0x891dc05074586693ULL, 0x5b068c651810a89eULL, 0xa30346bc0c08544fULL, 0x3dbf3751c684032dULL, 0x2a1e86ec785032dcULL, 0xf73f5779fca830eaULL, 0xb60c05ca30204d21ULL, 0x0cc316802b32f065ULL, 0x8770241bdd96be69ULL, 0xb861e18199ee95dbULL, 0xf805cad91418fcd1ULL, 0x29e70dccbbd20e82ULL, 0xc7140f435060d763ULL, 0x0f3a9da0e8b0cc3bULL, 0xa2543f574d76408eULL, 0xbd7761e1c175d139ULL, 0x4b1f4f737ca3f512ULL, 0x6dc2df1f2fc137abULL, 0xf1d05c3967b14856ULL, 0xa742bf3715ed046cULL, 0x654030141d1697edULL, 0x07b872abda676c7dULL, 0x3ce84eba87fa17ecULL, 0xc1fb0403cb79afdfULL, 0x3e46bc7105063f73ULL, 0x278ae987121cd678ULL, 0xa1adb4778ef47cd0ULL, 0x26dd906c5362c2b9ULL, 0x05168060589b44e2ULL, 0xfbfc41f9d79ac08fULL, 0x0e6de44ba9ced8faULL, 0x9feb08068bf243a3ULL, 0x7b341749d06b129bULL, 0x229c69e74a87929aULL, 0xe09ee6c4427c011bULL, 0x5692e30e725c4c3aULL, 0xda99a33e5e9f6e4bULL, 0x353dd85af453a36bULL, 0x25241b4c90e0fee7ULL, 0x5de987258309d022ULL, 0xe230140fc0802984ULL, 0x93281e86a0c0b3c6ULL, 0xf229d719a4337408ULL, 0x6f6c2dd4ad3d1f34ULL, 0x8ea5b2fbae3f0aeeULL, 0x8331dd90c473ee4aULL, 0x346aa1b1b52db7aaULL, 0xdf8f235e06042aa9ULL, 0xcc6f6b68a1354b7bULL, 0x6c95a6f46ebf236aULL, 0x52d31a856bb91c19ULL, 0x1a35ded6d498d555ULL, 0xf37eaef2e54d60c9ULL, 0x72e181a9a3c2a61cULL, 0x98537aad51952fdeULL, 0x16f6c856ffaa2530ULL, 0xd960281e9d1d5215ULL, 0x3a0745fa1ce36f50ULL, 0x0b7b642bf1559c18ULL, 0x59a87eae9aec8001ULL, 0x5e100c05408bec7cULL, 0x0441f98b19e55023ULL, 0xd70dcc5534d38aefULL, 0x927f676de1bea707ULL, 0x9769e70db925e3e5ULL, 0x7a636ea29115065aULL, 0x468b201816ef11b6ULL, 0xab81a9b73edff409ULL, 0xc0ac7de88a07bb1eULL, 0x1f235eb68c0391b7ULL, 0x6056b074458dd30fULL, 0xbe8eeac102f7ed67ULL, 0xcd381283e04b5fbaULL, 0x5cbefecec277c4e3ULL, 0xd21b4c356c48ce0dULL, 0x1019c31664b35d8cULL, 0x247362a7d19eea26ULL, 0xebe582efb3299d03ULL, 0x02aef2cb82fc289fULL, 0x86275df09ce8aaa8ULL, 0x28b07427faac1a43ULL, 0x38a9b7319e1f47cfULL, 0xc82e92e3b8d01b58ULL, 0x06ef0b409b1978bcULL, 0x62f842bfc771fb90ULL, 0x9904034610eb3b1fULL, 0xded85ab5477a3e68ULL, 0x90d195a663428f98ULL, 0x5384636e2ac708d8ULL, 0xcbd719c37b522706ULL, 0xae9729d76644b0ebULL, 0x7c8c65e20a0c7ee6ULL, 0x80c856b007f1d214ULL, 0x8c0b40302cc32271ULL, 0xdbcedad51fe17a8aULL, 0x740e8ae938dbdea0ULL, 0xa615c6dc549310adULL, 0x19cc55f6171ae90bULL, 0x49b1bdb8fe5fdd8dULL, 0xed0a89af2830e5bfULL, 0x6a7aadb4f5a65bd6ULL, 0x7e22972988f05679ULL, 0xf952b3325566e810ULL, 0x39fecedadf61530eULL, 0x6101c99f04f3c7ceULL, 0x2e5f7f6761b562ffULL, 0xf08725d226cf5c97ULL, 0x63af3b54860fef51ULL, 0x8ff2cb10ef411e2fULL, 0x884ab9bb35267252ULL, 0x4df04433e7ba8daeULL, 0x9afd8866d3690741ULL, 0x66b9bb34de94abb3ULL, 0x9baaf18d92171380ULL, 0x543c11c5f0a064a5ULL, 0x17a1b1bdbed431f1ULL, 0xb5f58eeaf3a2717fULL, 0xc355f6c849858740ULL, 0xec5df044694ef17eULL, 0xd83751f5dc6346d4ULL, 0xfc4433520dfdacf2ULL, 0x0000000000000000ULL, 0x5a51f58e596ebc5fULL, 0x3285aaf12e34cf16ULL, 0x8d5c39db6dbd36b0ULL, 0x12b731dde64f7513ULL, 0x94906c2d7aa7dfbbULL, 0x302b583aacc8e789ULL, 0x9d45facd090e6b3cULL, 0x2165e2c78905aec4ULL, 0x68d45f7f775a7349ULL, 0x189b2c1d5664fdcaULL, 0xe1c99f2f030215daULL, 0x6983269436246788ULL, 0x8489af3b1e148237ULL, 0xe94b702431d5b59cULL, 0x33d2d31a6f4adbd7ULL, 0xbfd9932a4389f9a6ULL, 0xb0e30e8aab39359dULL, 0xd1e2c715afcaf253ULL, 0x150f43763c28196eULL, 0xc4ed846393e2eb3dULL, 0x03f98b20c3823c5eULL, 0xfd134ab94c83b833ULL, 0x556b682eb1de7064ULL, 0x36c4537a37d19f35ULL, 0x7559f30279a5ca61ULL, 0x799ae58252973a04ULL, 0x9c12832648707ffdULL, 0x78cd9c6913e92ec5ULL, 0x1d8dac7d0effb928ULL, 0x439da0784e745554ULL, 0x413352b3cc887dcbULL, 0xbacf134a1b12bd44ULL, 0x114ebafd25cd494dULL, 0x2f08068c20cb763eULL, 0x76a07822ba27f63fULL, 0xeab2fb04f25789c2ULL, 0xe3676de481fe3d45ULL, 0x1b62a73d95e6c194ULL, 0x641749ff5c68832cULL, 0xa5ec4dfc97112cf3ULL, 0xf6682e92bdd6242bULL, 0x3f11c59a44782bb2ULL, 0x317c21d1edb6f348ULL, 0xd65ab5be75ad9e2eULL, 0x6b2dd45fb4d84f17ULL, 0xfaab381296e4d44eULL, 0xd0b5befeeeb4e692ULL, 0x0882ef0b32d7a046ULL, 0x512a91a5a83b2047ULL, 0x963e9ee6f85bf724ULL, 0x4e09cf132438b1f0ULL, 0x77f701c9fb59e2feULL, 0x7ddb1c094b726a27ULL, 0x5f4775ee01f5f8bdULL, 0x9186ec4d223c9b59ULL, 0xfeeac1998f01846dULL, 0xac39db1ce4b89874ULL, 0xb75b7c21715e59e0ULL, 0xafc0503c273aa42aULL, 0x6e3b543fec430bf5ULL, 0x704f7362213e8e83ULL, 0x58ff0745db9294c0ULL, 0x67eec2df9feabf72ULL, 0xa0facd9ccf8a6811ULL, 0xb936986ad890811aULL, 0x95c715c63bd9cb7aULL, 0xca8060283a2c33c7ULL, 0x507de84ee9453486ULL, 0x85ded6d05f6a96f6ULL, 0x1cdad5964f81ade9ULL, 0xd5a33e9eb62fa270ULL, 0x40642b588df6690aULL, 0x7f75eec2c98e42b8ULL, 0x2cf18dace3494a60ULL, 0x23cb100c0bf9865bULL, 0xeef3028febb2d9e1ULL, 0x4425d2d394133929ULL, 0xaad6d05c7fa1e0c8ULL, 0xad6ea2f7a5c68cb5ULL, 0xc2028f2308fb9381ULL, 0x819f2f5b468fc6d5ULL, 0xc5bafd88d29cfffcULL, 0x47dc59f357910577ULL, 0x2b49ff07392e261dULL, 0x57c59ae5332258fbULL, 0x73b6f842e2bcb2ddULL, 0xcf96e04862b77725ULL, 0x4ca73dd8a6c4996fULL, 0x015779eb417e14c1ULL, 0x37932a9176af8bf4ULL }, { 0x190a2c9b249df23eULL, 0x2f62f8b62263e1e9ULL, 0x7a7f754740993655ULL, 0x330b7ba4d5564d9fULL, 0x4c17a16a46672582ULL, 0xb22f08eb7d05f5b8ULL, 0x535f47f40bc148ccULL, 0x3aec5d27d4883037ULL, 0x10ed0a1825438f96ULL, 0x516101f72c233d17ULL, 0x13cc6f949fd04eaeULL, 0x739853c441474bfdULL, 0x653793d90d3f5b1bULL, 0x5240647b96b0fc2fULL, 0x0c84890ad27623e0ULL, 0xd7189b32703aaea3ULL, 0x2685de3523bd9c41ULL, 0x99317c5b11bffefaULL, 0x0d9baa854f079703ULL, 0x70b93648fbd48ac5ULL, 0xa80441fce30bc6beULL, 0x7287704bdc36ff1eULL, 0xb65384ed33dc1f13ULL, 0xd36417343ee34408ULL, 0x39cd38ab6e1bf10fULL, 0x5ab861770a1f3564ULL, 0x0ebacf09f594563bULL, 0xd04572b884708530ULL, 0x3cae9722bdb3af47ULL, 0x4a556b6f2f5cbaf2ULL, 0xe1704f1f76c4bd74ULL, 0x5ec4ed7144c6dfcfULL, 0x16afc01d4c7810e6ULL, 0x283f113cd629ca7aULL, 0xaf59a8761741ed2dULL, 0xeed5a3991e215facULL, 0x3bf37ea849f984d4ULL, 0xe413e096a56ce33cULL, 0x2c439d3a98f020d1ULL, 0x637559dc6404c46bULL, 0x9e6c95d1e5f5d569ULL, 0x24bb9836045fe99aULL, 0x44efa466dac8ecc9ULL, 0xc6eab2a5c80895d6ULL, 0x803b50c035220cc4ULL, 0x0321658cba93c138ULL, 0x8f9ebc465dc7ee1cULL, 0xd15a5137190131d3ULL, 0x0fa5ec8668e5e2d8ULL, 0x91c979578d1037b1ULL, 0x0642ca05693b9f70ULL, 0xefca80168350eb4fULL, 0x38d21b24f36a45ecULL, 0xbeab81e1af73d658ULL, 0x8cbfd9cae7542f24ULL, 0xfd19cc0d81f11102ULL, 0x0ac6430fbb4dbc90ULL, 0x1d76a09d6a441895ULL, 0x2a01573ff1cbbfa1ULL, 0xb572e161894fde2bULL, 0x8124734fa853b827ULL, 0x614b1fdf43e6b1b0ULL, 0x68ac395c4238cc18ULL, 0x21d837bfd7f7b7d2ULL, 0x20c714304a860331ULL, 0x5cfaab726324aa14ULL, 0x74c5ba4eb50d606eULL, 0xf3a3030474654739ULL, 0x23e671bcf015c209ULL, 0x45f087e947b9582aULL, 0xd8bd77b418df4c7bULL, 0xe06f6c90ebb50997ULL, 0x0bd96080263c0873ULL, 0x7e03f9410e40dcfeULL, 0xb8e94be4c6484928ULL, 0xfb5b0608e8ca8e72ULL, 0x1a2b49179e0e3306ULL, 0x4e29e76961855059ULL, 0x4f36c4e6fcf4e4baULL, 0x49740ee395cf7bcaULL, 0xc2963ea386d17f7dULL, 0x90d65ad810618352ULL, 0x12d34c1b02a1fa4dULL, 0xfa44258775bb3a91ULL, 0x18150f14b9ec46ddULL, 0x1491861e6b9a653dULL, 0x9a1019d7ab2c3fc2ULL, 0x3668d42d06fe13d7ULL, 0xdcc1fbb25606a6d0ULL, 0x969490dd795a1c22ULL, 0x3549b1a1bc6dd2efULL, 0xc94f5e23a0ed770eULL, 0xb9f6686b5b39fdcbULL, 0xc4d4f4a6efeae00dULL, 0xe732851a1fff2204ULL, 0x94aad6de5eb869f9ULL, 0x3f8ff2ae07206e7fULL, 0xfe38a9813b62d03aULL, 0xa7a1ad7a8bee2466ULL, 0x7b6056c8dde882b6ULL, 0x302a1e286fc58ca7ULL, 0x8da0fa457a259bc7ULL, 0xb3302b64e074415bULL, 0x5402ae7eff8b635fULL, 0x08f8050c9cafc94bULL, 0xae468bf98a3059ceULL, 0x88c355cca98dc58fULL, 0xb10e6d67c7963480ULL, 0xbad70de7e1aa3cf3ULL, 0xbfb4a26e320262bbULL, 0xcb711820870f02d5ULL, 0xce12b7a954a75c9dULL, 0x563ce87dd8691684ULL, 0x9f73b65e7884618aULL, 0x2b1e74b06cba0b42ULL, 0x47cec1ea605b2df1ULL, 0x1c698312f735ac76ULL, 0x5fdbcefed9b76b2cULL, 0x831a354c8fb1cdfcULL, 0x820516c312c0791fULL, 0xb74ca762aeadabf0ULL, 0xfc06ef821c80a5e1ULL, 0x5723cbf24518a267ULL, 0x9d4df05d5f661451ULL, 0x588627742dfd40bfULL, 0xda8331b73f3d39a0ULL, 0x17b0e392d109a405ULL, 0xf965400bcf28fba9ULL, 0x7c3dbf4229a2a925ULL, 0x023e460327e275dbULL, 0x6cd0b55a0ce126b3ULL, 0xe62da695828e96e7ULL, 0x42ad6e63b3f373b9ULL, 0xe50cc319381d57dfULL, 0xc5cbd729729b54eeULL, 0x46d1e265fd2a9912ULL, 0x6428b056904eeff8ULL, 0x8be23040131e04b7ULL, 0x6709d5da2add2ec0ULL, 0x075de98af44a2b93ULL, 0x8447dcc67bfbe66fULL, 0x6616f655b7ac9a23ULL, 0xd607b8bded4b1a40ULL, 0x0563af89d3a85e48ULL, 0x3db1b4ad20c21ba4ULL, 0x11f22997b8323b75ULL, 0x292032b34b587e99ULL, 0x7f1cdace9331681dULL, 0x8e819fc9c0b65affULL, 0xa1e3677fe2d5bb16ULL, 0xcd33d225ee349da5ULL, 0xd9a2543b85aef898ULL, 0x795e10cbfa0af76dULL, 0x25a4bbb9992e5d79ULL, 0x78413344677b438eULL, 0xf0826688cef68601ULL, 0xd27b34bba392f0ebULL, 0x551d8df162fad7bcULL, 0x1e57c511d0d7d9adULL, 0xdeffbdb171e4d30bULL, 0xf4feea8e802f6caaULL, 0xa480c8f6317de55eULL, 0xa0fc44f07fa40ff5ULL, 0x95b5f551c3c9dd1aULL, 0x22f952336d6476eaULL, 0x0000000000000000ULL, 0xa6be8ef5169f9085ULL, 0xcc2cf1aa73452946ULL, 0x2e7ddb39bf12550aULL, 0xd526dd3157d8db78ULL, 0x486b2d6c08becf29ULL, 0x9b0f3a58365d8b21ULL, 0xac78cdfaadd22c15ULL, 0xbc95c7e28891a383ULL, 0x6a927f5f65dab9c3ULL, 0xc3891d2c1ba0cb9eULL, 0xeaa92f9f50f8b507ULL, 0xcf0d9426c9d6e87eULL, 0xca6e3baf1a7eb636ULL, 0xab25247059980786ULL, 0x69b31ad3df4978fbULL, 0xe2512a93cc577c4cULL, 0xff278a0ea61364d9ULL, 0x71a615c766a53e26ULL, 0x89dc764334fc716cULL, 0xf87a638452594f4aULL, 0xf2bc208be914f3daULL, 0x8766b94ac1682757ULL, 0xbbc82e687cdb8810ULL, 0x626a7a53f9757088ULL, 0xa2c202f358467a2eULL, 0x4d0882e5db169161ULL, 0x09e7268301de7da8ULL, 0xe897699c771ac0dcULL, 0xc8507dac3d9cc3edULL, 0xc0a878a0a1330aa6ULL, 0x978bb352e42ba8c1ULL, 0xe9884a13ea6b743fULL, 0x279afdbabecc28a2ULL, 0x047c8c064ed9eaabULL, 0x507e2278b15289f4ULL, 0x599904fbb08cf45cULL, 0xbd8ae46d15e01760ULL, 0x31353da7f2b43844ULL, 0x8558ff49e68a528cULL, 0x76fbfc4d92ef15b5ULL, 0x3456922e211c660cULL, 0x86799ac55c1993b4ULL, 0x3e90d1219a51da9cULL, 0x2d5cbeb505819432ULL, 0x982e5fd48cce4a19ULL, 0xdb9c1238a24c8d43ULL, 0xd439febecaa96f9bULL, 0x418c0bef0960b281ULL, 0x158ea591f6ebd1deULL, 0x1f48e69e4da66d4eULL, 0x8afd13cf8e6fb054ULL, 0xf5e1c9011d5ed849ULL, 0xe34e091c5126c8afULL, 0xad67ee7530a398f6ULL, 0x43b24dec2e82c75aULL, 0x75da99c1287cd48dULL, 0x92e81cdb3783f689ULL, 0xa3dd217cc537cecdULL, 0x60543c50de970553ULL, 0x93f73f54aaf2426aULL, 0xa91b62737e7a725dULL, 0xf19d4507538732e2ULL, 0x77e4dfc20f9ea156ULL, 0x7d229ccdb4d31dc6ULL, 0x1b346a98037f87e5ULL, 0xedf4c615a4b29e94ULL, 0x4093286094110662ULL, 0xb0114ee85ae78063ULL, 0x6ff1d0d6b672e78bULL, 0x6dcf96d591909250ULL, 0xdfe09e3eec9567e8ULL, 0x3214582b4827f97cULL, 0xb46dc2ee143e6ac8ULL, 0xf6c0ac8da7cd1971ULL, 0xebb60c10cd8901e4ULL, 0xf7df8f023abcad92ULL, 0x9c52d3d2c217a0b2ULL, 0x6b8d5cd0f8ab0d20ULL, 0x3777f7a29b8fa734ULL, 0x011f238f9d71b4e3ULL, 0xc1b75b2f3c42be45ULL, 0x5de588fdfe551ef7ULL, 0x6eeef3592b035368ULL, 0xaa3a07ffc4e9b365ULL, 0xecebe59a39c32a77ULL, 0x5ba742f8976e8187ULL, 0x4b4a48e0b22d0e11ULL, 0xddded83dcb771233ULL, 0xa59feb79ac0c51bdULL, 0xc7f5912a55792135ULL }, { 0x6d6ae04668a9b08aULL, 0x3ab3f04b0be8c743ULL, 0xe51e166b54b3c908ULL, 0xbe90a9eb35c2f139ULL, 0xb2c7066637f2bec1ULL, 0xaa6945613392202cULL, 0x9a28c36f3b5201ebULL, 0xddce5a93ab536994ULL, 0x0e34133ef6382827ULL, 0x52a02ba1ec55048bULL, 0xa2f88f97c4b2a177ULL, 0x8640e513ca2251a5ULL, 0xcdf1d36258137622ULL, 0xfe6cb708dedf8ddbULL, 0x8a174a9ec8121e5dULL, 0x679896036b81560eULL, 0x59ed033395795feeULL, 0x1dd778ab8b74edafULL, 0xee533ef92d9f926dULL, 0x2a8c79baf8a8d8f5ULL, 0x6bcf398e69b119f6ULL, 0xe20491742fafdd95ULL, 0x276488e0809c2aecULL, 0xea955b82d88f5cceULL, 0x7102c63a99d9e0c4ULL, 0xf9763017a5c39946ULL, 0x429fa2501f151b3dULL, 0x4659c72bea05d59eULL, 0x984b7fdccf5a6634ULL, 0xf742232953fbb161ULL, 0x3041860e08c021c7ULL, 0x747bfd9616cd9386ULL, 0x4bb1367192312787ULL, 0x1b72a1638a6c44d3ULL, 0x4a0e68a6e8359a66ULL, 0x169a5039f258b6caULL, 0xb98a2ef44edee5a4ULL, 0xd9083fe85e43a737ULL, 0x967f6ce239624e13ULL, 0x8874f62d3c1a7982ULL, 0x3c1629830af06e3fULL, 0x9165ebfd427e5a8eULL, 0xb5dd81794ceeaa5cULL, 0x0de8f15a7834f219ULL, 0x70bd98ede3dd5d25ULL, 0xaccc9ca9328a8950ULL, 0x56664eda1945ca28ULL, 0x221db34c0f8859aeULL, 0x26dbd637fa98970dULL, 0x1acdffb4f068f932ULL, 0x4585254f64090fa0ULL, 0x72de245e17d53afaULL, 0x1546b25d7c546cf4ULL, 0x207e0ffffb803e71ULL, 0xfaaad2732bcf4378ULL, 0xb462dfae36ea17bdULL, 0xcf926fd1ac1b11fdULL, 0xe0672dc7dba7ba4aULL, 0xd3fa49ad5d6b41b3ULL, 0x8ba81449b216a3bcULL, 0x14f9ec8a0650d115ULL, 0x40fc1ee3eb1d7ce2ULL, 0x23a2ed9b758ce44fULL, 0x782c521b14fddc7eULL, 0x1c68267cf170504eULL, 0xbcf31558c1ca96e6ULL, 0xa781b43b4ba6d235ULL, 0xf6fd7dfe29ff0c80ULL, 0xb0a4bad5c3fad91eULL, 0xd199f51ea963266cULL, 0x414340349119c103ULL, 0x5405f269ed4dadf7ULL, 0xabd61bb649969dcdULL, 0x6813dbeae7bdc3c8ULL, 0x65fb2ab09f8931d1ULL, 0xf1e7fae152e3181dULL, 0xc1a67cef5a2339daULL, 0x7a4feea8e0f5bba1ULL, 0x1e0b9acf05783791ULL, 0x5b8ebf8061713831ULL, 0x80e53cdbcb3af8d9ULL, 0x7e898bd315e57502ULL, 0xc6bcfbf0213f2d47ULL, 0x95a38e86b76e942dULL, 0x092e94218d243cbaULL, 0x8339debf453622e7ULL, 0xb11be402b9fe64ffULL, 0x57d9100d634177c9ULL, 0xcc4e8db52217cbc3ULL, 0x3b0cae9c71ec7aa2ULL, 0xfb158ca451cbfe99ULL, 0x2b33276d82ac6514ULL, 0x01bf5ed77a04bde1ULL, 0xc5601994af33f779ULL, 0x75c4a3416cc92e67ULL, 0xf3844652a6eb7fc2ULL, 0x3487e375fdd0ef64ULL, 0x18ae430704609eedULL, 0x4d14efb993298efbULL, 0x815a620cb13e4538ULL, 0x125c354207487869ULL, 0x9eeea614ce42cf48ULL, 0xce2d3106d61fac1cULL, 0xbbe99247bad6827bULL, 0x071a871f7b1c149dULL, 0x2e4a1cc10db81656ULL, 0x77a71ff298c149b8ULL, 0x06a5d9c80118a97cULL, 0xad73c27e488e34b1ULL, 0x443a7b981e0db241ULL, 0xe3bbcfa355ab6074ULL, 0x0af276450328e684ULL, 0x73617a896dd1871bULL, 0x58525de4ef7de20fULL, 0xb7be3dcab8e6cd83ULL, 0x19111dd07e64230cULL, 0x842359a03e2a367aULL, 0x103f89f1f3401fb6ULL, 0xdc710444d157d475ULL, 0xb835702334da5845ULL, 0x4320fc876511a6dcULL, 0xd026abc9d3679b8dULL, 0x17250eee885c0b2bULL, 0x90dab52a387ae76fULL, 0x31fed8d972c49c26ULL, 0x89cba8fa461ec463ULL, 0x2ff5421677bcabb7ULL, 0x396f122f85e41d7dULL, 0xa09b332430bac6a8ULL, 0xc888e8ced7070560ULL, 0xaeaf201ac682ee8fULL, 0x1180d7268944a257ULL, 0xf058a43628e7a5fcULL, 0xbd4c4b8fbbce2b07ULL, 0xa1246df34abe7b49ULL, 0x7d5569b79be9af3cULL, 0xa9b5a705bd9efa12ULL, 0xdb6b835baa4bc0e8ULL, 0x05793bac8f147342ULL, 0x21c1512881848390ULL, 0xfdb0556c50d357e5ULL, 0x613d4fcb6a99ff72ULL, 0x03dce2648e0cda3eULL, 0xe949b9e6568386f0ULL, 0xfc0f0bbb2ad7ea04ULL, 0x6a70675913b5a417ULL, 0x7f36d5046fe1c8e3ULL, 0x0c57af8d02304ff8ULL, 0x32223abdfcc84618ULL, 0x0891caf6f720815bULL, 0xa63eeaec31a26fd4ULL, 0x2507345374944d33ULL, 0x49d28ac266394058ULL, 0xf5219f9aa7f3d6beULL, 0x2d96fea583b4cc68ULL, 0x5a31e1571b7585d0ULL, 0x8ed12fe53d02d0feULL, 0xdfade6205f5b0e4bULL, 0x4cabb16ee92d331aULL, 0x04c6657bf510cea3ULL, 0xd73c2cd6a87b8f10ULL, 0xe1d87310a1a307abULL, 0x6cd5be9112ad0d6bULL, 0x97c032354366f3f2ULL, 0xd4e0ceb22677552eULL, 0x0000000000000000ULL, 0x29509bde76a402cbULL, 0xc27a9e8bd42fe3e4ULL, 0x5ef7842cee654b73ULL, 0xaf107ecdbc86536eULL, 0x3fcacbe784fcb401ULL, 0xd55f90655c73e8cfULL, 0xe6c2f40fdabf1336ULL, 0xe8f6e7312c873b11ULL, 0xeb2a0555a28be12fULL, 0xe4a148bc2eb774e9ULL, 0x9b979db84156bc0aULL, 0x6eb60222e6a56ab4ULL, 0x87ffbbc4b026ec44ULL, 0xc703a5275b3b90a6ULL, 0x47e699fc9001687fULL, 0x9c8d1aa73a4aa897ULL, 0x7cea3760e1ed12ddULL, 0x4ec80ddd1d2554c5ULL, 0x13e36b957d4cc588ULL, 0x5d2b66486069914dULL, 0x92b90999cc7280b0ULL, 0x517cc9c56259deb5ULL, 0xc937b619ad03b881ULL, 0xec30824ad997f5b2ULL, 0xa45d565fc5aa080bULL, 0xd6837201d27f32f1ULL, 0x635ef3789e9198adULL, 0x531f75769651b96aULL, 0x4f77530a6721e924ULL, 0x486dd4151c3dfdb9ULL, 0x5f48dafb9461f692ULL, 0x375b011173dc355aULL, 0x3da9775470f4d3deULL, 0x8d0dcd81b30e0ac0ULL, 0x36e45fc609d888bbULL, 0x55baacbe97491016ULL, 0x8cb29356c90ab721ULL, 0x76184125e2c5f459ULL, 0x99f4210bb55edbd5ULL, 0x6f095cf59ca1d755ULL, 0x9f51f8c3b44672a9ULL, 0x3538bda287d45285ULL, 0x50c39712185d6354ULL, 0xf23b1885dcefc223ULL, 0x79930ccc6ef9619fULL, 0xed8fdc9da3934853ULL, 0xcb540aaa590bdf5eULL, 0x5c94389f1a6d2cacULL, 0xe77daad8a0bbaed7ULL, 0x28efc5090ca0bf2aULL, 0xbf2ff73c4fc64cd8ULL, 0xb37858b14df60320ULL, 0xf8c96ec0dfc724a7ULL, 0x828680683f329f06ULL, 0x941cd051cd6a29ccULL, 0xc3c5c05cae2b5e05ULL, 0xb601631dc2e27062ULL, 0xc01922382027843bULL, 0x24b86a840e90f0d2ULL, 0xd245177a276ffc52ULL, 0x0f8b4de98c3c95c6ULL, 0x3e759530fef809e0ULL, 0x0b4d2892792c5b65ULL, 0xc4df4743d5374a98ULL, 0xa5e20888bfaeb5eaULL, 0xba56cc90c0d23f9aULL, 0x38d04cf8ffe0a09cULL, 0x62e1adafe495254cULL, 0x0263bcb3f40867dfULL, 0xcaeb547d230f62bfULL, 0x6082111c109d4293ULL, 0xdad4dd8cd04f7d09ULL, 0xefec602e579b2f8cULL, 0x1fb4c4187f7c8a70ULL, 0xffd3e9dfa4db303aULL, 0x7bf0b07f9af10640ULL, 0xf49ec14dddf76b5fULL, 0x8f6e713247066d1fULL, 0x339d646a86ccfbf9ULL, 0x64447467e58d8c30ULL, 0x2c29a072f9b07189ULL, 0xd8b7613f24471ad6ULL, 0x6627c8d41185ebefULL, 0xa347d140beb61c96ULL, 0xde12b8f7255fb3aaULL, 0x9d324470404e1576ULL, 0x9306574eb6763d51ULL, 0xa80af9d2c79a47f3ULL, 0x859c0777442e8b9bULL, 0x69ac853d9db97e29ULL }, { 0xc3407dfc2de6377eULL, 0x5b9e93eea4256f77ULL, 0xadb58fdd50c845e0ULL, 0x5219ff11a75bed86ULL, 0x356b61cfd90b1de9ULL, 0xfb8f406e25abe037ULL, 0x7a5a0231c0f60796ULL, 0x9d3cd216e1f5020bULL, 0x0c6550fb6b48d8f3ULL, 0xf57508c427ff1c62ULL, 0x4ad35ffa71cb407dULL, 0x6290a2da1666aa6dULL, 0xe284ec2349355f9fULL, 0xb3c307c53d7c84ecULL, 0x05e23c0468365a02ULL, 0x190bac4d6c9ebfa8ULL, 0x94bbbee9e28b80faULL, 0xa34fc777529cb9b5ULL, 0xcc7b39f095bcd978ULL, 0x2426addb0ce532e3ULL, 0x7e79329312ce4fc7ULL, 0xab09a72eebec2917ULL, 0xf8d15499f6b9d6c2ULL, 0x1a55b8babf8c895dULL, 0xdb8add17fb769a85ULL, 0xb57f2f368658e81bULL, 0x8acd36f18f3f41f6ULL, 0x5ce3b7bba50f11d3ULL, 0x114dcc14d5ee2f0aULL, 0xb91a7fcded1030e8ULL, 0x81d5425fe55de7a1ULL, 0xb6213bc1554adeeeULL, 0x80144ef95f53f5f2ULL, 0x1e7688186db4c10cULL, 0x3b912965db5fe1bcULL, 0xc281715a97e8252dULL, 0x54a5d7e21c7f8171ULL, 0x4b12535ccbc5522eULL, 0x1d289cefbea6f7f9ULL, 0x6ef5f2217d2e729eULL, 0xe6a7dc819b0d17ceULL, 0x1b94b41c05829b0eULL, 0x33d7493c622f711eULL, 0xdcf7f942fa5ce421ULL, 0x600fba8b7f7a8ecbULL, 0x46b60f011a83988eULL, 0x235b898e0dcf4c47ULL, 0x957ab24f588592a9ULL, 0x4354330572b5c28cULL, 0xa5f3ef84e9b8d542ULL, 0x8c711e02341b2d01ULL, 0x0b1874ae6a62a657ULL, 0x1213d8e306fc19ffULL, 0xfe6d7c6a4d9dba35ULL, 0x65ed868f174cd4c9ULL, 0x88522ea0e6236550ULL, 0x899322065c2d7703ULL, 0xc01e690bfef4018bULL, 0x915982ed8abddaf8ULL, 0xbe675b98ec3a4e4cULL, 0xa996bf7f82f00db1ULL, 0xe1daf8d49a27696aULL, 0x2effd5d3dc8986e7ULL, 0xd153a51f2b1a2e81ULL, 0x18caa0ebd690adfbULL, 0x390e3134b243c51aULL, 0x2778b92cdff70416ULL, 0x029f1851691c24a6ULL, 0x5e7cafeacc133575ULL, 0xfa4e4cc89fa5f264ULL, 0x5a5f9f481e2b7d24ULL, 0x484c47ab18d764dbULL, 0x400a27f2a1a7f479ULL, 0xaeeb9b2a83da7315ULL, 0x721c626879869734ULL, 0x042330a2d2384851ULL, 0x85f672fd3765aff0ULL, 0xba446b3a3e02061dULL, 0x73dd6ecec3888567ULL, 0xffac70ccf793a866ULL, 0xdfa9edb5294ed2d4ULL, 0x6c6aea7014325638ULL, 0x834a5a0e8c41c307ULL, 0xcdba35562fb2cb2bULL, 0x0ad97808d06cb404ULL, 0x0f3b440cb85aee06ULL, 0xe5f9c876481f213bULL, 0x98deee1289c35809ULL, 0x59018bbfcd394bd1ULL, 0xe01bf47220297b39ULL, 0xde68e1139340c087ULL, 0x9fa3ca4788e926adULL, 0xbb85679c840c144eULL, 0x53d8f3b71d55ffd5ULL, 0x0da45c5dd146caa0ULL, 0x6f34fe87c72060cdULL, 0x57fbc315cf6db784ULL, 0xcee421a1fca0fddeULL, 0x3d2d0196607b8d4bULL, 0x642c8a29ad42c69aULL, 0x14aff010bdd87508ULL, 0xac74837beac657b3ULL, 0x3216459ad821634dULL, 0x3fb219c70967a9edULL, 0x06bc28f3bb246cf7ULL, 0xf2082c9126d562c6ULL, 0x66b39278c45ee23cULL, 0xbd394f6f3f2878b9ULL, 0xfd33689d9e8f8cc0ULL, 0x37f4799eb017394fULL, 0x108cc0b26fe03d59ULL, 0xda4bd1b1417888d6ULL, 0xb09d1332ee6eb219ULL, 0x2f3ed975668794b4ULL, 0x58c0871977375982ULL, 0x7561463d78ace990ULL, 0x09876cff037e82f1ULL, 0x7fb83e35a8c05d94ULL, 0x26b9b58a65f91645ULL, 0xef20b07e9873953fULL, 0x3148516d0b3355b8ULL, 0x41cb2b541ba9e62aULL, 0x790416c613e43163ULL, 0xa011d380818e8f40ULL, 0x3a5025c36151f3efULL, 0xd57095bdf92266d0ULL, 0x498d4b0da2d97688ULL, 0x8b0c3a57353153a5ULL, 0x21c491df64d368e1ULL, 0x8f2f0af5e7091bf4ULL, 0x2da1c1240f9bb012ULL, 0xc43d59a92ccc49daULL, 0xbfa6573e56345c1fULL, 0x828b56a8364fd154ULL, 0x9a41f643e0df7cafULL, 0xbcf843c985266aeaULL, 0x2b1de9d7b4bfdce5ULL, 0x20059d79dedd7ab2ULL, 0x6dabe6d6ae3c446bULL, 0x45e81bf6c991ae7bULL, 0x6351ae7cac68b83eULL, 0xa432e32253b6c711ULL, 0xd092a9b991143cd2ULL, 0xcac711032e98b58fULL, 0xd8d4c9e02864ac70ULL, 0xc5fc550f96c25b89ULL, 0xd7ef8dec903e4276ULL, 0x67729ede7e50f06fULL, 0xeac28c7af045cf3dULL, 0xb15c1f945460a04aULL, 0x9cfddeb05bfb1058ULL, 0x93c69abce3a1fe5eULL, 0xeb0380dc4a4bdd6eULL, 0xd20db1e8f8081874ULL, 0x229a8528b7c15e14ULL, 0x44291750739fbc28ULL, 0xd3ccbd4e42060a27ULL, 0xf62b1c33f4ed2a97ULL, 0x86a8660ae4779905ULL, 0xd62e814a2a305025ULL, 0x477703a7a08d8addULL, 0x7b9b0e977af815c5ULL, 0x78c51a60a9ea2330ULL, 0xa6adfb733aaae3b7ULL, 0x97e5aa1e3199b60fULL, 0x0000000000000000ULL, 0xf4b404629df10e31ULL, 0x5564db44a6719322ULL, 0x9207961a59afec0dULL, 0x9624a6b88b97a45cULL, 0x363575380a192b1cULL, 0x2c60cd82b595a241ULL, 0x7d272664c1dc7932ULL, 0x7142769faa94a1c1ULL, 0xa1d0df263b809d13ULL, 0x1630e841d4c451aeULL, 0xc1df65ad44fa13d8ULL, 0x13d2d445bcf20bacULL, 0xd915c546926abe23ULL, 0x38cf3d92084dd749ULL, 0xe766d0272103059dULL, 0xc7634d5effde7f2fULL, 0x077d2455012a7ea4ULL, 0xedbfa82ff16fb199ULL, 0xaf2a978c39d46146ULL, 0x42953fa3c8bbd0dfULL, 0xcb061da59496a7dcULL, 0x25e7a17db6eb20b0ULL, 0x34aa6d6963050fbaULL, 0xa76cf7d580a4f1e4ULL, 0xf7ea10954ee338c4ULL, 0xfcf2643b24819e93ULL, 0xcf252d0746aeef8dULL, 0x4ef06f58a3f3082cULL, 0x563acfb37563a5d7ULL, 0x5086e740ce47c920ULL, 0x2982f186dda3f843ULL, 0x87696aac5e798b56ULL, 0x5d22bb1d1f010380ULL, 0x035e14f7d31236f5ULL, 0x3cec0d30da759f18ULL, 0xf3c920379cdb7095ULL, 0xb8db736b571e22bbULL, 0xdd36f5e44052f672ULL, 0xaac8ab8851e23b44ULL, 0xa857b3d938fe1fe2ULL, 0x17f1e4e76eca43fdULL, 0xec7ea4894b61a3caULL, 0x9e62c6e132e734feULL, 0xd4b1991b432c7483ULL, 0x6ad6c283af163acfULL, 0x1ce9904904a8e5aaULL, 0x5fbda34c761d2726ULL, 0xf910583f4cb7c491ULL, 0xc6a241f845d06d7cULL, 0x4f3163fe19fd1a7fULL, 0xe99c988d2357f9c8ULL, 0x8eee06535d0709a7ULL, 0x0efa48aa0254fc55ULL, 0xb4be23903c56fa48ULL, 0x763f52caabbedf65ULL, 0xeee1bcd8227d876cULL, 0xe345e085f33b4dccULL, 0x3e731561b369bbbeULL, 0x2843fd2067adea10ULL, 0x2adce5710eb1ceb6ULL, 0xb7e03767ef44ccbdULL, 0x8db012a48e153f52ULL, 0x61ceb62dc5749c98ULL, 0xe85d942b9959eb9bULL, 0x4c6f7709caef2c8aULL, 0x84377e5b8d6bbda3ULL, 0x30895dcbb13d47ebULL, 0x74a04a9bc2a2fbc3ULL, 0x6b17ce251518289cULL, 0xe438c4d0f2113368ULL, 0x1fb784bed7bad35fULL, 0x9b80fae55ad16efcULL, 0x77fe5e6c11b0cd36ULL, 0xc858095247849129ULL, 0x08466059b97090a2ULL, 0x01c10ca6ba0e1253ULL, 0x6988d6747c040c3aULL, 0x6849dad2c60a1e69ULL, 0x5147ebe67449db73ULL, 0xc99905f4fd8a837aULL, 0x991fe2b433cd4a5aULL, 0xf09734c04fc94660ULL, 0xa28ecbd1e892abe6ULL, 0xf1563866f5c75433ULL, 0x4dae7baf70e13ed9ULL, 0x7ce62ac27bd26b61ULL, 0x70837a39109ab392ULL, 0x90988e4b30b3c8abULL, 0xb2020b63877296bfULL, 0x156efcb607d6675bULL }, { 0xe63f55ce97c331d0ULL, 0x25b506b0015bba16ULL, 0xc8706e29e6ad9ba8ULL, 0x5b43d3775d521f6aULL, 0x0bfa3d577035106eULL, 0xab95fc172afb0e66ULL, 0xf64b63979e7a3276ULL, 0xf58b4562649dad4bULL, 0x48f7c3dbae0c83f1ULL, 0xff31916642f5c8c5ULL, 0xcbb048dc1c4a0495ULL, 0x66b8f83cdf622989ULL, 0x35c130e908e2b9b0ULL, 0x7c761a61f0b34fa1ULL, 0x3601161cf205268dULL, 0x9e54ccfe2219b7d6ULL, 0x8b7d90a538940837ULL, 0x9cd403588ea35d0bULL, 0xbc3c6fea9ccc5b5aULL, 0xe5ff733b6d24aeedULL, 0xceed22de0f7eb8d2ULL, 0xec8581cab1ab545eULL, 0xb96105e88ff8e71dULL, 0x8ca03501871a5eadULL, 0x76ccce65d6db2a2fULL, 0x5883f582a7b58057ULL, 0x3f7be4ed2e8adc3eULL, 0x0fe7be06355cd9c9ULL, 0xee054e6c1d11be83ULL, 0x1074365909b903a6ULL, 0x5dde9f80b4813c10ULL, 0x4a770c7d02b6692cULL, 0x5379c8d5d7809039ULL, 0xb4067448161ed409ULL, 0x5f5e5026183bd6cdULL, 0xe898029bf4c29df9ULL, 0x7fb63c940a54d09cULL, 0xc5171f897f4ba8bcULL, 0xa6f28db7b31d3d72ULL, 0x2e4f3be7716eaa78ULL, 0x0d6771a099e63314ULL, 0x82076254e41bf284ULL, 0x2f0fd2b42733df98ULL, 0x5c9e76d3e2dc49f0ULL, 0x7aeb569619606cdbULL, 0x83478b07b2468764ULL, 0xcfadcb8d5923cd32ULL, 0x85dac7f05b95a41eULL, 0xb5469d1b4043a1e9ULL, 0xb821ecbbd9a592fdULL, 0x1b8e0b0e798c13c8ULL, 0x62a57b6d9a0be02eULL, 0xfcf1b793b81257f8ULL, 0x9d94ea0bd8fe28ebULL, 0x4cea408aeb654a56ULL, 0x23284a47e888996cULL, 0x2d8f1d128b893545ULL, 0xf4cbac3132c0d8abULL, 0xbd7c86b9ca912ebaULL, 0x3a268eef3dbe6079ULL, 0xf0d62f6077a9110cULL, 0x2735c916ade150cbULL, 0x89fd5f03942ee2eaULL, 0x1acee25d2fd16628ULL, 0x90f39bab41181bffULL, 0x430dfe8cde39939fULL, 0xf70b8ac4c8274796ULL, 0x1c53aeaac6024552ULL, 0x13b410acf35e9c9bULL, 0xa532ab4249faa24fULL, 0x2b1251e5625a163fULL, 0xd7e3e676da4841c7ULL, 0xa7b264e4e5404892ULL, 0xda8497d643ae72d3ULL, 0x861ae105a1723b23ULL, 0x38a6414991048aa4ULL, 0x6578dec92585b6b4ULL, 0x0280cfa6acbaeaddULL, 0x88bdb650c273970aULL, 0x9333bd5ebbff84c2ULL, 0x4e6a8f2c47dfa08bULL, 0x321c954db76cef2aULL, 0x418d312a72837942ULL, 0xb29b38bfffcdf773ULL, 0x6c022c38f90a4c07ULL, 0x5a033a240b0f6a8aULL, 0x1f93885f3ce5da6fULL, 0xc38a537e96988bc6ULL, 0x39e6a81ac759ff44ULL, 0x29929e43cee0fce2ULL, 0x40cdd87924de0ca2ULL, 0xe9d8ebc8a29fe819ULL, 0x0c2798f3cfbb46f4ULL, 0x55e484223e53b343ULL, 0x4650948ecd0d2fd8ULL, 0x20e86cb2126f0651ULL, 0x6d42c56baf5739e7ULL, 0xa06fc1405ace1e08ULL, 0x7babbfc54f3d193bULL, 0x424d17df8864e67fULL, 0xd8045870ef14980eULL, 0xc6d7397c85ac3781ULL, 0x21a885e1443273b1ULL, 0x67f8116f893f5c69ULL, 0x24f5efe35706cff6ULL, 0xd56329d076f2ab1aULL, 0x5e1eb9754e66a32dULL, 0x28d2771098bd8902ULL, 0x8f6013f47dfdc190ULL, 0x17a993fdb637553cULL, 0xe0a219397e1012aaULL, 0x786b9930b5da8606ULL, 0x6e82e39e55b0a6daULL, 0x875a0856f72f4ec3ULL, 0x3741ff4fa458536dULL, 0xac4859b3957558fcULL, 0x7ef6d5c75c09a57cULL, 0xc04a758b6c7f14fbULL, 0xf9acdd91ab26ebbfULL, 0x7391a467c5ef9668ULL, 0x335c7c1ee1319acaULL, 0xa91533b18641e4bbULL, 0xe4bf9a683b79db0dULL, 0x8e20faa72ba0b470ULL, 0x51f907737b3a7ae4ULL, 0x2268a314bed5ec8cULL, 0xd944b123b949edeeULL, 0x31dcb3b84d8b7017ULL, 0xd3fe65279f218860ULL, 0x097af2f1dc8ffab3ULL, 0x9b09a6fc312d0b91ULL, 0xcc6ded78a3c4520fULL, 0x3481d9ba5ebfcc50ULL, 0x4f2a667f1182d56bULL, 0xdfd9fdd4509ace94ULL, 0x26752045fbbc252bULL, 0xbffc491f662bc467ULL, 0xdd593272fc202449ULL, 0x3cbbc218d46d4303ULL, 0x91b372f817456e1fULL, 0x681faf69bc6385a0ULL, 0xb686bbeebaa43ed4ULL, 0x1469b5084cd0ca01ULL, 0x98c98009cbca94acULL, 0x6438379a73d8c354ULL, 0xc2caba2dc0c5fe26ULL, 0x3e3b0dbe78d7a9deULL, 0x50b9ee202d670f04ULL, 0x4590b27b37eab0e5ULL, 0x6025b4cb36b10af3ULL, 0xfb2c1237079c0162ULL, 0xa12f28130c936be8ULL, 0x4b37e52e54eb1cccULL, 0x083a1ba28ad28f53ULL, 0xc10a9cd83a22611bULL, 0x9f1425ad7444c236ULL, 0x069d4cf7e9d3237aULL, 0xedc56899e7f621beULL, 0x778c273680865fcfULL, 0x309c5aeb1bd605f7ULL, 0x8de0dc52d1472b4dULL, 0xf8ec34c2fd7b9e5fULL, 0xea18cd3d58787724ULL, 0xaad515447ca67b86ULL, 0x9989695a9d97e14cULL, 0x0000000000000000ULL, 0xf196c63321f464ecULL, 0x71116bc169557cb5ULL, 0xaf887f466f92c7c1ULL, 0x972e3e0ffe964d65ULL, 0x190ec4a8d536f915ULL, 0x95aef1a9522ca7b8ULL, 0xdc19db21aa7d51a9ULL, 0x94ee18fa0471d258ULL, 0x8087adf248a11859ULL, 0xc457f6da2916dd5cULL, 0xfa6cfb6451c17482ULL, 0xf256e0c6db13fbd1ULL, 0x6a9f60cf10d96f7dULL, 0x4daaa9d9bd383fb6ULL, 0x03c026f5fae79f3dULL, 0xde99148706c7bb74ULL, 0x2a52b8b6340763dfULL, 0x6fc20acd03edd33aULL, 0xd423c08320afdefaULL, 0xbbe1ca4e23420dc0ULL, 0x966ed75ca8cb3885ULL, 0xeb58246e0e2502c4ULL, 0x055d6a021334bc47ULL, 0xa47242111fa7d7afULL, 0xe3623fcc84f78d97ULL, 0x81c744a11efc6db9ULL, 0xaec8961539cfb221ULL, 0xf31609958d4e8e31ULL, 0x63e5923ecc5695ceULL, 0x47107ddd9b505a38ULL, 0xa3afe7b5a0298135ULL, 0x792b7063e387f3e6ULL, 0x0140e953565d75e0ULL, 0x12f4f9ffa503e97bULL, 0x750ce8902c3cb512ULL, 0xdbc47e8515f30733ULL, 0x1ed3610c6ab8af8fULL, 0x5239218681dde5d9ULL, 0xe222d69fd2aaf877ULL, 0xfe71783514a8bd25ULL, 0xcaf0a18f4a177175ULL, 0x61655d9860ec7f13ULL, 0xe77fbc9dc19e4430ULL, 0x2ccff441ddd440a5ULL, 0x16e97aaee06a20dcULL, 0xa855dae2d01c915bULL, 0x1d1347f9905f30b2ULL, 0xb7c652bdecf94b34ULL, 0xd03e43d265c6175dULL, 0xfdb15ec0ee4f2218ULL, 0x57644b8492e9599eULL, 0x07dda5a4bf8e569aULL, 0x54a46d71680ec6a3ULL, 0x5624a2d7c4b42c7eULL, 0xbebca04c3076b187ULL, 0x7d36f332a6ee3a41ULL, 0x3b6667bc6be31599ULL, 0x695f463aea3ef040ULL, 0xad08b0e0c3282d1cULL, 0xb15b1e4a052a684eULL, 0x44d05b2861b7c505ULL, 0x15295c5b1a8dbfe1ULL, 0x744c01c37a61c0f2ULL, 0x59c31cd1f1e8f5b7ULL, 0xef45a73f4b4ccb63ULL, 0x6bdf899c46841a9dULL, 0x3dfb2b4b823036e3ULL, 0xa2ef0ee6f674f4d5ULL, 0x184e2dfb836b8cf5ULL, 0x1134df0a5fe47646ULL, 0xbaa1231d751f7820ULL, 0xd17eaa81339b62bdULL, 0xb01bf71953771daeULL, 0x849a2ea30dc8d1feULL, 0x705182923f080955ULL, 0x0ea757556301ac29ULL, 0x041d83514569c9a7ULL, 0x0abad4042668658eULL, 0x49b72a88f851f611ULL, 0x8a3d79f66ec97dd7ULL, 0xcd2d042bf59927efULL, 0xc930877ab0f0ee48ULL, 0x9273540deda2f122ULL, 0xc797d02fd3f14261ULL, 0xe1e2f06a284d674aULL, 0xd2be8c74c97cfd80ULL, 0x9a494faf67707e71ULL, 0xb3dbd1eca9908293ULL, 0x72d14d3493b2e388ULL, 0xd6a30f258c153427ULL } }; /* Ax */ static void streebog_xor(const struct streebog_uint512 *x, const struct streebog_uint512 *y, struct streebog_uint512 *z) { z->qword[0] = x->qword[0] ^ y->qword[0]; z->qword[1] = x->qword[1] ^ y->qword[1]; z->qword[2] = x->qword[2] ^ y->qword[2]; z->qword[3] = x->qword[3] ^ y->qword[3]; z->qword[4] = x->qword[4] ^ y->qword[4]; z->qword[5] = x->qword[5] ^ y->qword[5]; z->qword[6] = x->qword[6] ^ y->qword[6]; z->qword[7] = x->qword[7] ^ y->qword[7]; } static void streebog_xlps(const struct streebog_uint512 *x, const struct streebog_uint512 *y, struct streebog_uint512 *data) { u64 r0, r1, r2, r3, r4, r5, r6, r7; int i; r0 = le64_to_cpu(x->qword[0] ^ y->qword[0]); r1 = le64_to_cpu(x->qword[1] ^ y->qword[1]); r2 = le64_to_cpu(x->qword[2] ^ y->qword[2]); r3 = le64_to_cpu(x->qword[3] ^ y->qword[3]); r4 = le64_to_cpu(x->qword[4] ^ y->qword[4]); r5 = le64_to_cpu(x->qword[5] ^ y->qword[5]); r6 = le64_to_cpu(x->qword[6] ^ y->qword[6]); r7 = le64_to_cpu(x->qword[7] ^ y->qword[7]); for (i = 0; i <= 7; i++) { data->qword[i] = cpu_to_le64(Ax[0][r0 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[1][r1 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[2][r2 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[3][r3 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[4][r4 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[5][r5 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[6][r6 & 0xFF]); data->qword[i] ^= cpu_to_le64(Ax[7][r7 & 0xFF]); r0 >>= 8; r1 >>= 8; r2 >>= 8; r3 >>= 8; r4 >>= 8; r5 >>= 8; r6 >>= 8; r7 >>= 8; } } static void streebog_round(int i, struct streebog_uint512 *Ki, struct streebog_uint512 *data) { streebog_xlps(Ki, &C[i], Ki); streebog_xlps(Ki, data, data); } static int streebog_init(struct shash_desc *desc) { struct streebog_state *ctx = shash_desc_ctx(desc); unsigned int digest_size = crypto_shash_digestsize(desc->tfm); unsigned int i; memset(ctx, 0, sizeof(struct streebog_state)); for (i = 0; i < 8; i++) { if (digest_size == STREEBOG256_DIGEST_SIZE) ctx->h.qword[i] = cpu_to_le64(0x0101010101010101ULL); } return 0; } static void streebog_pad(struct streebog_state *ctx) { if (ctx->fillsize >= STREEBOG_BLOCK_SIZE) return; memset(ctx->buffer + ctx->fillsize, 0, sizeof(ctx->buffer) - ctx->fillsize); ctx->buffer[ctx->fillsize] = 1; } static void streebog_add512(const struct streebog_uint512 *x, const struct streebog_uint512 *y, struct streebog_uint512 *r) { u64 carry = 0; int i; for (i = 0; i < 8; i++) { const u64 left = le64_to_cpu(x->qword[i]); u64 sum; sum = left + le64_to_cpu(y->qword[i]) + carry; if (sum != left) carry = (sum < left); r->qword[i] = cpu_to_le64(sum); } } static void streebog_g(struct streebog_uint512 *h, const struct streebog_uint512 *N, const struct streebog_uint512 *m) { struct streebog_uint512 Ki, data; unsigned int i; streebog_xlps(h, N, &data); /* Starting E() */ Ki = data; streebog_xlps(&Ki, m, &data); for (i = 0; i < 11; i++) streebog_round(i, &Ki, &data); streebog_xlps(&Ki, &C[11], &Ki); streebog_xor(&Ki, &data, &data); /* E() done */ streebog_xor(&data, h, &data); streebog_xor(&data, m, h); } static void streebog_stage2(struct streebog_state *ctx, const u8 *data) { struct streebog_uint512 m; memcpy(&m, data, sizeof(m)); streebog_g(&ctx->h, &ctx->N, &m); streebog_add512(&ctx->N, &buffer512, &ctx->N); streebog_add512(&ctx->Sigma, &m, &ctx->Sigma); } static void streebog_stage3(struct streebog_state *ctx) { struct streebog_uint512 buf = { { 0 } }; buf.qword[0] = cpu_to_le64(ctx->fillsize << 3); streebog_pad(ctx); streebog_g(&ctx->h, &ctx->N, &ctx->m); streebog_add512(&ctx->N, &buf, &ctx->N); streebog_add512(&ctx->Sigma, &ctx->m, &ctx->Sigma); streebog_g(&ctx->h, &buffer0, &ctx->N); streebog_g(&ctx->h, &buffer0, &ctx->Sigma); memcpy(&ctx->hash, &ctx->h, sizeof(struct streebog_uint512)); } static int streebog_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct streebog_state *ctx = shash_desc_ctx(desc); size_t chunksize; if (ctx->fillsize) { chunksize = STREEBOG_BLOCK_SIZE - ctx->fillsize; if (chunksize > len) chunksize = len; memcpy(&ctx->buffer[ctx->fillsize], data, chunksize); ctx->fillsize += chunksize; len -= chunksize; data += chunksize; if (ctx->fillsize == STREEBOG_BLOCK_SIZE) { streebog_stage2(ctx, ctx->buffer); ctx->fillsize = 0; } } while (len >= STREEBOG_BLOCK_SIZE) { streebog_stage2(ctx, data); data += STREEBOG_BLOCK_SIZE; len -= STREEBOG_BLOCK_SIZE; } if (len) { memcpy(&ctx->buffer, data, len); ctx->fillsize = len; } return 0; } static int streebog_final(struct shash_desc *desc, u8 *digest) { struct streebog_state *ctx = shash_desc_ctx(desc); streebog_stage3(ctx); ctx->fillsize = 0; if (crypto_shash_digestsize(desc->tfm) == STREEBOG256_DIGEST_SIZE) memcpy(digest, &ctx->hash.qword[4], STREEBOG256_DIGEST_SIZE); else memcpy(digest, &ctx->hash.qword[0], STREEBOG512_DIGEST_SIZE); return 0; } static struct shash_alg algs[2] = { { .digestsize = STREEBOG256_DIGEST_SIZE, .init = streebog_init, .update = streebog_update, .final = streebog_final, .descsize = sizeof(struct streebog_state), .base = { .cra_name = "streebog256", .cra_driver_name = "streebog256-generic", .cra_blocksize = STREEBOG_BLOCK_SIZE, .cra_module = THIS_MODULE, }, }, { .digestsize = STREEBOG512_DIGEST_SIZE, .init = streebog_init, .update = streebog_update, .final = streebog_final, .descsize = sizeof(struct streebog_state), .base = { .cra_name = "streebog512", .cra_driver_name = "streebog512-generic", .cra_blocksize = STREEBOG_BLOCK_SIZE, .cra_module = THIS_MODULE, } } }; static int __init streebog_mod_init(void) { return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } static void __exit streebog_mod_fini(void) { crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); } subsys_initcall(streebog_mod_init); module_exit(streebog_mod_fini); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Vitaly Chikunov <vt@altlinux.org>"); MODULE_DESCRIPTION("Streebog Hash Function"); MODULE_ALIAS_CRYPTO("streebog256"); MODULE_ALIAS_CRYPTO("streebog256-generic"); MODULE_ALIAS_CRYPTO("streebog512"); MODULE_ALIAS_CRYPTO("streebog512-generic");
2 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 // SPDX-License-Identifier: GPL-2.0-only /* Kernel module to match AH parameters. */ /* (C) 2001-2002 Andras Kis-Szabo <kisza@sch.bme.hu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/types.h> #include <net/checksum.h> #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_ah.h> MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: IPv6 IPsec-AH match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ static inline bool spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { bool r; pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ', min, spi, max); r = (spi >= min && spi <= max) ^ invert; pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; const struct ip6t_ah *ahinfo = par->matchinfo; unsigned int ptr = 0; unsigned int hdrlen = 0; int err; err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL, NULL); if (err < 0) { if (err != -ENOENT) par->hotdrop = true; return false; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { par->hotdrop = true; return false; } hdrlen = ipv6_authlen(ah); pr_debug("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); pr_debug("RES %04X ", ah->reserved); pr_debug("SPI %u %08X\n", ntohl(ah->spi), ntohl(ah->spi)); pr_debug("IPv6 AH spi %02X ", spi_match(ahinfo->spis[0], ahinfo->spis[1], ntohl(ah->spi), !!(ahinfo->invflags & IP6T_AH_INV_SPI))); pr_debug("len %02X %04X %02X ", ahinfo->hdrlen, hdrlen, (!ahinfo->hdrlen || (ahinfo->hdrlen == hdrlen) ^ !!(ahinfo->invflags & IP6T_AH_INV_LEN))); pr_debug("res %02X %04X %02X\n", ahinfo->hdrres, ah->reserved, !(ahinfo->hdrres && ah->reserved)); return spi_match(ahinfo->spis[0], ahinfo->spis[1], ntohl(ah->spi), !!(ahinfo->invflags & IP6T_AH_INV_SPI)) && (!ahinfo->hdrlen || (ahinfo->hdrlen == hdrlen) ^ !!(ahinfo->invflags & IP6T_AH_INV_LEN)) && !(ahinfo->hdrres && ah->reserved); } static int ah_mt6_check(const struct xt_mtchk_param *par) { const struct ip6t_ah *ahinfo = par->matchinfo; if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { pr_debug("unknown flags %X\n", ahinfo->invflags); return -EINVAL; } return 0; } static struct xt_match ah_mt6_reg __read_mostly = { .name = "ah", .family = NFPROTO_IPV6, .match = ah_mt6, .matchsize = sizeof(struct ip6t_ah), .checkentry = ah_mt6_check, .me = THIS_MODULE, }; static int __init ah_mt6_init(void) { return xt_register_match(&ah_mt6_reg); } static void __exit ah_mt6_exit(void) { xt_unregister_match(&ah_mt6_reg); } module_init(ah_mt6_init); module_exit(ah_mt6_exit);
1622 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * include/net/dsa.h - Driver for Distributed Switch Architecture switch chips * Copyright (c) 2008-2009 Marvell Semiconductor */ #ifndef __LINUX_NET_DSA_H #define __LINUX_NET_DSA_H #include <linux/if.h> #include <linux/if_ether.h> #include <linux/list.h> #include <linux/notifier.h> #include <linux/timer.h> #include <linux/workqueue.h> #include <linux/of.h> #include <linux/ethtool.h> #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/platform_data/dsa.h> #include <linux/phylink.h> #include <net/devlink.h> #include <net/switchdev.h> struct dsa_8021q_context; struct tc_action; #define DSA_TAG_PROTO_NONE_VALUE 0 #define DSA_TAG_PROTO_BRCM_VALUE 1 #define DSA_TAG_PROTO_BRCM_PREPEND_VALUE 2 #define DSA_TAG_PROTO_DSA_VALUE 3 #define DSA_TAG_PROTO_EDSA_VALUE 4 #define DSA_TAG_PROTO_GSWIP_VALUE 5 #define DSA_TAG_PROTO_KSZ9477_VALUE 6 #define DSA_TAG_PROTO_KSZ9893_VALUE 7 #define DSA_TAG_PROTO_LAN9303_VALUE 8 #define DSA_TAG_PROTO_MTK_VALUE 9 #define DSA_TAG_PROTO_QCA_VALUE 10 #define DSA_TAG_PROTO_TRAILER_VALUE 11 #define DSA_TAG_PROTO_8021Q_VALUE 12 #define DSA_TAG_PROTO_SJA1105_VALUE 13 #define DSA_TAG_PROTO_KSZ8795_VALUE 14 #define DSA_TAG_PROTO_OCELOT_VALUE 15 #define DSA_TAG_PROTO_AR9331_VALUE 16 #define DSA_TAG_PROTO_RTL4_A_VALUE 17 #define DSA_TAG_PROTO_HELLCREEK_VALUE 18 #define DSA_TAG_PROTO_XRS700X_VALUE 19 #define DSA_TAG_PROTO_OCELOT_8021Q_VALUE 20 #define DSA_TAG_PROTO_SEVILLE_VALUE 21 #define DSA_TAG_PROTO_BRCM_LEGACY_VALUE 22 #define DSA_TAG_PROTO_SJA1110_VALUE 23 #define DSA_TAG_PROTO_RTL8_4_VALUE 24 #define DSA_TAG_PROTO_RTL8_4T_VALUE 25 #define DSA_TAG_PROTO_RZN1_A5PSW_VALUE 26 #define DSA_TAG_PROTO_LAN937X_VALUE 27 #define DSA_TAG_PROTO_VSC73XX_8021Q_VALUE 28 enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE, DSA_TAG_PROTO_BRCM = DSA_TAG_PROTO_BRCM_VALUE, DSA_TAG_PROTO_BRCM_LEGACY = DSA_TAG_PROTO_BRCM_LEGACY_VALUE, DSA_TAG_PROTO_BRCM_PREPEND = DSA_TAG_PROTO_BRCM_PREPEND_VALUE, DSA_TAG_PROTO_DSA = DSA_TAG_PROTO_DSA_VALUE, DSA_TAG_PROTO_EDSA = DSA_TAG_PROTO_EDSA_VALUE, DSA_TAG_PROTO_GSWIP = DSA_TAG_PROTO_GSWIP_VALUE, DSA_TAG_PROTO_KSZ9477 = DSA_TAG_PROTO_KSZ9477_VALUE, DSA_TAG_PROTO_KSZ9893 = DSA_TAG_PROTO_KSZ9893_VALUE, DSA_TAG_PROTO_LAN9303 = DSA_TAG_PROTO_LAN9303_VALUE, DSA_TAG_PROTO_MTK = DSA_TAG_PROTO_MTK_VALUE, DSA_TAG_PROTO_QCA = DSA_TAG_PROTO_QCA_VALUE, DSA_TAG_PROTO_TRAILER = DSA_TAG_PROTO_TRAILER_VALUE, DSA_TAG_PROTO_8021Q = DSA_TAG_PROTO_8021Q_VALUE, DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE, DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE, DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE, DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE, DSA_TAG_PROTO_RTL4_A = DSA_TAG_PROTO_RTL4_A_VALUE, DSA_TAG_PROTO_HELLCREEK = DSA_TAG_PROTO_HELLCREEK_VALUE, DSA_TAG_PROTO_XRS700X = DSA_TAG_PROTO_XRS700X_VALUE, DSA_TAG_PROTO_OCELOT_8021Q = DSA_TAG_PROTO_OCELOT_8021Q_VALUE, DSA_TAG_PROTO_SEVILLE = DSA_TAG_PROTO_SEVILLE_VALUE, DSA_TAG_PROTO_SJA1110 = DSA_TAG_PROTO_SJA1110_VALUE, DSA_TAG_PROTO_RTL8_4 = DSA_TAG_PROTO_RTL8_4_VALUE, DSA_TAG_PROTO_RTL8_4T = DSA_TAG_PROTO_RTL8_4T_VALUE, DSA_TAG_PROTO_RZN1_A5PSW = DSA_TAG_PROTO_RZN1_A5PSW_VALUE, DSA_TAG_PROTO_LAN937X = DSA_TAG_PROTO_LAN937X_VALUE, DSA_TAG_PROTO_VSC73XX_8021Q = DSA_TAG_PROTO_VSC73XX_8021Q_VALUE, }; struct dsa_switch; struct dsa_device_ops { struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); void (*flow_dissect)(const struct sk_buff *skb, __be16 *proto, int *offset); int (*connect)(struct dsa_switch *ds); void (*disconnect)(struct dsa_switch *ds); unsigned int needed_headroom; unsigned int needed_tailroom; const char *name; enum dsa_tag_protocol proto; /* Some tagging protocols either mangle or shift the destination MAC * address, in which case the DSA conduit would drop packets on ingress * if what it understands out of the destination MAC address is not in * its RX filter. */ bool promisc_on_conduit; }; struct dsa_lag { struct net_device *dev; unsigned int id; struct mutex fdb_lock; struct list_head fdbs; refcount_t refcount; }; struct dsa_switch_tree { struct list_head list; /* List of switch ports */ struct list_head ports; /* Notifier chain for switch-wide events */ struct raw_notifier_head nh; /* Tree identifier */ unsigned int index; /* Number of switches attached to this tree */ struct kref refcount; /* Maps offloaded LAG netdevs to a zero-based linear ID for * drivers that need it. */ struct dsa_lag **lags; /* Tagging protocol operations */ const struct dsa_device_ops *tag_ops; /* Default tagging protocol preferred by the switches in this * tree. */ enum dsa_tag_protocol default_proto; /* Has this tree been applied to the hardware? */ bool setup; /* * Configuration data for the platform device that owns * this dsa switch tree instance. */ struct dsa_platform_data *pd; /* List of DSA links composing the routing table */ struct list_head rtable; /* Length of "lags" array */ unsigned int lags_len; /* Track the largest switch index within a tree */ unsigned int last_switch; }; /* LAG IDs are one-based, the dst->lags array is zero-based */ #define dsa_lags_foreach_id(_id, _dst) \ for ((_id) = 1; (_id) <= (_dst)->lags_len; (_id)++) \ if ((_dst)->lags[(_id) - 1]) #define dsa_lag_foreach_port(_dp, _dst, _lag) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_offloads_lag((_dp), (_lag))) #define dsa_hsr_foreach_port(_dp, _ds, _hsr) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds) && (_dp)->hsr_dev == (_hsr)) static inline struct dsa_lag *dsa_lag_by_id(struct dsa_switch_tree *dst, unsigned int id) { /* DSA LAG IDs are one-based, dst->lags is zero-based */ return dst->lags[id - 1]; } static inline int dsa_lag_id(struct dsa_switch_tree *dst, struct net_device *lag_dev) { unsigned int id; dsa_lags_foreach_id(id, dst) { struct dsa_lag *lag = dsa_lag_by_id(dst, id); if (lag->dev == lag_dev) return lag->id; } return -ENODEV; } /* TC matchall action types */ enum dsa_port_mall_action_type { DSA_PORT_MALL_MIRROR, DSA_PORT_MALL_POLICER, }; /* TC mirroring entry */ struct dsa_mall_mirror_tc_entry { u8 to_local_port; bool ingress; }; /* TC port policer entry */ struct dsa_mall_policer_tc_entry { u32 burst; u64 rate_bytes_per_sec; }; /* TC matchall entry */ struct dsa_mall_tc_entry { struct list_head list; unsigned long cookie; enum dsa_port_mall_action_type type; union { struct dsa_mall_mirror_tc_entry mirror; struct dsa_mall_policer_tc_entry policer; }; }; struct dsa_bridge { struct net_device *dev; unsigned int num; bool tx_fwd_offload; refcount_t refcount; }; struct dsa_port { /* A CPU port is physically connected to a conduit device. A user port * exposes a network device to user-space, called 'user' here. */ union { struct net_device *conduit; struct net_device *user; }; /* Copy of the tagging protocol operations, for quicker access * in the data path. Valid only for the CPU ports. */ const struct dsa_device_ops *tag_ops; /* Copies for faster access in conduit receive hot path */ struct dsa_switch_tree *dst; struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev); struct dsa_switch *ds; unsigned int index; enum { DSA_PORT_TYPE_UNUSED = 0, DSA_PORT_TYPE_CPU, DSA_PORT_TYPE_DSA, DSA_PORT_TYPE_USER, } type; const char *name; struct dsa_port *cpu_dp; u8 mac[ETH_ALEN]; u8 stp_state; /* Warning: the following bit fields are not atomic, and updating them * can only be done from code paths where concurrency is not possible * (probe time or under rtnl_lock). */ u8 vlan_filtering:1; /* Managed by DSA on user ports and by drivers on CPU and DSA ports */ u8 learning:1; u8 lag_tx_enabled:1; /* conduit state bits, valid only on CPU ports */ u8 conduit_admin_up:1; u8 conduit_oper_up:1; /* Valid only on user ports */ u8 cpu_port_in_lag:1; u8 setup:1; struct device_node *dn; unsigned int ageing_time; struct dsa_bridge *bridge; struct devlink_port devlink_port; struct phylink *pl; struct phylink_config pl_config; struct dsa_lag *lag; struct net_device *hsr_dev; struct list_head list; /* * Original copy of the conduit netdev ethtool_ops */ const struct ethtool_ops *orig_ethtool_ops; /* List of MAC addresses that must be forwarded on this port. * These are only valid on CPU ports and DSA links. */ struct mutex addr_lists_lock; struct list_head fdbs; struct list_head mdbs; struct mutex vlans_lock; union { /* List of VLANs that CPU and DSA ports are members of. * Access to this is serialized by the sleepable @vlans_lock. */ struct list_head vlans; /* List of VLANs that user ports are members of. * Access to this is serialized by netif_addr_lock_bh(). */ struct list_head user_vlans; }; }; static inline struct dsa_port * dsa_phylink_to_port(struct phylink_config *config) { return container_of(config, struct dsa_port, pl_config); } /* TODO: ideally DSA ports would have a single dp->link_dp member, * and no dst->rtable nor this struct dsa_link would be needed, * but this would require some more complex tree walking, * so keep it stupid at the moment and list them all. */ struct dsa_link { struct dsa_port *dp; struct dsa_port *link_dp; struct list_head list; }; enum dsa_db_type { DSA_DB_PORT, DSA_DB_LAG, DSA_DB_BRIDGE, }; struct dsa_db { enum dsa_db_type type; union { const struct dsa_port *dp; struct dsa_lag lag; struct dsa_bridge bridge; }; }; struct dsa_mac_addr { unsigned char addr[ETH_ALEN]; u16 vid; refcount_t refcount; struct list_head list; struct dsa_db db; }; struct dsa_vlan { u16 vid; refcount_t refcount; struct list_head list; }; struct dsa_switch { struct device *dev; /* * Parent switch tree, and switch index. */ struct dsa_switch_tree *dst; unsigned int index; /* Warning: the following bit fields are not atomic, and updating them * can only be done from code paths where concurrency is not possible * (probe time or under rtnl_lock). */ u32 setup:1; /* Disallow bridge core from requesting different VLAN awareness * settings on ports if not hardware-supported */ u32 vlan_filtering_is_global:1; /* Keep VLAN filtering enabled on ports not offloading any upper */ u32 needs_standalone_vlan_filtering:1; /* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges * that have vlan_filtering=0. All drivers should ideally set this (and * then the option would get removed), but it is unknown whether this * would break things or not. */ u32 configure_vlan_while_not_filtering:1; /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. * DEPRECATED: Do NOT set this field in new drivers. Instead look at * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. * Useful if the switch cannot preserve the VLAN tag as seen on the * wire for user port ingress, and chooses to send all frames as * VLAN-tagged to the CPU, including those which were originally * untagged. */ u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. */ u32 assisted_learning_on_cpu_port:1; /* In case vlan_filtering_is_global is set, the VLAN awareness state * should be retrieved from here and not from the per-port settings. */ u32 vlan_filtering:1; /* For switches that only have the MRU configurable. To ensure the * configured MTU is not exceeded, normalization of MRU on all bridged * interfaces is needed. */ u32 mtu_enforcement_ingress:1; /* Drivers that isolate the FDBs of multiple bridges must set this * to true to receive the bridge as an argument in .port_fdb_{add,del} * and .port_mdb_{add,del}. Otherwise, the bridge.num will always be * passed as zero. */ u32 fdb_isolation:1; /* Drivers that have global DSCP mapping settings must set this to * true to automatically apply the settings to all ports. */ u32 dscp_prio_mapping_is_global:1; /* Listener for switch fabric events */ struct notifier_block nb; /* * Give the switch driver somewhere to hang its private data * structure. */ void *priv; void *tagger_data; /* * Configuration data for this switch. */ struct dsa_chip_data *cd; /* * The switch operations. */ const struct dsa_switch_ops *ops; /* * Allow a DSA switch driver to override the phylink MAC ops */ const struct phylink_mac_ops *phylink_mac_ops; /* * User mii_bus and devices for the individual ports. */ u32 phys_mii_mask; struct mii_bus *user_mii_bus; /* Ageing Time limits in msecs */ unsigned int ageing_time_min; unsigned int ageing_time_max; /* Storage for drivers using tag_8021q */ struct dsa_8021q_context *tag_8021q_ctx; /* devlink used to represent this switch device */ struct devlink *devlink; /* Number of switch port queues */ unsigned int num_tx_queues; /* Drivers that benefit from having an ID associated with each * offloaded LAG should set this to the maximum number of * supported IDs. DSA will then maintain a mapping of _at * least_ these many IDs, accessible to drivers via * dsa_lag_id(). */ unsigned int num_lag_ids; /* Drivers that support bridge forwarding offload or FDB isolation * should set this to the maximum number of bridges spanning the same * switch tree (or all trees, in the case of cross-tree bridging * support) that can be offloaded. */ unsigned int max_num_bridges; unsigned int num_ports; }; static inline struct dsa_port *dsa_to_port(struct dsa_switch *ds, int p) { struct dsa_switch_tree *dst = ds->dst; struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dp->ds == ds && dp->index == p) return dp; return NULL; } static inline bool dsa_port_is_dsa(struct dsa_port *port) { return port->type == DSA_PORT_TYPE_DSA; } static inline bool dsa_port_is_cpu(struct dsa_port *port) { return port->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_port_is_user(struct dsa_port *dp) { return dp->type == DSA_PORT_TYPE_USER; } static inline bool dsa_port_is_unused(struct dsa_port *dp) { return dp->type == DSA_PORT_TYPE_UNUSED; } static inline bool dsa_port_conduit_is_operational(struct dsa_port *dp) { return dsa_port_is_cpu(dp) && dp->conduit_admin_up && dp->conduit_oper_up; } static inline bool dsa_is_unused_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_UNUSED; } static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_CPU; } static inline bool dsa_is_dsa_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_DSA; } static inline bool dsa_is_user_port(struct dsa_switch *ds, int p) { return dsa_to_port(ds, p)->type == DSA_PORT_TYPE_USER; } #define dsa_tree_for_each_user_port(_dp, _dst) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) #define dsa_tree_for_each_user_port_continue_reverse(_dp, _dst) \ list_for_each_entry_continue_reverse((_dp), &(_dst)->ports, list) \ if (dsa_port_is_user((_dp))) #define dsa_tree_for_each_cpu_port(_dp, _dst) \ list_for_each_entry((_dp), &(_dst)->ports, list) \ if (dsa_port_is_cpu((_dp))) #define dsa_switch_for_each_port(_dp, _ds) \ list_for_each_entry((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) #define dsa_switch_for_each_port_safe(_dp, _next, _ds) \ list_for_each_entry_safe((_dp), (_next), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) #define dsa_switch_for_each_port_continue_reverse(_dp, _ds) \ list_for_each_entry_continue_reverse((_dp), &(_ds)->dst->ports, list) \ if ((_dp)->ds == (_ds)) #define dsa_switch_for_each_available_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (!dsa_port_is_unused((_dp))) #define dsa_switch_for_each_user_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_user((_dp))) #define dsa_switch_for_each_user_port_continue_reverse(_dp, _ds) \ dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ if (dsa_port_is_user((_dp))) #define dsa_switch_for_each_cpu_port(_dp, _ds) \ dsa_switch_for_each_port((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) #define dsa_switch_for_each_cpu_port_continue_reverse(_dp, _ds) \ dsa_switch_for_each_port_continue_reverse((_dp), (_ds)) \ if (dsa_port_is_cpu((_dp))) static inline u32 dsa_user_ports(struct dsa_switch *ds) { struct dsa_port *dp; u32 mask = 0; dsa_switch_for_each_user_port(dp, ds) mask |= BIT(dp->index); return mask; } static inline u32 dsa_cpu_ports(struct dsa_switch *ds) { struct dsa_port *cpu_dp; u32 mask = 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) mask |= BIT(cpu_dp->index); return mask; } /* Return the local port used to reach an arbitrary switch device */ static inline unsigned int dsa_routing_port(struct dsa_switch *ds, int device) { struct dsa_switch_tree *dst = ds->dst; struct dsa_link *dl; list_for_each_entry(dl, &dst->rtable, list) if (dl->dp->ds == ds && dl->link_dp->ds->index == device) return dl->dp->index; return ds->num_ports; } /* Return the local port used to reach an arbitrary switch port */ static inline unsigned int dsa_towards_port(struct dsa_switch *ds, int device, int port) { if (device == ds->index) return port; else return dsa_routing_port(ds, device); } /* Return the local port used to reach the dedicated CPU port */ static inline unsigned int dsa_upstream_port(struct dsa_switch *ds, int port) { const struct dsa_port *dp = dsa_to_port(ds, port); const struct dsa_port *cpu_dp = dp->cpu_dp; if (!cpu_dp) return port; return dsa_towards_port(ds, cpu_dp->ds->index, cpu_dp->index); } /* Return true if this is the local port used to reach the CPU port */ static inline bool dsa_is_upstream_port(struct dsa_switch *ds, int port) { if (dsa_is_unused_port(ds, port)) return false; return port == dsa_upstream_port(ds, port); } /* Return true if this is a DSA port leading away from the CPU */ static inline bool dsa_is_downstream_port(struct dsa_switch *ds, int port) { return dsa_is_dsa_port(ds, port) && !dsa_is_upstream_port(ds, port); } /* Return the local port used to reach the CPU port */ static inline unsigned int dsa_switch_upstream_port(struct dsa_switch *ds) { struct dsa_port *dp; dsa_switch_for_each_available_port(dp, ds) { return dsa_upstream_port(ds, dp->index); } return ds->num_ports; } /* Return true if @upstream_ds is an upstream switch of @downstream_ds, meaning * that the routing port from @downstream_ds to @upstream_ds is also the port * which @downstream_ds uses to reach its dedicated CPU. */ static inline bool dsa_switch_is_upstream_of(struct dsa_switch *upstream_ds, struct dsa_switch *downstream_ds) { int routing_port; if (upstream_ds == downstream_ds) return true; routing_port = dsa_routing_port(downstream_ds, upstream_ds->index); return dsa_is_upstream_port(downstream_ds, routing_port); } static inline bool dsa_port_is_vlan_filtering(const struct dsa_port *dp) { const struct dsa_switch *ds = dp->ds; if (ds->vlan_filtering_is_global) return ds->vlan_filtering; else return dp->vlan_filtering; } static inline unsigned int dsa_port_lag_id_get(struct dsa_port *dp) { return dp->lag ? dp->lag->id : 0; } static inline struct net_device *dsa_port_lag_dev_get(struct dsa_port *dp) { return dp->lag ? dp->lag->dev : NULL; } static inline bool dsa_port_offloads_lag(struct dsa_port *dp, const struct dsa_lag *lag) { return dsa_port_lag_dev_get(dp) == lag->dev; } static inline struct net_device *dsa_port_to_conduit(const struct dsa_port *dp) { if (dp->cpu_port_in_lag) return dsa_port_lag_dev_get(dp->cpu_dp); return dp->cpu_dp->conduit; } static inline struct net_device *dsa_port_to_bridge_port(const struct dsa_port *dp) { if (!dp->bridge) return NULL; if (dp->lag) return dp->lag->dev; else if (dp->hsr_dev) return dp->hsr_dev; return dp->user; } static inline struct net_device * dsa_port_bridge_dev_get(const struct dsa_port *dp) { return dp->bridge ? dp->bridge->dev : NULL; } static inline unsigned int dsa_port_bridge_num_get(struct dsa_port *dp) { return dp->bridge ? dp->bridge->num : 0; } static inline bool dsa_port_bridge_same(const struct dsa_port *a, const struct dsa_port *b) { struct net_device *br_a = dsa_port_bridge_dev_get(a); struct net_device *br_b = dsa_port_bridge_dev_get(b); /* Standalone ports are not in the same bridge with one another */ return (!br_a || !br_b) ? false : (br_a == br_b); } static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, const struct net_device *dev) { return dsa_port_to_bridge_port(dp) == dev; } static inline bool dsa_port_offloads_bridge_dev(struct dsa_port *dp, const struct net_device *bridge_dev) { /* DSA ports connected to a bridge, and event was emitted * for the bridge. */ return dsa_port_bridge_dev_get(dp) == bridge_dev; } static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, const struct dsa_bridge *bridge) { return dsa_port_bridge_dev_get(dp) == bridge->dev; } /* Returns true if any port of this tree offloads the given net_device */ static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, const struct net_device *dev) { struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dsa_port_offloads_bridge_port(dp, dev)) return true; return false; } /* Returns true if any port of this tree offloads the given bridge */ static inline bool dsa_tree_offloads_bridge_dev(struct dsa_switch_tree *dst, const struct net_device *bridge_dev) { struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) if (dsa_port_offloads_bridge_dev(dp, bridge_dev)) return true; return false; } static inline bool dsa_port_tree_same(const struct dsa_port *a, const struct dsa_port *b) { return a->ds->dst == b->ds->dst; } typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid, bool is_static, void *data); struct dsa_switch_ops { /* * Tagging protocol helpers called for the CPU ports and DSA links. * @get_tag_protocol retrieves the initial tagging protocol and is * mandatory. Switches which can operate using multiple tagging * protocols should implement @change_tag_protocol and report in * @get_tag_protocol the tagger in current use. */ enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds, int port, enum dsa_tag_protocol mprot); int (*change_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); /* * Method for switch drivers to connect to the tagging protocol driver * in current use. The switch driver can provide handlers for certain * types of packets for switch management. */ int (*connect_tag_protocol)(struct dsa_switch *ds, enum dsa_tag_protocol proto); int (*port_change_conduit)(struct dsa_switch *ds, int port, struct net_device *conduit, struct netlink_ext_ack *extack); /* Optional switch-wide initialization and destruction methods */ int (*setup)(struct dsa_switch *ds); void (*teardown)(struct dsa_switch *ds); /* Per-port initialization and destruction methods. Mandatory if the * driver registers devlink port regions, optional otherwise. */ int (*port_setup)(struct dsa_switch *ds, int port); void (*port_teardown)(struct dsa_switch *ds, int port); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); /* * Access to the switch's PHY registers. */ int (*phy_read)(struct dsa_switch *ds, int port, int regnum); int (*phy_write)(struct dsa_switch *ds, int port, int regnum, u16 val); /* * PHYLINK integration */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); void (*phylink_fixed_state)(struct dsa_switch *ds, int port, struct phylink_link_state *state); /* * Port statistics counters. */ void (*get_strings)(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data); void (*get_ethtool_stats)(struct dsa_switch *ds, int port, uint64_t *data); int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); void (*get_ethtool_phy_stats)(struct dsa_switch *ds, int port, uint64_t *data); void (*get_eth_phy_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_phy_stats *phy_stats); void (*get_eth_mac_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_mac_stats *mac_stats); void (*get_eth_ctrl_stats)(struct dsa_switch *ds, int port, struct ethtool_eth_ctrl_stats *ctrl_stats); void (*get_rmon_stats)(struct dsa_switch *ds, int port, struct ethtool_rmon_stats *rmon_stats, const struct ethtool_rmon_hist_range **ranges); void (*get_ts_stats)(struct dsa_switch *ds, int port, struct ethtool_ts_stats *ts_stats); void (*get_stats64)(struct dsa_switch *ds, int port, struct rtnl_link_stats64 *s); void (*get_pause_stats)(struct dsa_switch *ds, int port, struct ethtool_pause_stats *pause_stats); void (*self_test)(struct dsa_switch *ds, int port, struct ethtool_test *etest, u64 *data); /* * ethtool Wake-on-LAN */ void (*get_wol)(struct dsa_switch *ds, int port, struct ethtool_wolinfo *w); int (*set_wol)(struct dsa_switch *ds, int port, struct ethtool_wolinfo *w); /* * ethtool timestamp info */ int (*get_ts_info)(struct dsa_switch *ds, int port, struct kernel_ethtool_ts_info *ts); /* * ethtool MAC merge layer */ int (*get_mm)(struct dsa_switch *ds, int port, struct ethtool_mm_state *state); int (*set_mm)(struct dsa_switch *ds, int port, struct ethtool_mm_cfg *cfg, struct netlink_ext_ack *extack); void (*get_mm_stats)(struct dsa_switch *ds, int port, struct ethtool_mm_stats *stats); /* * DCB ops */ int (*port_get_default_prio)(struct dsa_switch *ds, int port); int (*port_set_default_prio)(struct dsa_switch *ds, int port, u8 prio); int (*port_get_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp); int (*port_add_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, u8 prio); int (*port_del_dscp_prio)(struct dsa_switch *ds, int port, u8 dscp, u8 prio); int (*port_set_apptrust)(struct dsa_switch *ds, int port, const u8 *sel, int nsel); int (*port_get_apptrust)(struct dsa_switch *ds, int port, u8 *sel, int *nsel); /* * Suspend and resume */ int (*suspend)(struct dsa_switch *ds); int (*resume)(struct dsa_switch *ds); /* * Port enable/disable */ int (*port_enable)(struct dsa_switch *ds, int port, struct phy_device *phy); void (*port_disable)(struct dsa_switch *ds, int port); /* * Notification for MAC address changes on user ports. Drivers can * currently only veto operations. They should not use the method to * program the hardware, since the operation is not rolled back in case * of other errors. */ int (*port_set_mac_address)(struct dsa_switch *ds, int port, const unsigned char *addr); /* * Compatibility between device trees defining multiple CPU ports and * drivers which are not OK to use by default the numerically smallest * CPU port of a switch for its local ports. This can return NULL, * meaning "don't know/don't care". */ struct dsa_port *(*preferred_default_local_cpu_port)(struct dsa_switch *ds); /* * Port's MAC EEE settings */ bool (*support_eee)(struct dsa_switch *ds, int port); int (*set_mac_eee)(struct dsa_switch *ds, int port, struct ethtool_keee *e); /* EEPROM access */ int (*get_eeprom_len)(struct dsa_switch *ds); int (*get_eeprom)(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data); int (*set_eeprom)(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data); /* * Register access. */ int (*get_regs_len)(struct dsa_switch *ds, int port); void (*get_regs)(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *p); /* * Upper device tracking. */ int (*port_prechangeupper)(struct dsa_switch *ds, int port, struct netdev_notifier_changeupper_info *info); /* * Bridge integration */ int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct dsa_bridge bridge, bool *tx_fwd_offload, struct netlink_ext_ack *extack); void (*port_bridge_leave)(struct dsa_switch *ds, int port, struct dsa_bridge bridge); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); int (*port_mst_state_set)(struct dsa_switch *ds, int port, const struct switchdev_mst_state *state); void (*port_fast_age)(struct dsa_switch *ds, int port); int (*port_vlan_fast_age)(struct dsa_switch *ds, int port, u16 vid); int (*port_pre_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); int (*port_bridge_flags)(struct dsa_switch *ds, int port, struct switchdev_brport_flags flags, struct netlink_ext_ack *extack); void (*port_set_host_flood)(struct dsa_switch *ds, int port, bool uc, bool mc); /* * VLAN support */ int (*port_vlan_filtering)(struct dsa_switch *ds, int port, bool vlan_filtering, struct netlink_ext_ack *extack); int (*port_vlan_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack); int (*port_vlan_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan); int (*vlan_msti_set)(struct dsa_switch *ds, struct dsa_bridge bridge, const struct switchdev_vlan_msti *msti); /* * Forwarding database */ int (*port_fdb_add)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); int (*port_fdb_del)(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); int (*port_fdb_dump)(struct dsa_switch *ds, int port, dsa_fdb_dump_cb_t *cb, void *data); int (*lag_fdb_add)(struct dsa_switch *ds, struct dsa_lag lag, const unsigned char *addr, u16 vid, struct dsa_db db); int (*lag_fdb_del)(struct dsa_switch *ds, struct dsa_lag lag, const unsigned char *addr, u16 vid, struct dsa_db db); /* * Multicast database */ int (*port_mdb_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); int (*port_mdb_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); /* * RXNFC */ int (*get_rxnfc)(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc, u32 *rule_locs); int (*set_rxnfc)(struct dsa_switch *ds, int port, struct ethtool_rxnfc *nfc); /* * TC integration */ int (*cls_flower_add)(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); int (*cls_flower_del)(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); int (*cls_flower_stats)(struct dsa_switch *ds, int port, struct flow_cls_offload *cls, bool ingress); int (*port_mirror_add)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror, bool ingress, struct netlink_ext_ack *extack); void (*port_mirror_del)(struct dsa_switch *ds, int port, struct dsa_mall_mirror_tc_entry *mirror); int (*port_policer_add)(struct dsa_switch *ds, int port, struct dsa_mall_policer_tc_entry *policer); void (*port_policer_del)(struct dsa_switch *ds, int port); int (*port_setup_tc)(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data); /* * Cross-chip operations */ int (*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge, struct netlink_ext_ack *extack); void (*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index, int sw_index, int port, struct dsa_bridge bridge); int (*crosschip_lag_change)(struct dsa_switch *ds, int sw_index, int port); int (*crosschip_lag_join)(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag, struct netdev_lag_upper_info *info, struct netlink_ext_ack *extack); int (*crosschip_lag_leave)(struct dsa_switch *ds, int sw_index, int port, struct dsa_lag lag); /* * PTP functionality */ int (*port_hwtstamp_get)(struct dsa_switch *ds, int port, struct ifreq *ifr); int (*port_hwtstamp_set)(struct dsa_switch *ds, int port, struct ifreq *ifr); void (*port_txtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb); bool (*port_rxtstamp)(struct dsa_switch *ds, int port, struct sk_buff *skb, unsigned int type); /* Devlink parameters, etc */ int (*devlink_param_get)(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int (*devlink_param_set)(struct dsa_switch *ds, u32 id, struct devlink_param_gset_ctx *ctx); int (*devlink_info_get)(struct dsa_switch *ds, struct devlink_info_req *req, struct netlink_ext_ack *extack); int (*devlink_sb_pool_get)(struct dsa_switch *ds, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); int (*devlink_sb_pool_set)(struct dsa_switch *ds, unsigned int sb_index, u16 pool_index, u32 size, enum devlink_sb_threshold_type threshold_type, struct netlink_ext_ack *extack); int (*devlink_sb_port_pool_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 pool_index, u32 *p_threshold); int (*devlink_sb_port_pool_set)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 pool_index, u32 threshold, struct netlink_ext_ack *extack); int (*devlink_sb_tc_pool_bind_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u16 *p_pool_index, u32 *p_threshold); int (*devlink_sb_tc_pool_bind_set)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u16 pool_index, u32 threshold, struct netlink_ext_ack *extack); int (*devlink_sb_occ_snapshot)(struct dsa_switch *ds, unsigned int sb_index); int (*devlink_sb_occ_max_clear)(struct dsa_switch *ds, unsigned int sb_index); int (*devlink_sb_occ_port_pool_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 pool_index, u32 *p_cur, u32 *p_max); int (*devlink_sb_occ_tc_port_bind_get)(struct dsa_switch *ds, int port, unsigned int sb_index, u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); /* * MTU change functionality. Switches can also adjust their MRU through * this method. By MTU, one understands the SDU (L2 payload) length. * If the switch needs to account for the DSA tag on the CPU port, this * method needs to do so privately. */ int (*port_change_mtu)(struct dsa_switch *ds, int port, int new_mtu); int (*port_max_mtu)(struct dsa_switch *ds, int port); /* * LAG integration */ int (*port_lag_change)(struct dsa_switch *ds, int port); int (*port_lag_join)(struct dsa_switch *ds, int port, struct dsa_lag lag, struct netdev_lag_upper_info *info, struct netlink_ext_ack *extack); int (*port_lag_leave)(struct dsa_switch *ds, int port, struct dsa_lag lag); /* * HSR integration */ int (*port_hsr_join)(struct dsa_switch *ds, int port, struct net_device *hsr, struct netlink_ext_ack *extack); int (*port_hsr_leave)(struct dsa_switch *ds, int port, struct net_device *hsr); /* * MRP integration */ int (*port_mrp_add)(struct dsa_switch *ds, int port, const struct switchdev_obj_mrp *mrp); int (*port_mrp_del)(struct dsa_switch *ds, int port, const struct switchdev_obj_mrp *mrp); int (*port_mrp_add_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); int (*port_mrp_del_ring_role)(struct dsa_switch *ds, int port, const struct switchdev_obj_ring_role_mrp *mrp); /* * tag_8021q operations */ int (*tag_8021q_vlan_add)(struct dsa_switch *ds, int port, u16 vid, u16 flags); int (*tag_8021q_vlan_del)(struct dsa_switch *ds, int port, u16 vid); /* * DSA conduit tracking operations */ void (*conduit_state_change)(struct dsa_switch *ds, const struct net_device *conduit, bool operational); }; #define DSA_DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes) \ DEVLINK_PARAM_DRIVER(_id, _name, _type, _cmodes, \ dsa_devlink_param_get, dsa_devlink_param_set, NULL) int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx); int dsa_devlink_param_set(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx, struct netlink_ext_ack *extack); int dsa_devlink_params_register(struct dsa_switch *ds, const struct devlink_param *params, size_t params_count); void dsa_devlink_params_unregister(struct dsa_switch *ds, const struct devlink_param *params, size_t params_count); int dsa_devlink_resource_register(struct dsa_switch *ds, const char *resource_name, u64 resource_size, u64 resource_id, u64 parent_resource_id, const struct devlink_resource_size_params *size_params); void dsa_devlink_resources_unregister(struct dsa_switch *ds); void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds, u64 resource_id, devlink_resource_occ_get_t *occ_get, void *occ_get_priv); void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds, u64 resource_id); struct devlink_region * dsa_devlink_region_create(struct dsa_switch *ds, const struct devlink_region_ops *ops, u32 region_max_snapshots, u64 region_size); struct devlink_region * dsa_devlink_port_region_create(struct dsa_switch *ds, int port, const struct devlink_port_region_ops *ops, u32 region_max_snapshots, u64 region_size); void dsa_devlink_region_destroy(struct devlink_region *region); struct dsa_port *dsa_port_from_netdev(struct net_device *netdev); struct dsa_devlink_priv { struct dsa_switch *ds; }; static inline struct dsa_switch *dsa_devlink_to_ds(struct devlink *dl) { struct dsa_devlink_priv *dl_priv = devlink_priv(dl); return dl_priv->ds; } static inline struct dsa_switch *dsa_devlink_port_to_ds(struct devlink_port *port) { struct devlink *dl = port->devlink; struct dsa_devlink_priv *dl_priv = devlink_priv(dl); return dl_priv->ds; } static inline int dsa_devlink_port_to_port(struct devlink_port *port) { return port->index; } struct dsa_switch_driver { struct list_head list; const struct dsa_switch_ops *ops; }; bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); bool dsa_mdb_present_in_other_db(struct dsa_switch *ds, int port, const struct switchdev_obj_port_mdb *mdb, struct dsa_db db); /* Keep inline for faster access in hot path */ static inline bool netdev_uses_dsa(const struct net_device *dev) { #if IS_ENABLED(CONFIG_NET_DSA) return dev->dsa_ptr && dev->dsa_ptr->rcv; #endif return false; } /* All DSA tags that push the EtherType to the right (basically all except tail * tags, which don't break dissection) can be treated the same from the * perspective of the flow dissector. * * We need to return: * - offset: the (B - A) difference between: * A. the position of the real EtherType and * B. the current skb->data (aka ETH_HLEN bytes into the frame, aka 2 bytes * after the normal EtherType was supposed to be) * The offset in bytes is exactly equal to the tagger overhead (and half of * that, in __be16 shorts). * * - proto: the value of the real EtherType. */ static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb, __be16 *proto, int *offset) { #if IS_ENABLED(CONFIG_NET_DSA) const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops; int tag_len = ops->needed_headroom; *offset = tag_len; *proto = ((__be16 *)skb->data)[(tag_len / 2) - 1]; #endif } void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds); void dsa_switch_shutdown(struct dsa_switch *ds); struct dsa_switch *dsa_switch_find(int tree_index, int sw_index); void dsa_flush_workqueue(void); #ifdef CONFIG_PM_SLEEP int dsa_switch_suspend(struct dsa_switch *ds); int dsa_switch_resume(struct dsa_switch *ds); #else static inline int dsa_switch_suspend(struct dsa_switch *ds) { return 0; } static inline int dsa_switch_resume(struct dsa_switch *ds) { return 0; } #endif /* CONFIG_PM_SLEEP */ #if IS_ENABLED(CONFIG_NET_DSA) bool dsa_user_dev_check(const struct net_device *dev); #else static inline bool dsa_user_dev_check(const struct net_device *dev) { return false; } #endif netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); bool dsa_supports_eee(struct dsa_switch *ds, int port); #endif
6 5 3 1 1 1 1 1 5 6 6 6 6 6 6 5 6 6 6 6 6 6 6 6 6 5 6 6 6 6 5 5 5 5 5 1 7 7 1 6 6 1 1 1 2 1 1 1 1 1 1 6 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <linux/blkdev.h> #include <linux/module.h> #include <linux/fs.h> #include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/time.h> #include <linux/init.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/backing-dev.h> #include <linux/mount.h> #include <linux/writeback.h> #include <linux/statfs.h> #include <linux/compat.h> #include <linux/parser.h> #include <linux/ctype.h> #include <linux/namei.h> #include <linux/miscdevice.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/crc32c.h> #include <linux/btrfs.h> #include <linux/security.h> #include <linux/fs_parser.h> #include "messages.h" #include "delayed-inode.h" #include "ctree.h" #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" #include "direct-io.h" #include "props.h" #include "xattr.h" #include "bio.h" #include "export.h" #include "compression.h" #include "dev-replace.h" #include "free-space-cache.h" #include "backref.h" #include "space-info.h" #include "sysfs.h" #include "zoned.h" #include "tests/btrfs-tests.h" #include "block-group.h" #include "discard.h" #include "qgroup.h" #include "raid56.h" #include "fs.h" #include "accessors.h" #include "defrag.h" #include "dir-item.h" #include "ioctl.h" #include "scrub.h" #include "verity.h" #include "super.h" #include "extent-tree.h" #define CREATE_TRACE_POINTS #include <trace/events/btrfs.h> static const struct super_operations btrfs_super_ops; static struct file_system_type btrfs_fs_type; static void btrfs_put_super(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); btrfs_info(fs_info, "last unmount of filesystem %pU", fs_info->fs_devices->fsid); close_ctree(fs_info); } /* Store the mount options related information. */ struct btrfs_fs_context { char *subvol_name; u64 subvol_objectid; u64 max_inline; u32 commit_interval; u32 metadata_ratio; u32 thread_pool_size; unsigned long long mount_opt; unsigned long compress_type:4; int compress_level; refcount_t refs; }; enum { Opt_acl, Opt_clear_cache, Opt_commit_interval, Opt_compress, Opt_compress_force, Opt_compress_force_type, Opt_compress_type, Opt_degraded, Opt_device, Opt_fatal_errors, Opt_flushoncommit, Opt_max_inline, Opt_barrier, Opt_datacow, Opt_datasum, Opt_defrag, Opt_discard, Opt_discard_mode, Opt_ratio, Opt_rescan_uuid_tree, Opt_skip_balance, Opt_space_cache, Opt_space_cache_version, Opt_ssd, Opt_ssd_spread, Opt_subvol, Opt_subvol_empty, Opt_subvolid, Opt_thread_pool, Opt_treelog, Opt_user_subvol_rm_allowed, Opt_norecovery, /* Rescue options */ Opt_rescue, Opt_usebackuproot, Opt_nologreplay, /* Debugging options */ Opt_enospc_debug, #ifdef CONFIG_BTRFS_DEBUG Opt_fragment, Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY Opt_ref_verify, #endif Opt_err, }; enum { Opt_fatal_errors_panic, Opt_fatal_errors_bug, }; static const struct constant_table btrfs_parameter_fatal_errors[] = { { "panic", Opt_fatal_errors_panic }, { "bug", Opt_fatal_errors_bug }, {} }; enum { Opt_discard_sync, Opt_discard_async, }; static const struct constant_table btrfs_parameter_discard[] = { { "sync", Opt_discard_sync }, { "async", Opt_discard_async }, {} }; enum { Opt_space_cache_v1, Opt_space_cache_v2, }; static const struct constant_table btrfs_parameter_space_cache[] = { { "v1", Opt_space_cache_v1 }, { "v2", Opt_space_cache_v2 }, {} }; enum { Opt_rescue_usebackuproot, Opt_rescue_nologreplay, Opt_rescue_ignorebadroots, Opt_rescue_ignoredatacsums, Opt_rescue_ignoremetacsums, Opt_rescue_ignoresuperflags, Opt_rescue_parameter_all, }; static const struct constant_table btrfs_parameter_rescue[] = { { "usebackuproot", Opt_rescue_usebackuproot }, { "nologreplay", Opt_rescue_nologreplay }, { "ignorebadroots", Opt_rescue_ignorebadroots }, { "ibadroots", Opt_rescue_ignorebadroots }, { "ignoredatacsums", Opt_rescue_ignoredatacsums }, { "ignoremetacsums", Opt_rescue_ignoremetacsums}, { "ignoresuperflags", Opt_rescue_ignoresuperflags}, { "idatacsums", Opt_rescue_ignoredatacsums }, { "imetacsums", Opt_rescue_ignoremetacsums}, { "isuperflags", Opt_rescue_ignoresuperflags}, { "all", Opt_rescue_parameter_all }, {} }; #ifdef CONFIG_BTRFS_DEBUG enum { Opt_fragment_parameter_data, Opt_fragment_parameter_metadata, Opt_fragment_parameter_all, }; static const struct constant_table btrfs_parameter_fragment[] = { { "data", Opt_fragment_parameter_data }, { "metadata", Opt_fragment_parameter_metadata }, { "all", Opt_fragment_parameter_all }, {} }; #endif static const struct fs_parameter_spec btrfs_fs_parameters[] = { fsparam_flag_no("acl", Opt_acl), fsparam_flag_no("autodefrag", Opt_defrag), fsparam_flag_no("barrier", Opt_barrier), fsparam_flag("clear_cache", Opt_clear_cache), fsparam_u32("commit", Opt_commit_interval), fsparam_flag("compress", Opt_compress), fsparam_string("compress", Opt_compress_type), fsparam_flag("compress-force", Opt_compress_force), fsparam_string("compress-force", Opt_compress_force_type), fsparam_flag_no("datacow", Opt_datacow), fsparam_flag_no("datasum", Opt_datasum), fsparam_flag("degraded", Opt_degraded), fsparam_string("device", Opt_device), fsparam_flag_no("discard", Opt_discard), fsparam_enum("discard", Opt_discard_mode, btrfs_parameter_discard), fsparam_enum("fatal_errors", Opt_fatal_errors, btrfs_parameter_fatal_errors), fsparam_flag_no("flushoncommit", Opt_flushoncommit), fsparam_string("max_inline", Opt_max_inline), fsparam_u32("metadata_ratio", Opt_ratio), fsparam_flag("rescan_uuid_tree", Opt_rescan_uuid_tree), fsparam_flag("skip_balance", Opt_skip_balance), fsparam_flag_no("space_cache", Opt_space_cache), fsparam_enum("space_cache", Opt_space_cache_version, btrfs_parameter_space_cache), fsparam_flag_no("ssd", Opt_ssd), fsparam_flag_no("ssd_spread", Opt_ssd_spread), fsparam_string("subvol", Opt_subvol), fsparam_flag("subvol=", Opt_subvol_empty), fsparam_u64("subvolid", Opt_subvolid), fsparam_u32("thread_pool", Opt_thread_pool), fsparam_flag_no("treelog", Opt_treelog), fsparam_flag("user_subvol_rm_allowed", Opt_user_subvol_rm_allowed), /* Rescue options. */ fsparam_enum("rescue", Opt_rescue, btrfs_parameter_rescue), /* Deprecated, with alias rescue=nologreplay */ __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL), /* Deprecated, with alias rescue=usebackuproot */ __fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL), /* For compatibility only, alias for "rescue=nologreplay". */ fsparam_flag("norecovery", Opt_norecovery), /* Debugging options. */ fsparam_flag_no("enospc_debug", Opt_enospc_debug), #ifdef CONFIG_BTRFS_DEBUG fsparam_enum("fragment", Opt_fragment, btrfs_parameter_fragment), #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY fsparam_flag("ref_verify", Opt_ref_verify), #endif {} }; /* No support for restricting writes to btrfs devices yet... */ static inline blk_mode_t btrfs_open_mode(struct fs_context *fc) { return sb_open_mode(fc->sb_flags) & ~BLK_OPEN_RESTRICT_WRITES; } static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct btrfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; int opt; opt = fs_parse(fc, btrfs_fs_parameters, param, &result); if (opt < 0) return opt; switch (opt) { case Opt_degraded: btrfs_set_opt(ctx->mount_opt, DEGRADED); break; case Opt_subvol_empty: /* * This exists because we used to allow it on accident, so we're * keeping it to maintain ABI. See 37becec95ac3 ("Btrfs: allow * empty subvol= again"). */ break; case Opt_subvol: kfree(ctx->subvol_name); ctx->subvol_name = kstrdup(param->string, GFP_KERNEL); if (!ctx->subvol_name) return -ENOMEM; break; case Opt_subvolid: ctx->subvol_objectid = result.uint_64; /* subvolid=0 means give me the original fs_tree. */ if (!ctx->subvol_objectid) ctx->subvol_objectid = BTRFS_FS_TREE_OBJECTID; break; case Opt_device: { struct btrfs_device *device; blk_mode_t mode = btrfs_open_mode(fc); mutex_lock(&uuid_mutex); device = btrfs_scan_one_device(param->string, mode, false); mutex_unlock(&uuid_mutex); if (IS_ERR(device)) return PTR_ERR(device); break; } case Opt_datasum: if (result.negated) { btrfs_set_opt(ctx->mount_opt, NODATASUM); } else { btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } break; case Opt_datacow: if (result.negated) { btrfs_clear_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); btrfs_set_opt(ctx->mount_opt, NODATACOW); btrfs_set_opt(ctx->mount_opt, NODATASUM); } else { btrfs_clear_opt(ctx->mount_opt, NODATACOW); } break; case Opt_compress_force: case Opt_compress_force_type: btrfs_set_opt(ctx->mount_opt, FORCE_COMPRESS); fallthrough; case Opt_compress: case Opt_compress_type: /* * Provide the same semantics as older kernels that don't use fs * context, specifying the "compress" option clears * "force-compress" without the need to pass * "compress-force=[no|none]" before specifying "compress". */ if (opt != Opt_compress_force && opt != Opt_compress_force_type) btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); if (opt == Opt_compress || opt == Opt_compress_force) { ctx->compress_type = BTRFS_COMPRESS_ZLIB; ctx->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "zlib", 4) == 0) { ctx->compress_type = BTRFS_COMPRESS_ZLIB; ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZLIB, param->string + 4); btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "lzo", 3) == 0) { ctx->compress_type = BTRFS_COMPRESS_LZO; ctx->compress_level = 0; btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "zstd", 4) == 0) { ctx->compress_type = BTRFS_COMPRESS_ZSTD; ctx->compress_level = btrfs_compress_str2level(BTRFS_COMPRESS_ZSTD, param->string + 4); btrfs_set_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, NODATACOW); btrfs_clear_opt(ctx->mount_opt, NODATASUM); } else if (strncmp(param->string, "no", 2) == 0) { ctx->compress_level = 0; ctx->compress_type = 0; btrfs_clear_opt(ctx->mount_opt, COMPRESS); btrfs_clear_opt(ctx->mount_opt, FORCE_COMPRESS); } else { btrfs_err(NULL, "unrecognized compression value %s", param->string); return -EINVAL; } break; case Opt_ssd: if (result.negated) { btrfs_set_opt(ctx->mount_opt, NOSSD); btrfs_clear_opt(ctx->mount_opt, SSD); btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD); } else { btrfs_set_opt(ctx->mount_opt, SSD); btrfs_clear_opt(ctx->mount_opt, NOSSD); } break; case Opt_ssd_spread: if (result.negated) { btrfs_clear_opt(ctx->mount_opt, SSD_SPREAD); } else { btrfs_set_opt(ctx->mount_opt, SSD); btrfs_set_opt(ctx->mount_opt, SSD_SPREAD); btrfs_clear_opt(ctx->mount_opt, NOSSD); } break; case Opt_barrier: if (result.negated) btrfs_set_opt(ctx->mount_opt, NOBARRIER); else btrfs_clear_opt(ctx->mount_opt, NOBARRIER); break; case Opt_thread_pool: if (result.uint_32 == 0) { btrfs_err(NULL, "invalid value 0 for thread_pool"); return -EINVAL; } ctx->thread_pool_size = result.uint_32; break; case Opt_max_inline: ctx->max_inline = memparse(param->string, NULL); break; case Opt_acl: if (result.negated) { fc->sb_flags &= ~SB_POSIXACL; } else { #ifdef CONFIG_BTRFS_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #else btrfs_err(NULL, "support for ACL not compiled in"); return -EINVAL; #endif } /* * VFS limits the ability to toggle ACL on and off via remount, * despite every file system allowing this. This seems to be * an oversight since we all do, but it'll fail if we're * remounting. So don't set the mask here, we'll check it in * btrfs_reconfigure and do the toggling ourselves. */ if (fc->purpose != FS_CONTEXT_FOR_RECONFIGURE) fc->sb_flags_mask |= SB_POSIXACL; break; case Opt_treelog: if (result.negated) btrfs_set_opt(ctx->mount_opt, NOTREELOG); else btrfs_clear_opt(ctx->mount_opt, NOTREELOG); break; case Opt_nologreplay: btrfs_warn(NULL, "'nologreplay' is deprecated, use 'rescue=nologreplay' instead"); btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; case Opt_norecovery: btrfs_info(NULL, "'norecovery' is for compatibility only, recommended to use 'rescue=nologreplay'"); btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; case Opt_flushoncommit: if (result.negated) btrfs_clear_opt(ctx->mount_opt, FLUSHONCOMMIT); else btrfs_set_opt(ctx->mount_opt, FLUSHONCOMMIT); break; case Opt_ratio: ctx->metadata_ratio = result.uint_32; break; case Opt_discard: if (result.negated) { btrfs_clear_opt(ctx->mount_opt, DISCARD_SYNC); btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC); btrfs_set_opt(ctx->mount_opt, NODISCARD); } else { btrfs_set_opt(ctx->mount_opt, DISCARD_SYNC); btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC); } break; case Opt_discard_mode: switch (result.uint_32) { case Opt_discard_sync: btrfs_clear_opt(ctx->mount_opt, DISCARD_ASYNC); btrfs_set_opt(ctx->mount_opt, DISCARD_SYNC); break; case Opt_discard_async: btrfs_clear_opt(ctx->mount_opt, DISCARD_SYNC); btrfs_set_opt(ctx->mount_opt, DISCARD_ASYNC); break; default: btrfs_err(NULL, "unrecognized discard mode value %s", param->key); return -EINVAL; } btrfs_clear_opt(ctx->mount_opt, NODISCARD); break; case Opt_space_cache: if (result.negated) { btrfs_set_opt(ctx->mount_opt, NOSPACECACHE); btrfs_clear_opt(ctx->mount_opt, SPACE_CACHE); btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE); } else { btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE); btrfs_set_opt(ctx->mount_opt, SPACE_CACHE); } break; case Opt_space_cache_version: switch (result.uint_32) { case Opt_space_cache_v1: btrfs_set_opt(ctx->mount_opt, SPACE_CACHE); btrfs_clear_opt(ctx->mount_opt, FREE_SPACE_TREE); break; case Opt_space_cache_v2: btrfs_clear_opt(ctx->mount_opt, SPACE_CACHE); btrfs_set_opt(ctx->mount_opt, FREE_SPACE_TREE); break; default: btrfs_err(NULL, "unrecognized space_cache value %s", param->key); return -EINVAL; } break; case Opt_rescan_uuid_tree: btrfs_set_opt(ctx->mount_opt, RESCAN_UUID_TREE); break; case Opt_clear_cache: btrfs_set_opt(ctx->mount_opt, CLEAR_CACHE); break; case Opt_user_subvol_rm_allowed: btrfs_set_opt(ctx->mount_opt, USER_SUBVOL_RM_ALLOWED); break; case Opt_enospc_debug: if (result.negated) btrfs_clear_opt(ctx->mount_opt, ENOSPC_DEBUG); else btrfs_set_opt(ctx->mount_opt, ENOSPC_DEBUG); break; case Opt_defrag: if (result.negated) btrfs_clear_opt(ctx->mount_opt, AUTO_DEFRAG); else btrfs_set_opt(ctx->mount_opt, AUTO_DEFRAG); break; case Opt_usebackuproot: btrfs_warn(NULL, "'usebackuproot' is deprecated, use 'rescue=usebackuproot' instead"); btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT); /* If we're loading the backup roots we can't trust the space cache. */ btrfs_set_opt(ctx->mount_opt, CLEAR_CACHE); break; case Opt_skip_balance: btrfs_set_opt(ctx->mount_opt, SKIP_BALANCE); break; case Opt_fatal_errors: switch (result.uint_32) { case Opt_fatal_errors_panic: btrfs_set_opt(ctx->mount_opt, PANIC_ON_FATAL_ERROR); break; case Opt_fatal_errors_bug: btrfs_clear_opt(ctx->mount_opt, PANIC_ON_FATAL_ERROR); break; default: btrfs_err(NULL, "unrecognized fatal_errors value %s", param->key); return -EINVAL; } break; case Opt_commit_interval: ctx->commit_interval = result.uint_32; if (ctx->commit_interval == 0) ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; break; case Opt_rescue: switch (result.uint_32) { case Opt_rescue_usebackuproot: btrfs_set_opt(ctx->mount_opt, USEBACKUPROOT); break; case Opt_rescue_nologreplay: btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; case Opt_rescue_ignorebadroots: btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS); break; case Opt_rescue_ignoredatacsums: btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); break; case Opt_rescue_ignoremetacsums: btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS); break; case Opt_rescue_ignoresuperflags: btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS); break; case Opt_rescue_parameter_all: btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS); btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS); btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS); btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS); btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; default: btrfs_info(NULL, "unrecognized rescue option '%s'", param->key); return -EINVAL; } break; #ifdef CONFIG_BTRFS_DEBUG case Opt_fragment: switch (result.uint_32) { case Opt_fragment_parameter_all: btrfs_set_opt(ctx->mount_opt, FRAGMENT_DATA); btrfs_set_opt(ctx->mount_opt, FRAGMENT_METADATA); break; case Opt_fragment_parameter_metadata: btrfs_set_opt(ctx->mount_opt, FRAGMENT_METADATA); break; case Opt_fragment_parameter_data: btrfs_set_opt(ctx->mount_opt, FRAGMENT_DATA); break; default: btrfs_info(NULL, "unrecognized fragment option '%s'", param->key); return -EINVAL; } break; #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY case Opt_ref_verify: btrfs_set_opt(ctx->mount_opt, REF_VERIFY); break; #endif default: btrfs_err(NULL, "unrecognized mount option '%s'", param->key); return -EINVAL; } return 0; } /* * Some options only have meaning at mount time and shouldn't persist across * remounts, or be displayed. Clear these at the end of mount and remount code * paths. */ static void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info) { btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT); btrfs_clear_opt(fs_info->mount_opt, CLEAR_CACHE); btrfs_clear_opt(fs_info->mount_opt, NOSPACECACHE); } static bool check_ro_option(const struct btrfs_fs_info *fs_info, unsigned long long mount_opt, unsigned long long opt, const char *opt_name) { if (mount_opt & opt) { btrfs_err(fs_info, "%s must be used with ro mount option", opt_name); return true; } return false; } bool btrfs_check_options(const struct btrfs_fs_info *info, unsigned long long *mount_opt, unsigned long flags) { bool ret = true; if (!(flags & SB_RDONLY) && (check_ro_option(info, *mount_opt, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") || check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums") || check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREMETACSUMS, "ignoremetacsums") || check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNORESUPERFLAGS, "ignoresuperflags"))) ret = false; if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && !btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE) && !btrfs_raw_test_opt(*mount_opt, CLEAR_CACHE)) { btrfs_err(info, "cannot disable free-space-tree"); ret = false; } if (btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE) && !btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) { btrfs_err(info, "cannot disable free-space-tree with block-group-tree feature"); ret = false; } if (btrfs_check_mountopts_zoned(info, mount_opt)) ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); btrfs_warn(info, "space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } return ret; } /* * This is subtle, we only call this during open_ctree(). We need to pre-load * the mount options with the on-disk settings. Before the new mount API took * effect we would do this on mount and remount. With the new mount API we'll * only do this on the initial mount. * * This isn't a change in behavior, because we're using the current state of the * file system to set the current mount options. If you mounted with special * options to disable these features and then remounted we wouldn't revert the * settings, because mounting without these features cleared the on-disk * settings, so this being called on re-mount is not needed. */ void btrfs_set_free_space_cache_settings(struct btrfs_fs_info *fs_info) { if (fs_info->sectorsize < PAGE_SIZE) { btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); if (!btrfs_test_opt(fs_info, FREE_SPACE_TREE)) { btrfs_info(fs_info, "forcing free space tree for sector size %u with page size %lu", fs_info->sectorsize, PAGE_SIZE); btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE); } } /* * At this point our mount options are populated, so we only mess with * these settings if we don't have any settings already. */ if (btrfs_test_opt(fs_info, FREE_SPACE_TREE)) return; if (btrfs_is_zoned(fs_info) && btrfs_free_space_cache_v1_active(fs_info)) { btrfs_info(fs_info, "zoned: clearing existing space cache"); btrfs_set_super_cache_generation(fs_info->super_copy, 0); return; } if (btrfs_test_opt(fs_info, SPACE_CACHE)) return; if (btrfs_test_opt(fs_info, NOSPACECACHE)) return; /* * At this point we don't have explicit options set by the user, set * them ourselves based on the state of the file system. */ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE); else if (btrfs_free_space_cache_v1_active(fs_info)) btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE); } static void set_device_specific_options(struct btrfs_fs_info *fs_info) { if (!btrfs_test_opt(fs_info, NOSSD) && !fs_info->fs_devices->rotating) btrfs_set_opt(fs_info->mount_opt, SSD); /* * For devices supporting discard turn on discard=async automatically, * unless it's already set or disabled. This could be turned off by * nodiscard for the same mount. * * The zoned mode piggy backs on the discard functionality for * resetting a zone. There is no reason to delay the zone reset as it is * fast enough. So, do not enable async discard for zoned mode. */ if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) || btrfs_test_opt(fs_info, DISCARD_ASYNC) || btrfs_test_opt(fs_info, NODISCARD)) && fs_info->fs_devices->discardable && !btrfs_is_zoned(fs_info)) btrfs_set_opt(fs_info->mount_opt, DISCARD_ASYNC); } char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, u64 subvol_objectid) { struct btrfs_root *root = fs_info->tree_root; struct btrfs_root *fs_root = NULL; struct btrfs_root_ref *root_ref; struct btrfs_inode_ref *inode_ref; struct btrfs_key key; struct btrfs_path *path = NULL; char *name = NULL, *ptr; u64 dirid; int len; int ret; path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; goto err; } name = kmalloc(PATH_MAX, GFP_KERNEL); if (!name) { ret = -ENOMEM; goto err; } ptr = name + PATH_MAX - 1; ptr[0] = '\0'; /* * Walk up the subvolume trees in the tree of tree roots by root * backrefs until we hit the top-level subvolume. */ while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { key.objectid = subvol_objectid; key.type = BTRFS_ROOT_BACKREF_KEY; key.offset = (u64)-1; ret = btrfs_search_backwards(root, &key, path); if (ret < 0) { goto err; } else if (ret > 0) { ret = -ENOENT; goto err; } subvol_objectid = key.offset; root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_root_ref); len = btrfs_root_ref_name_len(path->nodes[0], root_ref); ptr -= len + 1; if (ptr < name) { ret = -ENAMETOOLONG; goto err; } read_extent_buffer(path->nodes[0], ptr + 1, (unsigned long)(root_ref + 1), len); ptr[0] = '/'; dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); btrfs_release_path(path); fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true); if (IS_ERR(fs_root)) { ret = PTR_ERR(fs_root); fs_root = NULL; goto err; } /* * Walk up the filesystem tree by inode refs until we hit the * root directory. */ while (dirid != BTRFS_FIRST_FREE_OBJECTID) { key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; ret = btrfs_search_backwards(fs_root, &key, path); if (ret < 0) { goto err; } else if (ret > 0) { ret = -ENOENT; goto err; } dirid = key.offset; inode_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(path->nodes[0], inode_ref); ptr -= len + 1; if (ptr < name) { ret = -ENAMETOOLONG; goto err; } read_extent_buffer(path->nodes[0], ptr + 1, (unsigned long)(inode_ref + 1), len); ptr[0] = '/'; btrfs_release_path(path); } btrfs_put_root(fs_root); fs_root = NULL; } btrfs_free_path(path); if (ptr == name + PATH_MAX - 1) { name[0] = '/'; name[1] = '\0'; } else { memmove(name, ptr, name + PATH_MAX - ptr); } return name; err: btrfs_put_root(fs_root); btrfs_free_path(path); kfree(name); return ERR_PTR(ret); } static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) { struct btrfs_root *root = fs_info->tree_root; struct btrfs_dir_item *di; struct btrfs_path *path; struct btrfs_key location; struct fscrypt_str name = FSTR_INIT("default", 7); u64 dir_id; path = btrfs_alloc_path(); if (!path) return -ENOMEM; /* * Find the "default" dir item which points to the root item that we * will mount by default if we haven't been given a specific subvolume * to mount. */ dir_id = btrfs_super_root_dir(fs_info->super_copy); di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); if (IS_ERR(di)) { btrfs_free_path(path); return PTR_ERR(di); } if (!di) { /* * Ok the default dir item isn't there. This is weird since * it's always been there, but don't freak out, just try and * mount the top-level subvolume. */ btrfs_free_path(path); *objectid = BTRFS_FS_TREE_OBJECTID; return 0; } btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); btrfs_free_path(path); *objectid = location.objectid; return 0; } static int btrfs_fill_super(struct super_block *sb, struct btrfs_fs_devices *fs_devices) { struct btrfs_inode *inode; struct btrfs_fs_info *fs_info = btrfs_sb(sb); int err; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_magic = BTRFS_SUPER_MAGIC; sb->s_op = &btrfs_super_ops; sb->s_d_op = &btrfs_dentry_operations; sb->s_export_op = &btrfs_export_ops; #ifdef CONFIG_FS_VERITY sb->s_vop = &btrfs_verityops; #endif sb->s_xattr = btrfs_xattr_handlers; sb->s_time_gran = 1; sb->s_iflags |= SB_I_CGROUPWB | SB_I_ALLOW_HSM; err = super_setup_bdi(sb); if (err) { btrfs_err(fs_info, "super_setup_bdi failed"); return err; } err = open_ctree(sb, fs_devices); if (err) { btrfs_err(fs_info, "open_ctree failed: %d", err); return err; } inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { err = PTR_ERR(inode); btrfs_handle_fs_error(fs_info, err, NULL); goto fail_close; } sb->s_root = d_make_root(&inode->vfs_inode); if (!sb->s_root) { err = -ENOMEM; goto fail_close; } sb->s_flags |= SB_ACTIVE; return 0; fail_close: close_ctree(fs_info); return err; } int btrfs_sync_fs(struct super_block *sb, int wait) { struct btrfs_trans_handle *trans; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_root *root = fs_info->tree_root; trace_btrfs_sync_fs(fs_info, wait); if (!wait) { filemap_flush(fs_info->btree_inode->i_mapping); return 0; } btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL); trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { /* no transaction, don't bother */ if (PTR_ERR(trans) == -ENOENT) { /* * Exit unless we have some pending changes * that need to go through commit */ if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags)) return 0; /* * A non-blocking test if the fs is frozen. We must not * start a new transaction here otherwise a deadlock * happens. The pending operations are delayed to the * next commit after thawing. */ if (sb_start_write_trylock(sb)) sb_end_write(sb); else return 0; trans = btrfs_start_transaction(root, 0); } if (IS_ERR(trans)) return PTR_ERR(trans); } return btrfs_commit_transaction(trans); } static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed) { seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s); *printed = true; } static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) { struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); const char *compress_type; const char *subvol_name; bool printed = false; if (btrfs_test_opt(info, DEGRADED)) seq_puts(seq, ",degraded"); if (btrfs_test_opt(info, NODATASUM)) seq_puts(seq, ",nodatasum"); if (btrfs_test_opt(info, NODATACOW)) seq_puts(seq, ",nodatacow"); if (btrfs_test_opt(info, NOBARRIER)) seq_puts(seq, ",nobarrier"); if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) seq_printf(seq, ",max_inline=%llu", info->max_inline); if (info->thread_pool_size != min_t(unsigned long, num_online_cpus() + 2, 8)) seq_printf(seq, ",thread_pool=%u", info->thread_pool_size); if (btrfs_test_opt(info, COMPRESS)) { compress_type = btrfs_compress_type2str(info->compress_type); if (btrfs_test_opt(info, FORCE_COMPRESS)) seq_printf(seq, ",compress-force=%s", compress_type); else seq_printf(seq, ",compress=%s", compress_type); if (info->compress_level) seq_printf(seq, ":%d", info->compress_level); } if (btrfs_test_opt(info, NOSSD)) seq_puts(seq, ",nossd"); if (btrfs_test_opt(info, SSD_SPREAD)) seq_puts(seq, ",ssd_spread"); else if (btrfs_test_opt(info, SSD)) seq_puts(seq, ",ssd"); if (btrfs_test_opt(info, NOTREELOG)) seq_puts(seq, ",notreelog"); if (btrfs_test_opt(info, NOLOGREPLAY)) print_rescue_option(seq, "nologreplay", &printed); if (btrfs_test_opt(info, USEBACKUPROOT)) print_rescue_option(seq, "usebackuproot", &printed); if (btrfs_test_opt(info, IGNOREBADROOTS)) print_rescue_option(seq, "ignorebadroots", &printed); if (btrfs_test_opt(info, IGNOREDATACSUMS)) print_rescue_option(seq, "ignoredatacsums", &printed); if (btrfs_test_opt(info, IGNOREMETACSUMS)) print_rescue_option(seq, "ignoremetacsums", &printed); if (btrfs_test_opt(info, IGNORESUPERFLAGS)) print_rescue_option(seq, "ignoresuperflags", &printed); if (btrfs_test_opt(info, FLUSHONCOMMIT)) seq_puts(seq, ",flushoncommit"); if (btrfs_test_opt(info, DISCARD_SYNC)) seq_puts(seq, ",discard"); if (btrfs_test_opt(info, DISCARD_ASYNC)) seq_puts(seq, ",discard=async"); if (!(info->sb->s_flags & SB_POSIXACL)) seq_puts(seq, ",noacl"); if (btrfs_free_space_cache_v1_active(info)) seq_puts(seq, ",space_cache"); else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) seq_puts(seq, ",space_cache=v2"); else seq_puts(seq, ",nospace_cache"); if (btrfs_test_opt(info, RESCAN_UUID_TREE)) seq_puts(seq, ",rescan_uuid_tree"); if (btrfs_test_opt(info, CLEAR_CACHE)) seq_puts(seq, ",clear_cache"); if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED)) seq_puts(seq, ",user_subvol_rm_allowed"); if (btrfs_test_opt(info, ENOSPC_DEBUG)) seq_puts(seq, ",enospc_debug"); if (btrfs_test_opt(info, AUTO_DEFRAG)) seq_puts(seq, ",autodefrag"); if (btrfs_test_opt(info, SKIP_BALANCE)) seq_puts(seq, ",skip_balance"); if (info->metadata_ratio) seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio); if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR)) seq_puts(seq, ",fatal_errors=panic"); if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) seq_printf(seq, ",commit=%u", info->commit_interval); #ifdef CONFIG_BTRFS_DEBUG if (btrfs_test_opt(info, FRAGMENT_DATA)) seq_puts(seq, ",fragment=data"); if (btrfs_test_opt(info, FRAGMENT_METADATA)) seq_puts(seq, ",fragment=metadata"); #endif if (btrfs_test_opt(info, REF_VERIFY)) seq_puts(seq, ",ref_verify"); seq_printf(seq, ",subvolid=%llu", btrfs_root_id(BTRFS_I(d_inode(dentry))->root)); subvol_name = btrfs_get_subvol_name_from_objectid(info, btrfs_root_id(BTRFS_I(d_inode(dentry))->root)); if (!IS_ERR(subvol_name)) { seq_show_option(seq, "subvol", subvol_name); kfree(subvol_name); } return 0; } /* * subvolumes are identified by ino 256 */ static inline int is_subvolume_inode(struct inode *inode) { if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) return 1; return 0; } static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, struct vfsmount *mnt) { struct dentry *root; int ret; if (!subvol_name) { if (!subvol_objectid) { ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), &subvol_objectid); if (ret) { root = ERR_PTR(ret); goto out; } } subvol_name = btrfs_get_subvol_name_from_objectid( btrfs_sb(mnt->mnt_sb), subvol_objectid); if (IS_ERR(subvol_name)) { root = ERR_CAST(subvol_name); subvol_name = NULL; goto out; } } root = mount_subtree(mnt, subvol_name); /* mount_subtree() drops our reference on the vfsmount. */ mnt = NULL; if (!IS_ERR(root)) { struct super_block *s = root->d_sb; struct btrfs_fs_info *fs_info = btrfs_sb(s); struct inode *root_inode = d_inode(root); u64 root_objectid = btrfs_root_id(BTRFS_I(root_inode)->root); ret = 0; if (!is_subvolume_inode(root_inode)) { btrfs_err(fs_info, "'%s' is not a valid subvolume", subvol_name); ret = -EINVAL; } if (subvol_objectid && root_objectid != subvol_objectid) { /* * This will also catch a race condition where a * subvolume which was passed by ID is renamed and * another subvolume is renamed over the old location. */ btrfs_err(fs_info, "subvol '%s' does not match subvolid %llu", subvol_name, subvol_objectid); ret = -EINVAL; } if (ret) { dput(root); root = ERR_PTR(ret); deactivate_locked_super(s); } } out: mntput(mnt); kfree(subvol_name); return root; } static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, u32 new_pool_size, u32 old_pool_size) { if (new_pool_size == old_pool_size) return; fs_info->thread_pool_size = new_pool_size; btrfs_info(fs_info, "resize thread pool %d -> %d", old_pool_size, new_pool_size); btrfs_workqueue_set_max(fs_info->workers, new_pool_size); btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); workqueue_set_max_active(fs_info->endio_workers, new_pool_size); workqueue_set_max_active(fs_info->endio_meta_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); } static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, unsigned long long old_opts, int flags) { if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || (flags & SB_RDONLY))) { /* wait for any defraggers to finish */ wait_event(fs_info->transaction_wait, (atomic_read(&fs_info->defrag_running) == 0)); if (flags & SB_RDONLY) sync_filesystem(fs_info->sb); } } static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, unsigned long long old_opts) { const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); /* * We need to cleanup all defragable inodes if the autodefragment is * close or the filesystem is read only. */ if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) { btrfs_cleanup_defrag_inodes(fs_info); } /* If we toggled discard async */ if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_discard_resume(fs_info); else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && !btrfs_test_opt(fs_info, DISCARD_ASYNC)) btrfs_discard_cleanup(fs_info); /* If we toggled space cache */ if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) btrfs_set_free_space_cache_v1_active(fs_info, cache_opt); } static int btrfs_remount_rw(struct btrfs_fs_info *fs_info) { int ret; if (BTRFS_FS_ERROR(fs_info)) { btrfs_err(fs_info, "remounting read-write after error is not allowed"); return -EINVAL; } if (fs_info->fs_devices->rw_devices == 0) return -EACCES; if (!btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "too many missing devices, writable remount is not allowed"); return -EACCES; } if (btrfs_super_log_root(fs_info->super_copy) != 0) { btrfs_warn(fs_info, "mount required to replay tree-log, cannot remount read-write"); return -EINVAL; } /* * NOTE: when remounting with a change that does writes, don't put it * anywhere above this point, as we are not sure to be safe to write * until we pass the above checks. */ ret = btrfs_start_pre_rw_mount(fs_info); if (ret) return ret; btrfs_clear_sb_rdonly(fs_info->sb); set_bit(BTRFS_FS_OPEN, &fs_info->flags); /* * If we've gone from readonly -> read-write, we need to get our * sync/async discard lists in the right state. */ btrfs_discard_resume(fs_info); return 0; } static int btrfs_remount_ro(struct btrfs_fs_info *fs_info) { /* * This also happens on 'umount -rf' or on shutdown, when the * filesystem is busy. */ cancel_work_sync(&fs_info->async_reclaim_work); cancel_work_sync(&fs_info->async_data_reclaim_work); btrfs_discard_cleanup(fs_info); /* Wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); /* Avoid complains from lockdep et al. */ up(&fs_info->uuid_tree_rescan_sem); btrfs_set_sb_rdonly(fs_info->sb); /* * Setting SB_RDONLY will put the cleaner thread to sleep at the next * loop if it's already active. If it's already asleep, we'll leave * unused block groups on disk until we're mounted read-write again * unless we clean them up here. */ btrfs_delete_unused_bgs(fs_info); /* * The cleaner task could be already running before we set the flag * BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). We must make * sure that after we finish the remount, i.e. after we call * btrfs_commit_super(), the cleaner can no longer start a transaction * - either because it was dropping a dead root, running delayed iputs * or deleting an unused block group (the cleaner picked a block * group from the list of unused block groups before we were able to * in the previous call to btrfs_delete_unused_bgs()). */ wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, TASK_UNINTERRUPTIBLE); /* * We've set the superblock to RO mode, so we might have made the * cleaner task sleep without running all pending delayed iputs. Go * through all the delayed iputs here, so that if an unmount happens * without remounting RW we don't end up at finishing close_ctree() * with a non-empty list of delayed iputs. */ btrfs_run_delayed_iputs(fs_info); btrfs_dev_replace_suspend_for_unmount(fs_info); btrfs_scrub_cancel(fs_info); btrfs_pause_balance(fs_info); /* * Pause the qgroup rescan worker if it is running. We don't want it to * be still running after we are in RO mode, as after that, by the time * we unmount, it might have left a transaction open, so we would leak * the transaction and/or crash. */ btrfs_qgroup_wait_for_completion(fs_info, false); return btrfs_commit_super(fs_info); } static void btrfs_ctx_to_info(struct btrfs_fs_info *fs_info, struct btrfs_fs_context *ctx) { fs_info->max_inline = ctx->max_inline; fs_info->commit_interval = ctx->commit_interval; fs_info->metadata_ratio = ctx->metadata_ratio; fs_info->thread_pool_size = ctx->thread_pool_size; fs_info->mount_opt = ctx->mount_opt; fs_info->compress_type = ctx->compress_type; fs_info->compress_level = ctx->compress_level; } static void btrfs_info_to_ctx(struct btrfs_fs_info *fs_info, struct btrfs_fs_context *ctx) { ctx->max_inline = fs_info->max_inline; ctx->commit_interval = fs_info->commit_interval; ctx->metadata_ratio = fs_info->metadata_ratio; ctx->thread_pool_size = fs_info->thread_pool_size; ctx->mount_opt = fs_info->mount_opt; ctx->compress_type = fs_info->compress_type; ctx->compress_level = fs_info->compress_level; } #define btrfs_info_if_set(fs_info, old_ctx, opt, fmt, args...) \ do { \ if ((!old_ctx || !btrfs_raw_test_opt(old_ctx->mount_opt, opt)) && \ btrfs_raw_test_opt(fs_info->mount_opt, opt)) \ btrfs_info(fs_info, fmt, ##args); \ } while (0) #define btrfs_info_if_unset(fs_info, old_ctx, opt, fmt, args...) \ do { \ if ((old_ctx && btrfs_raw_test_opt(old_ctx->mount_opt, opt)) && \ !btrfs_raw_test_opt(fs_info->mount_opt, opt)) \ btrfs_info(fs_info, fmt, ##args); \ } while (0) static void btrfs_emit_options(struct btrfs_fs_info *info, struct btrfs_fs_context *old) { btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts"); btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations"); btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme"); btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers"); btrfs_info_if_set(info, old, NOTREELOG, "disabling tree log"); btrfs_info_if_set(info, old, NOLOGREPLAY, "disabling log replay at mount time"); btrfs_info_if_set(info, old, FLUSHONCOMMIT, "turning on flush-on-commit"); btrfs_info_if_set(info, old, DISCARD_SYNC, "turning on sync discard"); btrfs_info_if_set(info, old, DISCARD_ASYNC, "turning on async discard"); btrfs_info_if_set(info, old, FREE_SPACE_TREE, "enabling free space tree"); btrfs_info_if_set(info, old, SPACE_CACHE, "enabling disk space caching"); btrfs_info_if_set(info, old, CLEAR_CACHE, "force clearing of disk cache"); btrfs_info_if_set(info, old, AUTO_DEFRAG, "enabling auto defrag"); btrfs_info_if_set(info, old, FRAGMENT_DATA, "fragmenting data"); btrfs_info_if_set(info, old, FRAGMENT_METADATA, "fragmenting metadata"); btrfs_info_if_set(info, old, REF_VERIFY, "doing ref verification"); btrfs_info_if_set(info, old, USEBACKUPROOT, "trying to use backup root at mount time"); btrfs_info_if_set(info, old, IGNOREBADROOTS, "ignoring bad roots"); btrfs_info_if_set(info, old, IGNOREDATACSUMS, "ignoring data csums"); btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums"); btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers"); btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log"); btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching"); btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree"); btrfs_info_if_unset(info, old, AUTO_DEFRAG, "disabling auto defrag"); btrfs_info_if_unset(info, old, COMPRESS, "use no compression"); /* Did the compression settings change? */ if (btrfs_test_opt(info, COMPRESS) && (!old || old->compress_type != info->compress_type || old->compress_level != info->compress_level || (!btrfs_raw_test_opt(old->mount_opt, FORCE_COMPRESS) && btrfs_raw_test_opt(info->mount_opt, FORCE_COMPRESS)))) { const char *compress_type = btrfs_compress_type2str(info->compress_type); btrfs_info(info, "%s %s compression, level %d", btrfs_test_opt(info, FORCE_COMPRESS) ? "force" : "use", compress_type, info->compress_level); } if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) btrfs_info(info, "max_inline set to %llu", info->max_inline); } static int btrfs_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_fs_context *ctx = fc->fs_private; struct btrfs_fs_context old_ctx; int ret = 0; bool mount_reconfigure = (fc->s_fs_info != NULL); btrfs_info_to_ctx(fs_info, &old_ctx); /* * This is our "bind mount" trick, we don't want to allow the user to do * anything other than mount a different ro/rw and a different subvol, * all of the mount options should be maintained. */ if (mount_reconfigure) ctx->mount_opt = old_ctx.mount_opt; sync_filesystem(sb); set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); if (!btrfs_check_options(fs_info, &ctx->mount_opt, fc->sb_flags)) return -EINVAL; ret = btrfs_check_features(fs_info, !(fc->sb_flags & SB_RDONLY)); if (ret < 0) return ret; btrfs_ctx_to_info(fs_info, ctx); btrfs_remount_begin(fs_info, old_ctx.mount_opt, fc->sb_flags); btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_ctx.thread_pool_size); if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && (!sb_rdonly(sb) || (fc->sb_flags & SB_RDONLY))) { btrfs_warn(fs_info, "remount supports changing free space tree only from RO to RW"); /* Make sure free space cache options match the state on disk. */ if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE); btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); } if (btrfs_free_space_cache_v1_active(fs_info)) { btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE); btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE); } } ret = 0; if (!sb_rdonly(sb) && (fc->sb_flags & SB_RDONLY)) ret = btrfs_remount_ro(fs_info); else if (sb_rdonly(sb) && !(fc->sb_flags & SB_RDONLY)) ret = btrfs_remount_rw(fs_info); if (ret) goto restore; /* * If we set the mask during the parameter parsing VFS would reject the * remount. Here we can set the mask and the value will be updated * appropriately. */ if ((fc->sb_flags & SB_POSIXACL) != (sb->s_flags & SB_POSIXACL)) fc->sb_flags_mask |= SB_POSIXACL; btrfs_emit_options(fs_info, &old_ctx); wake_up_process(fs_info->transaction_kthread); btrfs_remount_cleanup(fs_info, old_ctx.mount_opt); btrfs_clear_oneshot_options(fs_info); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); return 0; restore: btrfs_ctx_to_info(fs_info, &old_ctx); btrfs_remount_cleanup(fs_info, old_ctx.mount_opt); clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); return ret; } /* Used to sort the devices by max_avail(descending sort) */ static int btrfs_cmp_device_free_bytes(const void *a, const void *b) { const struct btrfs_device_info *dev_info1 = a; const struct btrfs_device_info *dev_info2 = b; if (dev_info1->max_avail > dev_info2->max_avail) return -1; else if (dev_info1->max_avail < dev_info2->max_avail) return 1; return 0; } /* * sort the devices by max_avail, in which max free extent size of each device * is stored.(Descending Sort) */ static inline void btrfs_descending_sort_devices( struct btrfs_device_info *devices, size_t nr_devices) { sort(devices, nr_devices, sizeof(struct btrfs_device_info), btrfs_cmp_device_free_bytes, NULL); } /* * The helper to calc the free space on the devices that can be used to store * file data. */ static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, u64 *free_bytes) { struct btrfs_device_info *devices_info; struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; u64 type; u64 avail_space; u64 min_stripe_size; int num_stripes = 1; int i = 0, nr_devices; const struct btrfs_raid_attr *rattr; /* * We aren't under the device list lock, so this is racy-ish, but good * enough for our purposes. */ nr_devices = fs_info->fs_devices->open_devices; if (!nr_devices) { smp_mb(); nr_devices = fs_info->fs_devices->open_devices; ASSERT(nr_devices); if (!nr_devices) { *free_bytes = 0; return 0; } } devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), GFP_KERNEL); if (!devices_info) return -ENOMEM; /* calc min stripe number for data space allocation */ type = btrfs_data_alloc_profile(fs_info); rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)]; if (type & BTRFS_BLOCK_GROUP_RAID0) num_stripes = nr_devices; else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK) num_stripes = rattr->ncopies; else if (type & BTRFS_BLOCK_GROUP_RAID10) num_stripes = 4; /* Adjust for more than 1 stripe per device */ min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN; rcu_read_lock(); list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state) || !device->bdev || test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) continue; if (i >= nr_devices) break; avail_space = device->total_bytes - device->bytes_used; /* align with stripe_len */ avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN); /* * Ensure we have at least min_stripe_size on top of the * reserved space on the device. */ if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size) continue; avail_space -= BTRFS_DEVICE_RANGE_RESERVED; devices_info[i].dev = device; devices_info[i].max_avail = avail_space; i++; } rcu_read_unlock(); nr_devices = i; btrfs_descending_sort_devices(devices_info, nr_devices); i = nr_devices - 1; avail_space = 0; while (nr_devices >= rattr->devs_min) { num_stripes = min(num_stripes, nr_devices); if (devices_info[i].max_avail >= min_stripe_size) { int j; u64 alloc_size; avail_space += devices_info[i].max_avail * num_stripes; alloc_size = devices_info[i].max_avail; for (j = i + 1 - num_stripes; j <= i; j++) devices_info[j].max_avail -= alloc_size; } i--; nr_devices--; } kfree(devices_info); *free_bytes = avail_space; return 0; } /* * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. * * If there's a redundant raid level at DATA block groups, use the respective * multiplier to scale the sizes. * * Unused device space usage is based on simulating the chunk allocator * algorithm that respects the device sizes and order of allocations. This is * a close approximation of the actual use but there are other factors that may * change the result (like a new metadata chunk). * * If metadata is exhausted, f_bavail will be 0. */ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); struct btrfs_super_block *disk_super = fs_info->super_copy; struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; u64 total_free_meta = 0; u32 bits = fs_info->sectorsize_bits; __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; u64 thresh = 0; int mixed = 0; list_for_each_entry(found, &fs_info->space_info, list) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) { int i; total_free_data += found->disk_total - found->disk_used; total_free_data -= btrfs_account_ro_block_groups_free_space(found); for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { if (!list_empty(&found->block_groups[i])) factor = btrfs_bg_type_to_factor( btrfs_raid_array[i].bg_flag); } } /* * Metadata in mixed block group profiles are accounted in data */ if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) { if (found->flags & BTRFS_BLOCK_GROUP_DATA) mixed = 1; else total_free_meta += found->disk_total - found->disk_used; } total_used += found->disk_used; } buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); buf->f_blocks >>= bits; buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits); /* Account global block reserve as used, it's in logical size already */ spin_lock(&block_rsv->lock); /* Mixed block groups accounting is not byte-accurate, avoid overflow */ if (buf->f_bfree >= block_rsv->size >> bits) buf->f_bfree -= block_rsv->size >> bits; else buf->f_bfree = 0; spin_unlock(&block_rsv->lock); buf->f_bavail = div_u64(total_free_data, factor); ret = btrfs_calc_avail_data_space(fs_info, &total_free_data); if (ret) return ret; buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail = buf->f_bavail >> bits; /* * We calculate the remaining metadata space minus global reserve. If * this is (supposedly) smaller than zero, there's no space. But this * does not hold in practice, the exhausted state happens where's still * some positive delta. So we apply some guesswork and compare the * delta to a 4M threshold. (Practically observed delta was ~2M.) * * We probably cannot calculate the exact threshold value because this * depends on the internal reservations requested by various * operations, so some operations that consume a few metadata will * succeed even if the Avail is zero. But this is better than the other * way around. */ thresh = SZ_4M; /* * We only want to claim there's no available space if we can no longer * allocate chunks for our metadata profile and our global reserve will * not fit in the free metadata space. If we aren't ->full then we * still can allocate chunks and thus are fine using the currently * calculated f_bavail. */ if (!mixed && block_rsv->space_info->full && (total_free_meta < thresh || total_free_meta - thresh < block_rsv->size)) buf->f_bavail = 0; buf->f_type = BTRFS_SUPER_MAGIC; buf->f_bsize = fs_info->sectorsize; buf->f_namelen = BTRFS_NAME_LEN; /* We treat it as constant endianness (it doesn't matter _which_) because we want the fsid to come out the same whether mounted on a big-endian or little-endian host */ buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); /* Mask in the root object ID too, to disambiguate subvols */ buf->f_fsid.val[0] ^= btrfs_root_id(BTRFS_I(d_inode(dentry))->root) >> 32; buf->f_fsid.val[1] ^= btrfs_root_id(BTRFS_I(d_inode(dentry))->root); return 0; } static int btrfs_fc_test_super(struct super_block *sb, struct fs_context *fc) { struct btrfs_fs_info *p = fc->s_fs_info; struct btrfs_fs_info *fs_info = btrfs_sb(sb); return fs_info->fs_devices == p->fs_devices; } static int btrfs_get_tree_super(struct fs_context *fc) { struct btrfs_fs_info *fs_info = fc->s_fs_info; struct btrfs_fs_context *ctx = fc->fs_private; struct btrfs_fs_devices *fs_devices = NULL; struct block_device *bdev; struct btrfs_device *device; struct super_block *sb; blk_mode_t mode = btrfs_open_mode(fc); int ret; btrfs_ctx_to_info(fs_info, ctx); mutex_lock(&uuid_mutex); /* * With 'true' passed to btrfs_scan_one_device() (mount time) we expect * either a valid device or an error. */ device = btrfs_scan_one_device(fc->source, mode, true); ASSERT(device != NULL); if (IS_ERR(device)) { mutex_unlock(&uuid_mutex); return PTR_ERR(device); } fs_devices = device->fs_devices; fs_info->fs_devices = fs_devices; ret = btrfs_open_devices(fs_devices, mode, &btrfs_fs_type); mutex_unlock(&uuid_mutex); if (ret) return ret; if (!(fc->sb_flags & SB_RDONLY) && fs_devices->rw_devices == 0) { ret = -EACCES; goto error; } bdev = fs_devices->latest_dev->bdev; /* * From now on the error handling is not straightforward. * * If successful, this will transfer the fs_info into the super block, * and fc->s_fs_info will be NULL. However if there's an existing * super, we'll still have fc->s_fs_info populated. If we error * completely out it'll be cleaned up when we drop the fs_context, * otherwise it's tied to the lifetime of the super_block. */ sb = sget_fc(fc, btrfs_fc_test_super, set_anon_super_fc); if (IS_ERR(sb)) { ret = PTR_ERR(sb); goto error; } set_device_specific_options(fs_info); if (sb->s_root) { btrfs_close_devices(fs_devices); /* * At this stage we may have RO flag mismatch between * fc->sb_flags and sb->s_flags. Caller should detect such * mismatch and reconfigure with sb->s_umount rwsem held if * needed. */ } else { snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev); shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id); btrfs_sb(sb)->bdev_holder = &btrfs_fs_type; ret = btrfs_fill_super(sb, fs_devices); if (ret) { deactivate_locked_super(sb); return ret; } } btrfs_clear_oneshot_options(fs_info); fc->root = dget(sb->s_root); return 0; error: btrfs_close_devices(fs_devices); return ret; } /* * Ever since commit 0723a0473fb4 ("btrfs: allow mounting btrfs subvolumes * with different ro/rw options") the following works: * * (i) mount /dev/sda3 -o subvol=foo,ro /mnt/foo * (ii) mount /dev/sda3 -o subvol=bar,rw /mnt/bar * * which looks nice and innocent but is actually pretty intricate and deserves * a long comment. * * On another filesystem a subvolume mount is close to something like: * * (iii) # create rw superblock + initial mount * mount -t xfs /dev/sdb /opt/ * * # create ro bind mount * mount --bind -o ro /opt/foo /mnt/foo * * # unmount initial mount * umount /opt * * Of course, there's some special subvolume sauce and there's the fact that the * sb->s_root dentry is really swapped after mount_subtree(). But conceptually * it's very close and will help us understand the issue. * * The old mount API didn't cleanly distinguish between a mount being made ro * and a superblock being made ro. The only way to change the ro state of * either object was by passing ms_rdonly. If a new mount was created via * mount(2) such as: * * mount("/dev/sdb", "/mnt", "xfs", ms_rdonly, null); * * the MS_RDONLY flag being specified had two effects: * * (1) MNT_READONLY was raised -> the resulting mount got * @mnt->mnt_flags |= MNT_READONLY raised. * * (2) MS_RDONLY was passed to the filesystem's mount method and the filesystems * made the superblock ro. Note, how SB_RDONLY has the same value as * ms_rdonly and is raised whenever MS_RDONLY is passed through mount(2). * * Creating a subtree mount via (iii) ends up leaving a rw superblock with a * subtree mounted ro. * * But consider the effect on the old mount API on btrfs subvolume mounting * which combines the distinct step in (iii) into a single step. * * By issuing (i) both the mount and the superblock are turned ro. Now when (ii) * is issued the superblock is ro and thus even if the mount created for (ii) is * rw it wouldn't help. Hence, btrfs needed to transition the superblock from ro * to rw for (ii) which it did using an internal remount call. * * IOW, subvolume mounting was inherently complicated due to the ambiguity of * MS_RDONLY in mount(2). Note, this ambiguity has mount(8) always translate * "ro" to MS_RDONLY. IOW, in both (i) and (ii) "ro" becomes MS_RDONLY when * passed by mount(8) to mount(2). * * Enter the new mount API. The new mount API disambiguates making a mount ro * and making a superblock ro. * * (3) To turn a mount ro the MOUNT_ATTR_ONLY flag can be used with either * fsmount() or mount_setattr() this is a pure VFS level change for a * specific mount or mount tree that is never seen by the filesystem itself. * * (4) To turn a superblock ro the "ro" flag must be used with * fsconfig(FSCONFIG_SET_FLAG, "ro"). This option is seen by the filesystem * in fc->sb_flags. * * But, currently the util-linux mount command already utilizes the new mount * API and is still setting fsconfig(FSCONFIG_SET_FLAG, "ro") no matter if it's * btrfs or not, setting the whole super block RO. To make per-subvolume mounting * work with different options work we need to keep backward compatibility. */ static int btrfs_reconfigure_for_mount(struct fs_context *fc, struct vfsmount *mnt) { int ret = 0; if (fc->sb_flags & SB_RDONLY) return ret; down_write(&mnt->mnt_sb->s_umount); if (!(fc->sb_flags & SB_RDONLY) && (mnt->mnt_sb->s_flags & SB_RDONLY)) ret = btrfs_reconfigure(fc); up_write(&mnt->mnt_sb->s_umount); return ret; } static int btrfs_get_tree_subvol(struct fs_context *fc) { struct btrfs_fs_info *fs_info = NULL; struct btrfs_fs_context *ctx = fc->fs_private; struct fs_context *dup_fc; struct dentry *dentry; struct vfsmount *mnt; int ret = 0; /* * Setup a dummy root and fs_info for test/set super. This is because * we don't actually fill this stuff out until open_ctree, but we need * then open_ctree will properly initialize the file system specific * settings later. btrfs_init_fs_info initializes the static elements * of the fs_info (locks and such) to make cleanup easier if we find a * superblock with our given fs_devices later on at sget() time. */ fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); if (!fs_info) return -ENOMEM; fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); if (!fs_info->super_copy || !fs_info->super_for_commit) { btrfs_free_fs_info(fs_info); return -ENOMEM; } btrfs_init_fs_info(fs_info); dup_fc = vfs_dup_fs_context(fc); if (IS_ERR(dup_fc)) { btrfs_free_fs_info(fs_info); return PTR_ERR(dup_fc); } /* * When we do the sget_fc this gets transferred to the sb, so we only * need to set it on the dup_fc as this is what creates the super block. */ dup_fc->s_fs_info = fs_info; /* * We'll do the security settings in our btrfs_get_tree_super() mount * loop, they were duplicated into dup_fc, we can drop the originals * here. */ security_free_mnt_opts(&fc->security); fc->security = NULL; mnt = fc_mount(dup_fc); if (IS_ERR(mnt)) { put_fs_context(dup_fc); return PTR_ERR(mnt); } ret = btrfs_reconfigure_for_mount(dup_fc, mnt); put_fs_context(dup_fc); if (ret) { mntput(mnt); return ret; } /* * This free's ->subvol_name, because if it isn't set we have to * allocate a buffer to hold the subvol_name, so we just drop our * reference to it here. */ dentry = mount_subvol(ctx->subvol_name, ctx->subvol_objectid, mnt); ctx->subvol_name = NULL; if (IS_ERR(dentry)) return PTR_ERR(dentry); fc->root = dentry; return 0; } static int btrfs_get_tree(struct fs_context *fc) { /* * Since we use mount_subtree to mount the default/specified subvol, we * have to do mounts in two steps. * * First pass through we call btrfs_get_tree_subvol(), this is just a * wrapper around fc_mount() to call back into here again, and this time * we'll call btrfs_get_tree_super(). This will do the open_ctree() and * everything to open the devices and file system. Then we return back * with a fully constructed vfsmount in btrfs_get_tree_subvol(), and * from there we can do our mount_subvol() call, which will lookup * whichever subvol we're mounting and setup this fc with the * appropriate dentry for the subvol. */ if (fc->s_fs_info) return btrfs_get_tree_super(fc); return btrfs_get_tree_subvol(fc); } static void btrfs_kill_super(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); kill_anon_super(sb); btrfs_free_fs_info(fs_info); } static void btrfs_free_fs_context(struct fs_context *fc) { struct btrfs_fs_context *ctx = fc->fs_private; struct btrfs_fs_info *fs_info = fc->s_fs_info; if (fs_info) btrfs_free_fs_info(fs_info); if (ctx && refcount_dec_and_test(&ctx->refs)) { kfree(ctx->subvol_name); kfree(ctx); } } static int btrfs_dup_fs_context(struct fs_context *fc, struct fs_context *src_fc) { struct btrfs_fs_context *ctx = src_fc->fs_private; /* * Give a ref to our ctx to this dup, as we want to keep it around for * our original fc so we can have the subvolume name or objectid. * * We unset ->source in the original fc because the dup needs it for * mounting, and then once we free the dup it'll free ->source, so we * need to make sure we're only pointing to it in one fc. */ refcount_inc(&ctx->refs); fc->fs_private = ctx; fc->source = src_fc->source; src_fc->source = NULL; return 0; } static const struct fs_context_operations btrfs_fs_context_ops = { .parse_param = btrfs_parse_param, .reconfigure = btrfs_reconfigure, .get_tree = btrfs_get_tree, .dup = btrfs_dup_fs_context, .free = btrfs_free_fs_context, }; static int btrfs_init_fs_context(struct fs_context *fc) { struct btrfs_fs_context *ctx; ctx = kzalloc(sizeof(struct btrfs_fs_context), GFP_KERNEL); if (!ctx) return -ENOMEM; refcount_set(&ctx->refs, 1); fc->fs_private = ctx; fc->ops = &btrfs_fs_context_ops; if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { btrfs_info_to_ctx(btrfs_sb(fc->root->d_sb), ctx); } else { ctx->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); ctx->max_inline = BTRFS_DEFAULT_MAX_INLINE; ctx->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL; } #ifdef CONFIG_BTRFS_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #endif fc->sb_flags |= SB_I_VERSION; return 0; } static struct file_system_type btrfs_fs_type = { .owner = THIS_MODULE, .name = "btrfs", .init_fs_context = btrfs_init_fs_context, .parameters = btrfs_fs_parameters, .kill_sb = btrfs_kill_super, .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP | FS_MGTIME, }; MODULE_ALIAS_FS("btrfs"); static int btrfs_control_open(struct inode *inode, struct file *file) { /* * The control file's private_data is used to hold the * transaction when it is started and is used to keep * track of whether a transaction is already in progress. */ file->private_data = NULL; return 0; } /* * Used by /dev/btrfs-control for devices ioctls. */ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct btrfs_ioctl_vol_args *vol; struct btrfs_device *device = NULL; dev_t devt = 0; int ret = -ENOTTY; if (!capable(CAP_SYS_ADMIN)) return -EPERM; vol = memdup_user((void __user *)arg, sizeof(*vol)); if (IS_ERR(vol)) return PTR_ERR(vol); ret = btrfs_check_ioctl_vol_args_path(vol); if (ret < 0) goto out; switch (cmd) { case BTRFS_IOC_SCAN_DEV: mutex_lock(&uuid_mutex); /* * Scanning outside of mount can return NULL which would turn * into 0 error code. */ device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false); ret = PTR_ERR_OR_ZERO(device); mutex_unlock(&uuid_mutex); break; case BTRFS_IOC_FORGET_DEV: if (vol->name[0] != 0) { ret = lookup_bdev(vol->name, &devt); if (ret) break; } ret = btrfs_forget_devices(devt); break; case BTRFS_IOC_DEVICES_READY: mutex_lock(&uuid_mutex); /* * Scanning outside of mount can return NULL which would turn * into 0 error code. */ device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false); if (IS_ERR_OR_NULL(device)) { mutex_unlock(&uuid_mutex); if (IS_ERR(device)) ret = PTR_ERR(device); else ret = 0; break; } ret = !(device->fs_devices->num_devices == device->fs_devices->total_devices); mutex_unlock(&uuid_mutex); break; case BTRFS_IOC_GET_SUPPORTED_FEATURES: ret = btrfs_ioctl_get_supported_features((void __user*)arg); break; } out: kfree(vol); return ret; } static int btrfs_freeze(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); set_bit(BTRFS_FS_FROZEN, &fs_info->flags); /* * We don't need a barrier here, we'll wait for any transaction that * could be in progress on other threads (and do delayed iputs that * we want to avoid on a frozen filesystem), or do the commit * ourselves. */ return btrfs_commit_current_transaction(fs_info->tree_root); } static int check_dev_super(struct btrfs_device *dev) { struct btrfs_fs_info *fs_info = dev->fs_info; struct btrfs_super_block *sb; u64 last_trans; u16 csum_type; int ret = 0; /* This should be called with fs still frozen. */ ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags)); /* Missing dev, no need to check. */ if (!dev->bdev) return 0; /* Only need to check the primary super block. */ sb = btrfs_read_dev_one_super(dev->bdev, 0, true); if (IS_ERR(sb)) return PTR_ERR(sb); /* Verify the checksum. */ csum_type = btrfs_super_csum_type(sb); if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { btrfs_err(fs_info, "csum type changed, has %u expect %u", csum_type, btrfs_super_csum_type(fs_info->super_copy)); ret = -EUCLEAN; goto out; } if (btrfs_check_super_csum(fs_info, sb)) { btrfs_err(fs_info, "csum for on-disk super block no longer matches"); ret = -EUCLEAN; goto out; } /* Btrfs_validate_super() includes fsid check against super->fsid. */ ret = btrfs_validate_super(fs_info, sb, 0); if (ret < 0) goto out; last_trans = btrfs_get_last_trans_committed(fs_info); if (btrfs_super_generation(sb) != last_trans) { btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", btrfs_super_generation(sb), last_trans); ret = -EUCLEAN; goto out; } out: btrfs_release_disk_super(sb); return ret; } static int btrfs_unfreeze(struct super_block *sb) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); struct btrfs_device *device; int ret = 0; /* * Make sure the fs is not changed by accident (like hibernation then * modified by other OS). * If we found anything wrong, we mark the fs error immediately. * * And since the fs is frozen, no one can modify the fs yet, thus * we don't need to hold device_list_mutex. */ list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { ret = check_dev_super(device); if (ret < 0) { btrfs_handle_fs_error(fs_info, ret, "super block on devid %llu got modified unexpectedly", device->devid); break; } } clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); /* * We still return 0, to allow VFS layer to unfreeze the fs even the * above checks failed. Since the fs is either fine or read-only, we're * safe to continue, without causing further damage. */ return 0; } static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); /* * There should be always a valid pointer in latest_dev, it may be stale * for a short moment in case it's being deleted but still valid until * the end of RCU grace period. */ rcu_read_lock(); seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\"); rcu_read_unlock(); return 0; } static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_control *sc) { struct btrfs_fs_info *fs_info = btrfs_sb(sb); const s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps); trace_btrfs_extent_map_shrinker_count(fs_info, nr); return nr; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) { const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan); struct btrfs_fs_info *fs_info = btrfs_sb(sb); btrfs_free_extent_maps(fs_info, nr_to_scan); /* The extent map shrinker runs asynchronously, so always return 0. */ return 0; } static const struct super_operations btrfs_super_ops = { .drop_inode = btrfs_drop_inode, .evict_inode = btrfs_evict_inode, .put_super = btrfs_put_super, .sync_fs = btrfs_sync_fs, .show_options = btrfs_show_options, .show_devname = btrfs_show_devname, .alloc_inode = btrfs_alloc_inode, .destroy_inode = btrfs_destroy_inode, .free_inode = btrfs_free_inode, .statfs = btrfs_statfs, .freeze_fs = btrfs_freeze, .unfreeze_fs = btrfs_unfreeze, .nr_cached_objects = btrfs_nr_cached_objects, .free_cached_objects = btrfs_free_cached_objects, }; static const struct file_operations btrfs_ctl_fops = { .open = btrfs_control_open, .unlocked_ioctl = btrfs_control_ioctl, .compat_ioctl = compat_ptr_ioctl, .owner = THIS_MODULE, .llseek = noop_llseek, }; static struct miscdevice btrfs_misc = { .minor = BTRFS_MINOR, .name = "btrfs-control", .fops = &btrfs_ctl_fops }; MODULE_ALIAS_MISCDEV(BTRFS_MINOR); MODULE_ALIAS("devname:btrfs-control"); static int __init btrfs_interface_init(void) { return misc_register(&btrfs_misc); } static __cold void btrfs_interface_exit(void) { misc_deregister(&btrfs_misc); } static int __init btrfs_print_mod_info(void) { static const char options[] = "" #ifdef CONFIG_BTRFS_EXPERIMENTAL ", experimental=on" #endif #ifdef CONFIG_BTRFS_DEBUG ", debug=on" #endif #ifdef CONFIG_BTRFS_ASSERT ", assert=on" #endif #ifdef CONFIG_BTRFS_FS_REF_VERIFY ", ref-verify=on" #endif #ifdef CONFIG_BLK_DEV_ZONED ", zoned=yes" #else ", zoned=no" #endif #ifdef CONFIG_FS_VERITY ", fsverity=yes" #else ", fsverity=no" #endif ; #ifdef CONFIG_BTRFS_EXPERIMENTAL if (btrfs_get_mod_read_policy() == NULL) pr_info("Btrfs loaded%s\n", options); else pr_info("Btrfs loaded%s, read_policy=%s\n", options, btrfs_get_mod_read_policy()); #else pr_info("Btrfs loaded%s\n", options); #endif return 0; } static int register_btrfs(void) { return register_filesystem(&btrfs_fs_type); } static void unregister_btrfs(void) { unregister_filesystem(&btrfs_fs_type); } /* Helper structure for long init/exit functions. */ struct init_sequence { int (*init_func)(void); /* Can be NULL if the init_func doesn't need cleanup. */ void (*exit_func)(void); }; static const struct init_sequence mod_init_seq[] = { { .init_func = btrfs_props_init, .exit_func = NULL, }, { .init_func = btrfs_init_sysfs, .exit_func = btrfs_exit_sysfs, }, { .init_func = btrfs_init_compress, .exit_func = btrfs_exit_compress, }, { .init_func = btrfs_init_cachep, .exit_func = btrfs_destroy_cachep, }, { .init_func = btrfs_init_dio, .exit_func = btrfs_destroy_dio, }, { .init_func = btrfs_transaction_init, .exit_func = btrfs_transaction_exit, }, { .init_func = btrfs_ctree_init, .exit_func = btrfs_ctree_exit, }, { .init_func = btrfs_free_space_init, .exit_func = btrfs_free_space_exit, }, { .init_func = extent_state_init_cachep, .exit_func = extent_state_free_cachep, }, { .init_func = extent_buffer_init_cachep, .exit_func = extent_buffer_free_cachep, }, { .init_func = btrfs_bioset_init, .exit_func = btrfs_bioset_exit, }, { .init_func = extent_map_init, .exit_func = extent_map_exit, #ifdef CONFIG_BTRFS_EXPERIMENTAL }, { .init_func = btrfs_read_policy_init, .exit_func = NULL, #endif }, { .init_func = ordered_data_init, .exit_func = ordered_data_exit, }, { .init_func = btrfs_delayed_inode_init, .exit_func = btrfs_delayed_inode_exit, }, { .init_func = btrfs_auto_defrag_init, .exit_func = btrfs_auto_defrag_exit, }, { .init_func = btrfs_delayed_ref_init, .exit_func = btrfs_delayed_ref_exit, }, { .init_func = btrfs_prelim_ref_init, .exit_func = btrfs_prelim_ref_exit, }, { .init_func = btrfs_interface_init, .exit_func = btrfs_interface_exit, }, { .init_func = btrfs_print_mod_info, .exit_func = NULL, }, { .init_func = btrfs_run_sanity_tests, .exit_func = NULL, }, { .init_func = register_btrfs, .exit_func = unregister_btrfs, } }; static bool mod_init_result[ARRAY_SIZE(mod_init_seq)]; static __always_inline void btrfs_exit_btrfs_fs(void) { int i; for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) { if (!mod_init_result[i]) continue; if (mod_init_seq[i].exit_func) mod_init_seq[i].exit_func(); mod_init_result[i] = false; } } static void __exit exit_btrfs_fs(void) { btrfs_exit_btrfs_fs(); btrfs_cleanup_fs_uuids(); } static int __init init_btrfs_fs(void) { int ret; int i; for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) { ASSERT(!mod_init_result[i]); ret = mod_init_seq[i].init_func(); if (ret < 0) { btrfs_exit_btrfs_fs(); return ret; } mod_init_result[i] = true; } return 0; } late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_DESCRIPTION("B-Tree File System (BTRFS)"); MODULE_LICENSE("GPL"); MODULE_SOFTDEP("pre: crc32c"); MODULE_SOFTDEP("pre: xxhash64"); MODULE_SOFTDEP("pre: sha256"); MODULE_SOFTDEP("pre: blake2b-256");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ * Copyright (C) 2021, Alibaba Cloud */ #ifndef __EROFS_INTERNAL_H #define __EROFS_INTERNAL_H #include <linux/fs.h> #include <linux/dax.h> #include <linux/dcache.h> #include <linux/mm.h> #include <linux/module.h> #include <linux/pagemap.h> #include <linux/bio.h> #include <linux/magic.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/iomap.h> #include "erofs_fs.h" __printf(2, 3) void _erofs_printk(struct super_block *sb, const char *fmt, ...); #define erofs_err(sb, fmt, ...) \ _erofs_printk(sb, KERN_ERR fmt "\n", ##__VA_ARGS__) #define erofs_info(sb, fmt, ...) \ _erofs_printk(sb, KERN_INFO fmt "\n", ##__VA_ARGS__) #ifdef CONFIG_EROFS_FS_DEBUG #define DBG_BUGON BUG_ON #else #define DBG_BUGON(x) ((void)(x)) #endif /* !CONFIG_EROFS_FS_DEBUG */ /* EROFS_SUPER_MAGIC_V1 to represent the whole file system */ #define EROFS_SUPER_MAGIC EROFS_SUPER_MAGIC_V1 typedef u64 erofs_nid_t; typedef u64 erofs_off_t; typedef u64 erofs_blk_t; struct erofs_device_info { char *path; struct erofs_fscache *fscache; struct file *file; struct dax_device *dax_dev; u64 dax_part_off; erofs_blk_t blocks; erofs_blk_t uniaddr; }; enum { EROFS_SYNC_DECOMPRESS_AUTO, EROFS_SYNC_DECOMPRESS_FORCE_ON, EROFS_SYNC_DECOMPRESS_FORCE_OFF }; struct erofs_mount_opts { /* current strategy of how to use managed cache */ unsigned char cache_strategy; /* strategy of sync decompression (0 - auto, 1 - force on, 2 - force off) */ unsigned int sync_decompress; /* threshold for decompression synchronously */ unsigned int max_sync_decompress_pages; unsigned int mount_opt; }; struct erofs_dev_context { struct idr tree; struct rw_semaphore rwsem; unsigned int extra_devices; bool flatdev; }; /* all filesystem-wide lz4 configurations */ struct erofs_sb_lz4_info { /* # of pages needed for EROFS lz4 rolling decompression */ u16 max_distance_pages; /* maximum possible blocks for pclusters in the filesystem */ u16 max_pclusterblks; }; struct erofs_domain { refcount_t ref; struct list_head list; struct fscache_volume *volume; char *domain_id; }; struct erofs_fscache { struct fscache_cookie *cookie; struct inode *inode; /* anonymous inode for the blob */ /* used for share domain mode */ struct erofs_domain *domain; struct list_head node; refcount_t ref; char *name; }; struct erofs_xattr_prefix_item { struct erofs_xattr_long_prefix *prefix; u8 infix_len; }; struct erofs_sb_info { struct erofs_device_info dif0; struct erofs_mount_opts opt; /* options */ #ifdef CONFIG_EROFS_FS_ZIP /* list for all registered superblocks, mainly for shrinker */ struct list_head list; struct mutex umount_mutex; /* managed XArray arranged in physical block number */ struct xarray managed_pslots; unsigned int shrinker_run_no; u16 available_compr_algs; /* pseudo inode to manage cached pages */ struct inode *managed_cache; struct erofs_sb_lz4_info lz4; #endif /* CONFIG_EROFS_FS_ZIP */ struct inode *packed_inode; struct erofs_dev_context *devs; u64 total_blocks; u32 meta_blkaddr; #ifdef CONFIG_EROFS_FS_XATTR u32 xattr_blkaddr; u32 xattr_prefix_start; u8 xattr_prefix_count; struct erofs_xattr_prefix_item *xattr_prefixes; unsigned int xattr_filter_reserved; #endif u16 device_id_mask; /* valid bits of device id to be used */ unsigned char islotbits; /* inode slot unit size in bit shift */ unsigned char blkszbits; /* filesystem block size in bit shift */ u32 sb_size; /* total superblock size */ u32 fixed_nsec; s64 epoch; /* what we really care is nid, rather than ino.. */ erofs_nid_t root_nid; erofs_nid_t packed_nid; /* used for statfs, f_files - f_favail */ u64 inos; u32 feature_compat; u32 feature_incompat; /* sysfs support */ struct kobject s_kobj; /* /sys/fs/erofs/<devname> */ struct completion s_kobj_unregister; /* fscache support */ struct fscache_volume *volume; struct erofs_domain *domain; char *fsid; char *domain_id; }; #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) #define EROFS_I_SB(inode) ((struct erofs_sb_info *)(inode)->i_sb->s_fs_info) /* Mount flags set via mount options or defaults */ #define EROFS_MOUNT_XATTR_USER 0x00000010 #define EROFS_MOUNT_POSIX_ACL 0x00000020 #define EROFS_MOUNT_DAX_ALWAYS 0x00000040 #define EROFS_MOUNT_DAX_NEVER 0x00000080 #define EROFS_MOUNT_DIRECT_IO 0x00000100 #define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option) #define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) #define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option) static inline bool erofs_is_fileio_mode(struct erofs_sb_info *sbi) { return IS_ENABLED(CONFIG_EROFS_FS_BACKED_BY_FILE) && sbi->dif0.file; } static inline bool erofs_is_fscache_mode(struct super_block *sb) { return IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && !erofs_is_fileio_mode(EROFS_SB(sb)) && !sb->s_bdev; } enum { EROFS_ZIP_CACHE_DISABLED, EROFS_ZIP_CACHE_READAHEAD, EROFS_ZIP_CACHE_READAROUND }; struct erofs_buf { struct address_space *mapping; struct file *file; struct page *page; void *base; }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) #define erofs_blknr(sb, pos) ((erofs_blk_t)((pos) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, pos) ((pos) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) #define EROFS_FEATURE_FUNCS(name, compat, feature) \ static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ { \ return sbi->feature_##compat & EROFS_FEATURE_##feature; \ } EROFS_FEATURE_FUNCS(zero_padding, incompat, INCOMPAT_ZERO_PADDING) EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE) EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2) EROFS_FEATURE_FUNCS(ztailpacking, incompat, INCOMPAT_ZTAILPACKING) EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS) EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE) EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES) EROFS_FEATURE_FUNCS(48bit, incompat, INCOMPAT_48BIT) EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER) /* atomic flag definitions */ #define EROFS_I_EA_INITED_BIT 0 #define EROFS_I_Z_INITED_BIT 1 /* bitlock definitions (arranged in reverse order) */ #define EROFS_I_BL_XATTR_BIT (BITS_PER_LONG - 1) #define EROFS_I_BL_Z_BIT (BITS_PER_LONG - 2) struct erofs_inode { erofs_nid_t nid; /* atomic flags (including bitlocks) */ unsigned long flags; unsigned char datalayout; unsigned char inode_isize; bool dot_omitted; unsigned int xattr_isize; unsigned int xattr_name_filter; unsigned int xattr_shared_count; unsigned int *xattr_shared_xattrs; union { erofs_blk_t startblk; struct { unsigned short chunkformat; unsigned char chunkbits; }; #ifdef CONFIG_EROFS_FS_ZIP struct { unsigned short z_advise; unsigned char z_algorithmtype[2]; unsigned char z_lclusterbits; union { u64 z_tailextent_headlcn; u64 z_extents; }; erofs_off_t z_fragmentoff; unsigned short z_idata_size; }; #endif /* CONFIG_EROFS_FS_ZIP */ }; /* the corresponding vfs inode */ struct inode vfs_inode; }; #define EROFS_I(ptr) container_of(ptr, struct erofs_inode, vfs_inode) static inline erofs_off_t erofs_iloc(struct inode *inode) { struct erofs_sb_info *sbi = EROFS_I_SB(inode); return erofs_pos(inode->i_sb, sbi->meta_blkaddr) + (EROFS_I(inode)->nid << sbi->islotbits); } static inline unsigned int erofs_inode_version(unsigned int ifmt) { return (ifmt >> EROFS_I_VERSION_BIT) & EROFS_I_VERSION_MASK; } static inline unsigned int erofs_inode_datalayout(unsigned int ifmt) { return (ifmt >> EROFS_I_DATALAYOUT_BIT) & EROFS_I_DATALAYOUT_MASK; } /* reclaiming is never triggered when allocating new folios. */ static inline struct folio *erofs_grab_folio_nowait(struct address_space *as, pgoff_t index) { return __filemap_get_folio(as, index, FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, readahead_gfp_mask(as) & ~__GFP_RECLAIM); } /* Has a disk mapping */ #define EROFS_MAP_MAPPED 0x0001 /* Located in metadata (could be copied from bd_inode) */ #define EROFS_MAP_META 0x0002 /* The extent is encoded */ #define EROFS_MAP_ENCODED 0x0004 /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED 0x0008 /* Located in the special packed inode */ #define EROFS_MAP_FRAGMENT 0x0010 /* The extent refers to partial decompressed data */ #define EROFS_MAP_PARTIAL_REF 0x0020 struct erofs_map_blocks { struct erofs_buf buf; erofs_off_t m_pa, m_la; u64 m_plen, m_llen; unsigned short m_deviceid; char m_algorithmformat; unsigned int m_flags; }; /* * Used to get the exact decompressed length, e.g. fiemap (consider lookback * approach instead if possible since it's more metadata lightweight.) */ #define EROFS_GET_BLOCKS_FIEMAP 0x0001 /* Used to map the whole extent if non-negligible data is requested for LZMA */ #define EROFS_GET_BLOCKS_READMORE 0x0002 /* Used to map tail extent for tailpacking inline or fragment pcluster */ #define EROFS_GET_BLOCKS_FINDTAIL 0x0004 enum { Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, Z_EROFS_COMPRESSION_INTERLACED, Z_EROFS_COMPRESSION_RUNTIME_MAX }; struct erofs_map_dev { struct super_block *m_sb; struct erofs_device_info *m_dif; struct block_device *m_bdev; erofs_off_t m_pa; unsigned int m_deviceid; }; extern const struct super_operations erofs_sops; extern const struct address_space_operations erofs_aops; extern const struct address_space_operations erofs_fileio_aops; extern const struct address_space_operations z_erofs_aops; extern const struct address_space_operations erofs_fscache_access_aops; extern const struct inode_operations erofs_generic_iops; extern const struct inode_operations erofs_symlink_iops; extern const struct inode_operations erofs_fast_symlink_iops; extern const struct inode_operations erofs_dir_iops; extern const struct file_operations erofs_file_fops; extern const struct file_operations erofs_dir_fops; extern const struct iomap_ops z_erofs_iomap_report_ops; /* flags for erofs_fscache_register_cookie() */ #define EROFS_REG_COOKIE_SHARE 0x0001 #define EROFS_REG_COOKIE_NEED_NOEXIST 0x0002 void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp); void erofs_unmap_metabuf(struct erofs_buf *buf); void erofs_put_metabuf(struct erofs_buf *buf); void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap); void erofs_init_metabuf(struct erofs_buf *buf, struct super_block *sb); void *erofs_read_metabuf(struct erofs_buf *buf, struct super_block *sb, erofs_off_t offset, bool need_kmap); int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); int erofs_map_blocks(struct inode *inode, struct erofs_map_blocks *map); void erofs_onlinefolio_init(struct folio *folio); void erofs_onlinefolio_split(struct folio *folio); void erofs_onlinefolio_end(struct folio *folio, int err); struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid); int erofs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags); int erofs_namei(struct inode *dir, const struct qstr *name, erofs_nid_t *nid, unsigned int *d_type); static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) { int retried = 0; while (1) { void *p = vm_map_ram(pages, count, -1); /* retry two more times (totally 3 times) */ if (p || ++retried >= 3) return p; vm_unmap_aliases(); } return NULL; } int erofs_register_sysfs(struct super_block *sb); void erofs_unregister_sysfs(struct super_block *sb); int __init erofs_init_sysfs(void); void erofs_exit_sysfs(void); struct page *__erofs_allocpage(struct page **pagepool, gfp_t gfp, bool tryrsv); static inline struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp) { return __erofs_allocpage(pagepool, gfp, false); } static inline void erofs_pagepool_add(struct page **pagepool, struct page *page) { set_page_private(page, (unsigned long)*pagepool); *pagepool = page; } void erofs_release_pages(struct page **pagepool); #ifdef CONFIG_EROFS_FS_ZIP #define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) extern atomic_long_t erofs_global_shrink_cnt; void erofs_shrinker_register(struct super_block *sb); void erofs_shrinker_unregister(struct super_block *sb); int __init erofs_init_shrinker(void); void erofs_exit_shrinker(void); int __init z_erofs_init_subsystem(void); void z_erofs_exit_subsystem(void); int z_erofs_init_super(struct super_block *sb); unsigned long z_erofs_shrink_scan(struct erofs_sb_info *sbi, unsigned long nr_shrink); int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags); void *z_erofs_get_gbuf(unsigned int requiredpages); void z_erofs_put_gbuf(void *ptr); int z_erofs_gbuf_growsize(unsigned int nrpages); int __init z_erofs_gbuf_init(void); void z_erofs_gbuf_exit(void); int z_erofs_parse_cfgs(struct super_block *sb, struct erofs_super_block *dsb); #else static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} static inline int erofs_init_shrinker(void) { return 0; } static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_subsystem(void) { return 0; } static inline void z_erofs_exit_subsystem(void) {} static inline int z_erofs_init_super(struct super_block *sb) { return 0; } #endif /* !CONFIG_EROFS_FS_ZIP */ #ifdef CONFIG_EROFS_FS_BACKED_BY_FILE struct bio *erofs_fileio_bio_alloc(struct erofs_map_dev *mdev); void erofs_fileio_submit_bio(struct bio *bio); #else static inline struct bio *erofs_fileio_bio_alloc(struct erofs_map_dev *mdev) { return NULL; } static inline void erofs_fileio_submit_bio(struct bio *bio) {} #endif #ifdef CONFIG_EROFS_FS_ONDEMAND int erofs_fscache_register_fs(struct super_block *sb); void erofs_fscache_unregister_fs(struct super_block *sb); struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, char *name, unsigned int flags); void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev); void erofs_fscache_submit_bio(struct bio *bio); #else static inline int erofs_fscache_register_fs(struct super_block *sb) { return -EOPNOTSUPP; } static inline void erofs_fscache_unregister_fs(struct super_block *sb) {} static inline struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, char *name, unsigned int flags) { return ERR_PTR(-EOPNOTSUPP); } static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) { } static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) { return NULL; } static inline void erofs_fscache_submit_bio(struct bio *bio) {} #endif #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #endif /* __EROFS_INTERNAL_H */
4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 /* * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * Copyright (c) 2005 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/rbtree.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/completion.h> #include <linux/slab.h> #include <linux/module.h> #include <linux/sysctl.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> #include <rdma/iw_portmap.h> #include <rdma/rdma_netlink.h> #include "iwcm.h" MODULE_AUTHOR("Tom Tucker"); MODULE_DESCRIPTION("iWARP CM"); MODULE_LICENSE("Dual BSD/GPL"); static const char * const iwcm_rej_reason_strs[] = { [ECONNRESET] = "reset by remote host", [ECONNREFUSED] = "refused by remote application", [ETIMEDOUT] = "setup timeout", }; const char *__attribute_const__ iwcm_reject_msg(int reason) { size_t index; /* iWARP uses negative errnos */ index = -reason; if (index < ARRAY_SIZE(iwcm_rej_reason_strs) && iwcm_rej_reason_strs[index]) return iwcm_rej_reason_strs[index]; else return "unrecognized reason"; } EXPORT_SYMBOL(iwcm_reject_msg); static struct rdma_nl_cbs iwcm_nl_cb_table[RDMA_NL_IWPM_NUM_OPS] = { [RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb}, [RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb}, [RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb}, [RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb}, [RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb}, [RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb}, [RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}, [RDMA_NL_IWPM_HELLO] = {.dump = iwpm_hello_cb} }; static struct workqueue_struct *iwcm_wq; struct iwcm_work { struct work_struct work; struct iwcm_id_private *cm_id; struct list_head list; struct iw_cm_event event; struct list_head free_list; }; static unsigned int default_backlog = 256; static struct ctl_table_header *iwcm_ctl_table_hdr; static struct ctl_table iwcm_ctl_table[] = { { .procname = "default_backlog", .data = &default_backlog, .maxlen = sizeof(default_backlog), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_INT_MAX, }, }; /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: * LISTENING IDS: Get enough elements preallocated to handle the * listen backlog. * ACTIVE IDS: 4: CONNECT_REPLY, ESTABLISHED, DISCONNECT, CLOSE * PASSIVE IDS: 3: ESTABLISHED, DISCONNECT, CLOSE * * Allocating them in connect and listen avoids having to deal * with allocation failures on the event upcall from the provider (which * is called in the interrupt context). * * One exception is when creating the cm_id for incoming connection requests. * There are two cases: * 1) in the event upcall, cm_event_handler(), for a listening cm_id. If * the backlog is exceeded, then no more connection request events will * be processed. cm_event_handler() returns -ENOMEM in this case. Its up * to the provider to reject the connection request. * 2) in the connection request workqueue handler, cm_conn_req_handler(). * If work elements cannot be allocated for the new connect request cm_id, * then IWCM will call the provider reject method. This is ok since * cm_conn_req_handler() runs in the workqueue thread context. */ static struct iwcm_work *get_work(struct iwcm_id_private *cm_id_priv) { struct iwcm_work *work; if (list_empty(&cm_id_priv->work_free_list)) return NULL; work = list_first_entry(&cm_id_priv->work_free_list, struct iwcm_work, free_list); list_del_init(&work->free_list); return work; } static void put_work(struct iwcm_work *work) { list_add(&work->free_list, &work->cm_id->work_free_list); } static void dealloc_work_entries(struct iwcm_id_private *cm_id_priv) { struct list_head *e, *tmp; list_for_each_safe(e, tmp, &cm_id_priv->work_free_list) { list_del(e); kfree(list_entry(e, struct iwcm_work, free_list)); } } static int alloc_work_entries(struct iwcm_id_private *cm_id_priv, int count) { struct iwcm_work *work; BUG_ON(!list_empty(&cm_id_priv->work_free_list)); while (count--) { work = kmalloc(sizeof(struct iwcm_work), GFP_KERNEL); if (!work) { dealloc_work_entries(cm_id_priv); return -ENOMEM; } work->cm_id = cm_id_priv; INIT_LIST_HEAD(&work->list); put_work(work); } return 0; } /* * Save private data from incoming connection requests to * iw_cm_event, so the low level driver doesn't have to. Adjust * the event ptr to point to the local copy. */ static int copy_private_data(struct iw_cm_event *event) { void *p; p = kmemdup(event->private_data, event->private_data_len, GFP_ATOMIC); if (!p) return -ENOMEM; event->private_data = p; return 0; } static void free_cm_id(struct iwcm_id_private *cm_id_priv) { dealloc_work_entries(cm_id_priv); kfree(cm_id_priv); } /* * Release a reference on cm_id. If the last reference is being * released, free the cm_id and return 'true'. */ static bool iwcm_deref_id(struct iwcm_id_private *cm_id_priv) { if (refcount_dec_and_test(&cm_id_priv->refcount)) { BUG_ON(!list_empty(&cm_id_priv->work_list)); free_cm_id(cm_id_priv); return true; } return false; } static void add_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); refcount_inc(&cm_id_priv->refcount); } static void rem_ref(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); (void)iwcm_deref_id(cm_id_priv); } static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *event); struct iw_cm_id *iw_create_cm_id(struct ib_device *device, iw_cm_handler cm_handler, void *context) { struct iwcm_id_private *cm_id_priv; cm_id_priv = kzalloc(sizeof(*cm_id_priv), GFP_KERNEL); if (!cm_id_priv) return ERR_PTR(-ENOMEM); cm_id_priv->state = IW_CM_STATE_IDLE; cm_id_priv->id.device = device; cm_id_priv->id.cm_handler = cm_handler; cm_id_priv->id.context = context; cm_id_priv->id.event_handler = cm_event_handler; cm_id_priv->id.add_ref = add_ref; cm_id_priv->id.rem_ref = rem_ref; spin_lock_init(&cm_id_priv->lock); refcount_set(&cm_id_priv->refcount, 1); init_waitqueue_head(&cm_id_priv->connect_wait); init_completion(&cm_id_priv->destroy_comp); INIT_LIST_HEAD(&cm_id_priv->work_list); INIT_LIST_HEAD(&cm_id_priv->work_free_list); return &cm_id_priv->id; } EXPORT_SYMBOL(iw_create_cm_id); static int iwcm_modify_qp_err(struct ib_qp *qp) { struct ib_qp_attr qp_attr; if (!qp) return -EINVAL; qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * This is really the RDMAC CLOSING state. It is most similar to the * IB SQD QP state. */ static int iwcm_modify_qp_sqd(struct ib_qp *qp) { struct ib_qp_attr qp_attr; BUG_ON(qp == NULL); qp_attr.qp_state = IB_QPS_SQD; return ib_modify_qp(qp, &qp_attr, IB_QP_STATE); } /* * CM_ID <-- CLOSING * * Block if a passive or active connection is currently being processed. Then * process the event as follows: * - If we are ESTABLISHED, move to CLOSING and modify the QP state * based on the abrupt flag * - If the connection is already in the CLOSING or IDLE state, the peer is * disconnecting concurrently with us and we've already seen the * DISCONNECT event -- ignore the request and return 0 * - Disconnect on a listening endpoint returns -EINVAL */ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* Wait if we're currently in a connect or accept downcall */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_CLOSING; /* QP could be <nul> for user-mode client */ if (cm_id_priv->qp) qp = cm_id_priv->qp; else ret = -EINVAL; break; case IW_CM_STATE_LISTEN: ret = -EINVAL; break; case IW_CM_STATE_CLOSING: /* remote peer closed first */ case IW_CM_STATE_IDLE: /* accept or connect returned !0 */ break; case IW_CM_STATE_CONN_RECV: /* * App called disconnect before/without calling accept after * connect_request event delivered. */ break; case IW_CM_STATE_CONN_SENT: /* Can only get here if wait above fails */ default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) { if (abrupt) ret = iwcm_modify_qp_err(qp); else ret = iwcm_modify_qp_sqd(qp); /* * If both sides are disconnecting the QP could * already be in ERR or SQD states */ ret = 0; } return ret; } EXPORT_SYMBOL(iw_cm_disconnect); /* * CM_ID <-- DESTROYING * * Clean up all resources associated with the connection and release * the initial reference taken by iw_create_cm_id. * * Returns true if and only if the last cm_id_priv reference has been dropped. */ static bool destroy_cm_id(struct iw_cm_id *cm_id) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; unsigned long flags; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); /* * Wait if we're currently in a connect or accept downcall. A * listening endpoint should never block here. */ wait_event(cm_id_priv->connect_wait, !test_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags)); /* * Since we're deleting the cm_id, drop any events that * might arrive before the last dereference. */ set_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; switch (cm_id_priv->state) { case IW_CM_STATE_LISTEN: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* destroy the listening endpoint */ cm_id->device->ops.iw_destroy_listen(cm_id); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_ESTABLISHED: cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); /* Abrupt close of the connection */ (void)iwcm_modify_qp_err(qp); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_IDLE: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_DESTROYING; break; case IW_CM_STATE_CONN_RECV: /* * App called destroy before/without calling accept after * receiving connection request event notification or * returned non zero from the event callback function. * In either case, must tell the provider to reject. */ cm_id_priv->state = IW_CM_STATE_DESTROYING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); cm_id->device->ops.iw_reject(cm_id, NULL, 0); spin_lock_irqsave(&cm_id_priv->lock, flags); break; case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_DESTROYING: default: BUG(); break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id_priv->id.device->ops.iw_rem_ref(qp); if (cm_id->mapped) { iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr); iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM); } return iwcm_deref_id(cm_id_priv); } /* * This function is only called by the application thread and cannot * be called by the event thread. The function will wait for all * references to be released on the cm_id and then kfree the cm_id * object. */ void iw_destroy_cm_id(struct iw_cm_id *cm_id) { if (!destroy_cm_id(cm_id)) flush_workqueue(iwcm_wq); } EXPORT_SYMBOL(iw_destroy_cm_id); /** * iw_cm_check_wildcard - If IP address is 0 then use original * @pm_addr: sockaddr containing the ip to check for wildcard * @cm_addr: sockaddr containing the actual IP address * @cm_outaddr: sockaddr to set IP addr which leaving port * * Checks the pm_addr for wildcard and then sets cm_outaddr's * IP to the actual (cm_addr). */ static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr, struct sockaddr_storage *cm_addr, struct sockaddr_storage *cm_outaddr) { if (pm_addr->ss_family == AF_INET) { struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr; if (pm4_addr->sin_addr.s_addr == htonl(INADDR_ANY)) { struct sockaddr_in *cm4_addr = (struct sockaddr_in *)cm_addr; struct sockaddr_in *cm4_outaddr = (struct sockaddr_in *)cm_outaddr; cm4_outaddr->sin_addr = cm4_addr->sin_addr; } } else { struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr; if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) { struct sockaddr_in6 *cm6_addr = (struct sockaddr_in6 *)cm_addr; struct sockaddr_in6 *cm6_outaddr = (struct sockaddr_in6 *)cm_outaddr; cm6_outaddr->sin6_addr = cm6_addr->sin6_addr; } } } /** * iw_cm_map - Use portmapper to map the ports * @cm_id: connection manager pointer * @active: Indicates the active side when true * returns nonzero for error only if iwpm_create_mapinfo() fails * * Tries to add a mapping for a port using the Portmapper. If * successful in mapping the IP/Port it will check the remote * mapped IP address for a wildcard IP address and replace the * zero IP address with the remote_addr. */ static int iw_cm_map(struct iw_cm_id *cm_id, bool active) { const char *devname = dev_name(&cm_id->device->dev); const char *ifname = cm_id->device->iw_ifname; struct iwpm_dev_data pm_reg_msg = {}; struct iwpm_sa_data pm_msg; int status; if (strlen(devname) >= sizeof(pm_reg_msg.dev_name) || strlen(ifname) >= sizeof(pm_reg_msg.if_name)) return -EINVAL; cm_id->m_local_addr = cm_id->local_addr; cm_id->m_remote_addr = cm_id->remote_addr; strcpy(pm_reg_msg.dev_name, devname); strcpy(pm_reg_msg.if_name, ifname); if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) || !iwpm_valid_pid()) return 0; cm_id->mapped = true; pm_msg.loc_addr = cm_id->local_addr; pm_msg.rem_addr = cm_id->remote_addr; pm_msg.flags = (cm_id->device->iw_driver_flags & IW_F_NO_PORT_MAP) ? IWPM_FLAGS_NO_PORT_MAP : 0; if (active) status = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_IWCM); else status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM); if (!status) { cm_id->m_local_addr = pm_msg.mapped_loc_addr; if (active) { cm_id->m_remote_addr = pm_msg.mapped_rem_addr; iw_cm_check_wildcard(&pm_msg.mapped_rem_addr, &cm_id->remote_addr, &cm_id->m_remote_addr); } } return iwpm_create_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr, RDMA_NL_IWCM, pm_msg.flags); } /* * CM_ID <-- LISTEN * * Start listening for connect requests. Generates one CONNECT_REQUEST * event for each inbound connect request. */ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); if (!backlog) backlog = default_backlog; ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: cm_id_priv->state = IW_CM_STATE_LISTEN; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, false); if (!ret) ret = cm_id->device->ops.iw_create_listen(cm_id, backlog); if (ret) cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); break; default: ret = -EINVAL; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } EXPORT_SYMBOL(iw_cm_listen); /* * CM_ID <-- IDLE * * Rejects an inbound connection request. No events are generated. */ int iw_cm_reject(struct iw_cm_id *cm_id, const void *private_data, u8 private_data_len) { struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id_priv->state = IW_CM_STATE_IDLE; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->ops.iw_reject(cm_id, private_data, private_data_len); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_reject); /* * CM_ID <-- ESTABLISHED * * Accepts an inbound connection request and generates an ESTABLISHED * event. Callers of iw_cm_disconnect and iw_destroy_cm_id will block * until the ESTABLISHED event is received from the provider. */ int iw_cm_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; struct ib_qp *qp; unsigned long flags; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_CONN_RECV) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } /* Get the ib_qp given the QPN */ qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { spin_unlock_irqrestore(&cm_id_priv->lock, flags); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return -EINVAL; } cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id->device->ops.iw_accept(cm_id, iw_param); if (ret) { /* An error on accept precludes provider events */ BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_IDLE; spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); } return ret; } EXPORT_SYMBOL(iw_cm_accept); /* * Active Side: CM_ID <-- CONN_SENT * * If successful, results in the generation of a CONNECT_REPLY * event. iw_cm_disconnect and iw_cm_destroy will block until the * CONNECT_REPLY event is received from the provider. */ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param) { struct iwcm_id_private *cm_id_priv; int ret; unsigned long flags; struct ib_qp *qp = NULL; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); ret = alloc_work_entries(cm_id_priv, 4); if (ret) return ret; set_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state != IW_CM_STATE_IDLE) { ret = -EINVAL; goto err; } /* Get the ib_qp given the QPN */ qp = cm_id->device->ops.iw_get_qp(cm_id->device, iw_param->qpn); if (!qp) { ret = -EINVAL; goto err; } cm_id->device->ops.iw_add_ref(qp); cm_id_priv->qp = qp; cm_id_priv->state = IW_CM_STATE_CONN_SENT; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = iw_cm_map(cm_id, true); if (!ret) ret = cm_id->device->ops.iw_connect(cm_id, iw_param); if (!ret) return 0; /* success */ spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; err: spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id->device->ops.iw_rem_ref(qp); clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); wake_up_all(&cm_id_priv->connect_wait); return ret; } EXPORT_SYMBOL(iw_cm_connect); /* * Passive Side: new CM_ID <-- CONN_RECV * * Handles an inbound connect request. The function creates a new * iw_cm_id to represent the new connection and inherits the client * callback function and other attributes from the listening parent. * * The work item contains a pointer to the listen_cm_id and the event. The * listen_cm_id contains the client cm_handler, context and * device. These are copied when the device is cloned. The event * contains the new four tuple. * * An error on the child should not affect the parent, so this * function does not return a value. */ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; struct iw_cm_id *cm_id; struct iwcm_id_private *cm_id_priv; int ret; /* * The provider should never generate a connection request * event with a bad status. */ BUG_ON(iw_event->status); cm_id = iw_create_cm_id(listen_id_priv->id.device, listen_id_priv->id.cm_handler, listen_id_priv->id.context); /* If the cm_id could not be created, ignore the request */ if (IS_ERR(cm_id)) goto out; cm_id->provider_data = iw_event->provider_data; cm_id->m_local_addr = iw_event->local_addr; cm_id->m_remote_addr = iw_event->remote_addr; cm_id->local_addr = listen_id_priv->id.local_addr; ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr, &iw_event->remote_addr, &cm_id->remote_addr, RDMA_NL_IWCM); if (ret) { cm_id->remote_addr = iw_event->remote_addr; } else { iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr, &iw_event->local_addr, &cm_id->local_addr); iw_event->local_addr = cm_id->local_addr; iw_event->remote_addr = cm_id->remote_addr; } cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; /* * We could be destroying the listening id. If so, ignore this * upcall. */ spin_lock_irqsave(&listen_id_priv->lock, flags); if (listen_id_priv->state != IW_CM_STATE_LISTEN) { spin_unlock_irqrestore(&listen_id_priv->lock, flags); iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } spin_unlock_irqrestore(&listen_id_priv->lock, flags); ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); goto out; } /* Call the client CM handler */ ret = cm_id->cm_handler(cm_id, iw_event); if (ret) { iw_cm_reject(cm_id, NULL, 0); iw_destroy_cm_id(cm_id); } out: if (iw_event->private_data_len) kfree(iw_event->private_data); } /* * Passive Side: CM_ID <-- ESTABLISHED * * The provider generated an ESTABLISHED event which means that * the MPA negotion has completed successfully and we are now in MPA * FPDU mode. * * This event can only be received in the CONN_RECV state. If the * remote peer closed, the ESTABLISHED event would be received followed * by the CLOSE event. If the app closes, it will block until we wake * it up after processing this event. */ static int cm_conn_est_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * We clear the CONNECT_WAIT bit here to allow the callback * function to call iw_cm_disconnect. Calling iw_destroy_cm_id * from a callback handler is not allowed. */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_RECV); cm_id_priv->state = IW_CM_STATE_ESTABLISHED; spin_unlock_irqrestore(&cm_id_priv->lock, flags); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * Active Side: CM_ID <-- ESTABLISHED * * The app has called connect and is waiting for the established event to * post it's requests to the server. This event will wake up anyone * blocked in iw_cm_disconnect or iw_destroy_id. */ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { struct ib_qp *qp = NULL; unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); /* * Clear the connect wait bit so a callback function calling * iw_cm_disconnect will not wait and deadlock this thread */ clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags); BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT); if (iw_event->status == 0) { cm_id_priv->id.m_local_addr = iw_event->local_addr; cm_id_priv->id.m_remote_addr = iw_event->remote_addr; iw_event->local_addr = cm_id_priv->id.local_addr; iw_event->remote_addr = cm_id_priv->id.remote_addr; cm_id_priv->state = IW_CM_STATE_ESTABLISHED; } else { /* REJECTED or RESET */ qp = cm_id_priv->qp; cm_id_priv->qp = NULL; cm_id_priv->state = IW_CM_STATE_IDLE; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id_priv->id.device->ops.iw_rem_ref(qp); ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); if (iw_event->private_data_len) kfree(iw_event->private_data); /* Wake up waiters on connect complete */ wake_up_all(&cm_id_priv->connect_wait); return ret; } /* * CM_ID <-- CLOSING * * If in the ESTABLISHED state, move to CLOSING. */ static void cm_disconnect_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { unsigned long flags; spin_lock_irqsave(&cm_id_priv->lock, flags); if (cm_id_priv->state == IW_CM_STATE_ESTABLISHED) cm_id_priv->state = IW_CM_STATE_CLOSING; spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * CM_ID <-- IDLE * * If in the ESTBLISHED or CLOSING states, the QP will have have been * moved by the provider to the ERR state. Disassociate the CM_ID from * the QP, move to IDLE, and remove the 'connected' reference. * * If in some other state, the cm_id was destroyed asynchronously. * This is the last reference that will result in waking up * the app thread blocked in iw_destroy_cm_id. */ static int cm_close_handler(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { struct ib_qp *qp; unsigned long flags; int ret = 0, notify_event = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); qp = cm_id_priv->qp; cm_id_priv->qp = NULL; switch (cm_id_priv->state) { case IW_CM_STATE_ESTABLISHED: case IW_CM_STATE_CLOSING: cm_id_priv->state = IW_CM_STATE_IDLE; notify_event = 1; break; case IW_CM_STATE_DESTROYING: break; default: BUG(); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (qp) cm_id_priv->id.device->ops.iw_rem_ref(qp); if (notify_event) ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, iw_event); return ret; } static int process_event(struct iwcm_id_private *cm_id_priv, struct iw_cm_event *iw_event) { int ret = 0; switch (iw_event->event) { case IW_CM_EVENT_CONNECT_REQUEST: cm_conn_req_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CONNECT_REPLY: ret = cm_conn_rep_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_ESTABLISHED: ret = cm_conn_est_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_DISCONNECT: cm_disconnect_handler(cm_id_priv, iw_event); break; case IW_CM_EVENT_CLOSE: ret = cm_close_handler(cm_id_priv, iw_event); break; default: BUG(); } return ret; } /* * Process events on the work_list for the cm_id. If the callback * function requests that the cm_id be deleted, a flag is set in the * cm_id flags to indicate that when the last reference is * removed, the cm_id is to be destroyed. This is necessary to * distinguish between an object that will be destroyed by the app * thread asleep on the destroy_comp list vs. an object destroyed * here synchronously when the last reference is removed. */ static void cm_work_handler(struct work_struct *_work) { struct iwcm_work *work = container_of(_work, struct iwcm_work, work); struct iw_cm_event levent; struct iwcm_id_private *cm_id_priv = work->cm_id; unsigned long flags; int ret = 0; spin_lock_irqsave(&cm_id_priv->lock, flags); while (!list_empty(&cm_id_priv->work_list)) { work = list_first_entry(&cm_id_priv->work_list, struct iwcm_work, list); list_del_init(&work->list); levent = work->event; put_work(work); spin_unlock_irqrestore(&cm_id_priv->lock, flags); if (!test_bit(IWCM_F_DROP_EVENTS, &cm_id_priv->flags)) { ret = process_event(cm_id_priv, &levent); if (ret) WARN_ON_ONCE(destroy_cm_id(&cm_id_priv->id)); } else pr_debug("dropping event %d\n", levent.event); if (iwcm_deref_id(cm_id_priv)) return; spin_lock_irqsave(&cm_id_priv->lock, flags); } spin_unlock_irqrestore(&cm_id_priv->lock, flags); } /* * This function is called on interrupt context. Schedule events on * the iwcm_wq thread to allow callback functions to downcall into * the CM and/or block. Events are queued to a per-CM_ID * work_list. If this is the first event on the work_list, the work * element is also queued on the iwcm_wq thread. * * Each event holds a reference on the cm_id. Until the last posted * event has been delivered and processed, the cm_id cannot be * deleted. * * Returns: * 0 - the event was handled. * -ENOMEM - the event was not handled due to lack of resources. */ static int cm_event_handler(struct iw_cm_id *cm_id, struct iw_cm_event *iw_event) { struct iwcm_work *work; struct iwcm_id_private *cm_id_priv; unsigned long flags; int ret = 0; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); spin_lock_irqsave(&cm_id_priv->lock, flags); work = get_work(cm_id_priv); if (!work) { ret = -ENOMEM; goto out; } INIT_WORK(&work->work, cm_work_handler); work->cm_id = cm_id_priv; work->event = *iw_event; if ((work->event.event == IW_CM_EVENT_CONNECT_REQUEST || work->event.event == IW_CM_EVENT_CONNECT_REPLY) && work->event.private_data_len) { ret = copy_private_data(&work->event); if (ret) { put_work(work); goto out; } } refcount_inc(&cm_id_priv->refcount); list_add_tail(&work->list, &cm_id_priv->work_list); queue_work(iwcm_wq, &work->work); out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_init_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE| IB_ACCESS_REMOTE_READ; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } static int iwcm_init_qp_rts_attr(struct iwcm_id_private *cm_id_priv, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { unsigned long flags; int ret; spin_lock_irqsave(&cm_id_priv->lock, flags); switch (cm_id_priv->state) { case IW_CM_STATE_IDLE: case IW_CM_STATE_CONN_SENT: case IW_CM_STATE_CONN_RECV: case IW_CM_STATE_ESTABLISHED: *qp_attr_mask = 0; ret = 0; break; default: ret = -EINVAL; break; } spin_unlock_irqrestore(&cm_id_priv->lock, flags); return ret; } int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr, int *qp_attr_mask) { struct iwcm_id_private *cm_id_priv; int ret; cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); switch (qp_attr->qp_state) { case IB_QPS_INIT: case IB_QPS_RTR: ret = iwcm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask); break; case IB_QPS_RTS: ret = iwcm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask); break; default: ret = -EINVAL; break; } return ret; } EXPORT_SYMBOL(iw_cm_init_qp_attr); static int __init iw_cm_init(void) { int ret; ret = iwpm_init(RDMA_NL_IWCM); if (ret) return ret; iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) goto err_alloc; iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", iwcm_ctl_table); if (!iwcm_ctl_table_hdr) { pr_err("iw_cm: couldn't register sysctl paths\n"); goto err_sysctl; } rdma_nl_register(RDMA_NL_IWCM, iwcm_nl_cb_table); return 0; err_sysctl: destroy_workqueue(iwcm_wq); err_alloc: iwpm_exit(RDMA_NL_IWCM); return -ENOMEM; } static void __exit iw_cm_cleanup(void) { rdma_nl_unregister(RDMA_NL_IWCM); unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); iwpm_exit(RDMA_NL_IWCM); } MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_IWCM, 2); module_init(iw_cm_init); module_exit(iw_cm_cleanup);
1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 1 2 1 1 1 1 1 1 1 1 1 1 1 1 3 3 2 1 3 13 1 1 5 1 2 1 3 1 1 5 2 1 5 1 1 4 1 1 12 7 1 2 1 1 1 1 1 1 1 7 18 4 6 4 2 1 1 5 1 1 3 4 3 2 3 3 2 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 2 2 2 2 2 1 1 18 10 2 1 2 2 2 1 1 1 1 1 1 1 10 3 3 1 1 3 2 2 1 1 1 2 1 3 6 5 4 4 5 4 1 1 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 1 4 1 3 3 1 2 1 1 1 13 9 9 13 22 21 22 22 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 // SPDX-License-Identifier: GPL-2.0 /* * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * KVM Xen emulation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "x86.h" #include "xen.h" #include "hyperv.h" #include "irq.h" #include <linux/eventfd.h> #include <linux/kvm_host.h> #include <linux/sched/stat.h> #include <trace/events/kvm.h> #include <xen/interface/xen.h> #include <xen/interface/vcpu.h> #include <xen/interface/version.h> #include <xen/interface/event_channel.h> #include <xen/interface/sched.h> #include <asm/xen/cpuid.h> #include <asm/pvclock.h> #include "cpuid.h" #include "trace.h" static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm); static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data); static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r); DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ); static int kvm_xen_shared_info_init(struct kvm *kvm) { struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; struct pvclock_wall_clock *wc; u32 *wc_sec_hi; u32 wc_version; u64 wall_nsec; int ret = 0; int idx = srcu_read_lock(&kvm->srcu); read_lock_irq(&gpc->lock); while (!kvm_gpc_check(gpc, PAGE_SIZE)) { read_unlock_irq(&gpc->lock); ret = kvm_gpc_refresh(gpc, PAGE_SIZE); if (ret) goto out; read_lock_irq(&gpc->lock); } /* * This code mirrors kvm_write_wall_clock() except that it writes * directly through the pfn cache and doesn't mark the page dirty. */ wall_nsec = kvm_get_wall_clock_epoch(kvm); /* Paranoia checks on the 32-bit struct layout */ BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900); BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924); BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); #ifdef CONFIG_X86_64 /* Paranoia checks on the 64-bit struct layout */ BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00); BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c); if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct shared_info *shinfo = gpc->khva; wc_sec_hi = &shinfo->wc_sec_hi; wc = &shinfo->wc; } else #endif { struct compat_shared_info *shinfo = gpc->khva; wc_sec_hi = &shinfo->arch.wc_sec_hi; wc = &shinfo->wc; } /* Increment and ensure an odd value */ wc_version = wc->version = (wc->version + 1) | 1; smp_wmb(); wc->nsec = do_div(wall_nsec, NSEC_PER_SEC); wc->sec = (u32)wall_nsec; *wc_sec_hi = wall_nsec >> 32; smp_wmb(); wc->version = wc_version + 1; read_unlock_irq(&gpc->lock); kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE); out: srcu_read_unlock(&kvm->srcu, idx); return ret; } void kvm_xen_inject_timer_irqs(struct kvm_vcpu *vcpu) { if (atomic_read(&vcpu->arch.xen.timer_pending) > 0) { struct kvm_xen_evtchn e; e.vcpu_id = vcpu->vcpu_id; e.vcpu_idx = vcpu->vcpu_idx; e.port = vcpu->arch.xen.timer_virq; e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; kvm_xen_set_evtchn(&e, vcpu->kvm); vcpu->arch.xen.timer_expires = 0; atomic_set(&vcpu->arch.xen.timer_pending, 0); } } static enum hrtimer_restart xen_timer_callback(struct hrtimer *timer) { struct kvm_vcpu *vcpu = container_of(timer, struct kvm_vcpu, arch.xen.timer); struct kvm_xen_evtchn e; int rc; if (atomic_read(&vcpu->arch.xen.timer_pending)) return HRTIMER_NORESTART; e.vcpu_id = vcpu->vcpu_id; e.vcpu_idx = vcpu->vcpu_idx; e.port = vcpu->arch.xen.timer_virq; e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; rc = kvm_xen_set_evtchn_fast(&e, vcpu->kvm); if (rc != -EWOULDBLOCK) { vcpu->arch.xen.timer_expires = 0; return HRTIMER_NORESTART; } atomic_inc(&vcpu->arch.xen.timer_pending); kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); return HRTIMER_NORESTART; } static int xen_get_guest_pvclock(struct kvm_vcpu *vcpu, struct pvclock_vcpu_time_info *hv_clock, struct gfn_to_pfn_cache *gpc, unsigned int offset) { unsigned long flags; int r; read_lock_irqsave(&gpc->lock, flags); while (!kvm_gpc_check(gpc, offset + sizeof(*hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); r = kvm_gpc_refresh(gpc, offset + sizeof(*hv_clock)); if (r) return r; read_lock_irqsave(&gpc->lock, flags); } memcpy(hv_clock, gpc->khva + offset, sizeof(*hv_clock)); read_unlock_irqrestore(&gpc->lock, flags); /* * Sanity check TSC shift+multiplier to verify the guest's view of time * is more or less consistent. */ if (hv_clock->tsc_shift != vcpu->arch.pvclock_tsc_shift || hv_clock->tsc_to_system_mul != vcpu->arch.pvclock_tsc_mul) return -EINVAL; return 0; } static void kvm_xen_start_timer(struct kvm_vcpu *vcpu, u64 guest_abs, bool linux_wa) { struct kvm_vcpu_xen *xen = &vcpu->arch.xen; int64_t kernel_now, delta; uint64_t guest_now; int r = -EOPNOTSUPP; /* * The guest provides the requested timeout in absolute nanoseconds * of the KVM clock — as *it* sees it, based on the scaled TSC and * the pvclock information provided by KVM. * * The kernel doesn't support hrtimers based on CLOCK_MONOTONIC_RAW * so use CLOCK_MONOTONIC. In the timescales covered by timers, the * difference won't matter much as there is no cumulative effect. * * Calculate the time for some arbitrary point in time around "now" * in terms of both kvmclock and CLOCK_MONOTONIC. Calculate the * delta between the kvmclock "now" value and the guest's requested * timeout, apply the "Linux workaround" described below, and add * the resulting delta to the CLOCK_MONOTONIC "now" value, to get * the absolute CLOCK_MONOTONIC time at which the timer should * fire. */ do { struct pvclock_vcpu_time_info hv_clock; uint64_t host_tsc, guest_tsc; if (!static_cpu_has(X86_FEATURE_CONSTANT_TSC) || !vcpu->kvm->arch.use_master_clock) break; /* * If both Xen PV clocks are active, arbitrarily try to use the * compat clock first, but also try to use the non-compat clock * if the compat clock is unusable. The two PV clocks hold the * same information, but it's possible one (or both) is stale * and/or currently unreachable. */ if (xen->vcpu_info_cache.active) r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_info_cache, offsetof(struct compat_vcpu_info, time)); if (r && xen->vcpu_time_info_cache.active) r = xen_get_guest_pvclock(vcpu, &hv_clock, &xen->vcpu_time_info_cache, 0); if (r) break; if (!IS_ENABLED(CONFIG_64BIT) || !kvm_get_monotonic_and_clockread(&kernel_now, &host_tsc)) { /* * Don't fall back to get_kvmclock_ns() because it's * broken; it has a systemic error in its results * because it scales directly from host TSC to * nanoseconds, and doesn't scale first to guest TSC * and *then* to nanoseconds as the guest does. * * There is a small error introduced here because time * continues to elapse between the ktime_get() and the * subsequent rdtsc(). But not the systemic drift due * to get_kvmclock_ns(). */ kernel_now = ktime_get(); /* This is CLOCK_MONOTONIC */ host_tsc = rdtsc(); } /* Calculate the guest kvmclock as the guest would do it. */ guest_tsc = kvm_read_l1_tsc(vcpu, host_tsc); guest_now = __pvclock_read_cycles(&hv_clock, guest_tsc); } while (0); if (r) { /* * Without CONSTANT_TSC, get_kvmclock_ns() is the only option. * * Also if the guest PV clock hasn't been set up yet, as is * likely to be the case during migration when the vCPU has * not been run yet. It would be possible to calculate the * scaling factors properly in that case but there's not much * point in doing so. The get_kvmclock_ns() drift accumulates * over time, so it's OK to use it at startup. Besides, on * migration there's going to be a little bit of skew in the * precise moment at which timers fire anyway. Often they'll * be in the "past" by the time the VM is running again after * migration. */ guest_now = get_kvmclock_ns(vcpu->kvm); kernel_now = ktime_get(); } delta = guest_abs - guest_now; /* * Xen has a 'Linux workaround' in do_set_timer_op() which checks for * negative absolute timeout values (caused by integer overflow), and * for values about 13 days in the future (2^50ns) which would be * caused by jiffies overflow. For those cases, Xen sets the timeout * 100ms in the future (not *too* soon, since if a guest really did * set a long timeout on purpose we don't want to keep churning CPU * time by waking it up). Emulate Xen's workaround when starting the * timer in response to __HYPERVISOR_set_timer_op. */ if (linux_wa && unlikely((int64_t)guest_abs < 0 || (delta > 0 && (uint32_t) (delta >> 50) != 0))) { delta = 100 * NSEC_PER_MSEC; guest_abs = guest_now + delta; } /* * Avoid races with the old timer firing. Checking timer_expires * to avoid calling hrtimer_cancel() will only have false positives * so is fine. */ if (vcpu->arch.xen.timer_expires) hrtimer_cancel(&vcpu->arch.xen.timer); atomic_set(&vcpu->arch.xen.timer_pending, 0); vcpu->arch.xen.timer_expires = guest_abs; if (delta <= 0) xen_timer_callback(&vcpu->arch.xen.timer); else hrtimer_start(&vcpu->arch.xen.timer, ktime_add_ns(kernel_now, delta), HRTIMER_MODE_ABS_HARD); } static void kvm_xen_stop_timer(struct kvm_vcpu *vcpu) { hrtimer_cancel(&vcpu->arch.xen.timer); vcpu->arch.xen.timer_expires = 0; atomic_set(&vcpu->arch.xen.timer_pending, 0); } static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) { struct kvm_vcpu_xen *vx = &v->arch.xen; struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache; struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache; size_t user_len, user_len1, user_len2; struct vcpu_runstate_info rs; unsigned long flags; size_t times_ofs; uint8_t *update_bit = NULL; uint64_t entry_time; uint64_t *rs_times; int *rs_state; /* * The only difference between 32-bit and 64-bit versions of the * runstate struct is the alignment of uint64_t in 32-bit, which * means that the 64-bit version has an additional 4 bytes of * padding after the first field 'state'. Let's be really really * paranoid about that, and matching it with our internal data * structures that we memcpy into it... */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); #ifdef CONFIG_X86_64 /* * The 64-bit structure has 4 bytes of padding before 'state_entry_time' * so each subsequent field is shifted by 4, and it's 4 bytes longer. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != offsetof(struct compat_vcpu_runstate_info, time) + 4); BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4); #endif /* * The state field is in the same place at the start of both structs, * and is the same size (int) as vx->current_runstate. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != offsetof(struct compat_vcpu_runstate_info, state)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) != sizeof(vx->current_runstate)); BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); /* * The state_entry_time field is 64 bits in both versions, and the * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86 * is little-endian means that it's in the last *byte* of the word. * That detail is important later. */ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != sizeof(uint64_t)); BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != sizeof(uint64_t)); BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80); /* * The time array is four 64-bit quantities in both versions, matching * the vx->runstate_times and immediately following state_entry_time. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof_field(struct compat_vcpu_runstate_info, time)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { user_len = sizeof(struct vcpu_runstate_info); times_ofs = offsetof(struct vcpu_runstate_info, state_entry_time); } else { user_len = sizeof(struct compat_vcpu_runstate_info); times_ofs = offsetof(struct compat_vcpu_runstate_info, state_entry_time); } /* * There are basically no alignment constraints. The guest can set it * up so it crosses from one page to the next, and at arbitrary byte * alignment (and the 32-bit ABI doesn't align the 64-bit integers * anyway, even if the overall struct had been 64-bit aligned). */ if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) { user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK); user_len2 = user_len - user_len1; } else { user_len1 = user_len; user_len2 = 0; } BUG_ON(user_len1 + user_len2 != user_len); retry: /* * Attempt to obtain the GPC lock on *both* (if there are two) * gfn_to_pfn caches that cover the region. */ if (atomic) { local_irq_save(flags); if (!read_trylock(&gpc1->lock)) { local_irq_restore(flags); return; } } else { read_lock_irqsave(&gpc1->lock, flags); } while (!kvm_gpc_check(gpc1, user_len1)) { read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ if (atomic) return; if (kvm_gpc_refresh(gpc1, user_len1)) return; read_lock_irqsave(&gpc1->lock, flags); } if (likely(!user_len2)) { /* * Set up three pointers directly to the runstate_info * struct in the guest (via the GPC). * * • @rs_state → state field * • @rs_times → state_entry_time field. * • @update_bit → last byte of state_entry_time, which * contains the XEN_RUNSTATE_UPDATE bit. */ rs_state = gpc1->khva; rs_times = gpc1->khva + times_ofs; if (v->kvm->arch.xen.runstate_update_flag) update_bit = ((void *)(&rs_times[1])) - 1; } else { /* * The guest's runstate_info is split across two pages and we * need to hold and validate both GPCs simultaneously. We can * declare a lock ordering GPC1 > GPC2 because nothing else * takes them more than one at a time. Set a subclass on the * gpc1 lock to make lockdep shut up about it. */ lock_set_subclass(&gpc1->lock.dep_map, 1, _THIS_IP_); if (atomic) { if (!read_trylock(&gpc2->lock)) { read_unlock_irqrestore(&gpc1->lock, flags); return; } } else { read_lock(&gpc2->lock); } if (!kvm_gpc_check(gpc2, user_len2)) { read_unlock(&gpc2->lock); read_unlock_irqrestore(&gpc1->lock, flags); /* When invoked from kvm_sched_out() we cannot sleep */ if (atomic) return; /* * Use kvm_gpc_activate() here because if the runstate * area was configured in 32-bit mode and only extends * to the second page now because the guest changed to * 64-bit mode, the second GPC won't have been set up. */ if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1, user_len2)) return; /* * We dropped the lock on GPC1 so we have to go all the * way back and revalidate that too. */ goto retry; } /* * In this case, the runstate_info struct will be assembled on * the kernel stack (compat or not as appropriate) and will * be copied to GPC1/GPC2 with a dual memcpy. Set up the three * rs pointers accordingly. */ rs_times = &rs.state_entry_time; /* * The rs_state pointer points to the start of what we'll * copy to the guest, which in the case of a compat guest * is the 32-bit field that the compiler thinks is padding. */ rs_state = ((void *)rs_times) - times_ofs; /* * The update_bit is still directly in the guest memory, * via one GPC or the other. */ if (v->kvm->arch.xen.runstate_update_flag) { if (user_len1 >= times_ofs + sizeof(uint64_t)) update_bit = gpc1->khva + times_ofs + sizeof(uint64_t) - 1; else update_bit = gpc2->khva + times_ofs + sizeof(uint64_t) - 1 - user_len1; } #ifdef CONFIG_X86_64 /* * Don't leak kernel memory through the padding in the 64-bit * version of the struct. */ memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time)); #endif } /* * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the * state_entry_time field, directly in the guest. We need to set * that (and write-barrier) before writing to the rest of the * structure, and clear it last. Just as Xen does, we address the * single *byte* in which it resides because it might be in a * different cache line to the rest of the 64-bit word, due to * the (lack of) alignment constraints. */ entry_time = vx->runstate_entry_time; if (update_bit) { entry_time |= XEN_RUNSTATE_UPDATE; *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; smp_wmb(); } /* * Now assemble the actual structure, either on our kernel stack * or directly in the guest according to how the rs_state and * rs_times pointers were set up above. */ *rs_state = vx->current_runstate; rs_times[0] = entry_time; memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); /* For the split case, we have to then copy it to the guest. */ if (user_len2) { memcpy(gpc1->khva, rs_state, user_len1); memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2); } smp_wmb(); /* Finally, clear the XEN_RUNSTATE_UPDATE bit. */ if (update_bit) { entry_time &= ~XEN_RUNSTATE_UPDATE; *update_bit = entry_time >> 56; smp_wmb(); } if (user_len2) { kvm_gpc_mark_dirty_in_slot(gpc2); read_unlock(&gpc2->lock); } kvm_gpc_mark_dirty_in_slot(gpc1); read_unlock_irqrestore(&gpc1->lock, flags); } void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) { struct kvm_vcpu_xen *vx = &v->arch.xen; u64 now = get_kvmclock_ns(v->kvm); u64 delta_ns = now - vx->runstate_entry_time; u64 run_delay = current->sched_info.run_delay; if (unlikely(!vx->runstate_entry_time)) vx->current_runstate = RUNSTATE_offline; /* * Time waiting for the scheduler isn't "stolen" if the * vCPU wasn't running anyway. */ if (vx->current_runstate == RUNSTATE_running) { u64 steal_ns = run_delay - vx->last_steal; delta_ns -= steal_ns; vx->runstate_times[RUNSTATE_runnable] += steal_ns; } vx->last_steal = run_delay; vx->runstate_times[vx->current_runstate] += delta_ns; vx->current_runstate = state; vx->runstate_entry_time = now; if (vx->runstate_cache.active) kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); } void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) { struct kvm_lapic_irq irq = { }; irq.dest_id = v->vcpu_id; irq.vector = v->arch.xen.upcall_vector; irq.dest_mode = APIC_DEST_PHYSICAL; irq.shorthand = APIC_DEST_NOSHORT; irq.delivery_mode = APIC_DM_FIXED; irq.level = 1; kvm_irq_delivery_to_apic(v->kvm, NULL, &irq, NULL); } /* * On event channel delivery, the vcpu_info may not have been accessible. * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which * need to be marked into the vcpu_info (and evtchn_upcall_pending set). * Do so now that we can sleep in the context of the vCPU to bring the * page in, and refresh the pfn cache for it. */ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) { unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel); struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache; unsigned long flags; if (!evtchn_pending_sel) return; /* * Yes, this is an open-coded loop. But that's just what put_user() * does anyway. Page it in and retry the instruction. We're just a * little more honest about it. */ read_lock_irqsave(&gpc->lock, flags); while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) return; read_lock_irqsave(&gpc->lock, flags); } /* Now gpc->khva is a valid kernel address for the vcpu_info */ if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { struct vcpu_info *vi = gpc->khva; asm volatile(LOCK_PREFIX "orq %0, %1\n" "notq %0\n" LOCK_PREFIX "andq %0, %2\n" : "=r" (evtchn_pending_sel), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) : "0" (evtchn_pending_sel)); WRITE_ONCE(vi->evtchn_upcall_pending, 1); } else { u32 evtchn_pending_sel32 = evtchn_pending_sel; struct compat_vcpu_info *vi = gpc->khva; asm volatile(LOCK_PREFIX "orl %0, %1\n" "notl %0\n" LOCK_PREFIX "andl %0, %2\n" : "=r" (evtchn_pending_sel32), "+m" (vi->evtchn_pending_sel), "+m" (v->arch.xen.evtchn_pending_sel) : "0" (evtchn_pending_sel32)); WRITE_ONCE(vi->evtchn_upcall_pending, 1); } kvm_gpc_mark_dirty_in_slot(gpc); read_unlock_irqrestore(&gpc->lock, flags); /* For the per-vCPU lapic vector, deliver it as MSI. */ if (v->arch.xen.upcall_vector) kvm_xen_inject_vcpu_vector(v); } int __kvm_xen_has_interrupt(struct kvm_vcpu *v) { struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache; unsigned long flags; u8 rc = 0; /* * If the global upcall vector (HVMIRQ_callback_vector) is set and * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending. */ /* No need for compat handling here */ BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) != offsetof(struct compat_vcpu_info, evtchn_upcall_pending)); BUILD_BUG_ON(sizeof(rc) != sizeof_field(struct vcpu_info, evtchn_upcall_pending)); BUILD_BUG_ON(sizeof(rc) != sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); read_lock_irqsave(&gpc->lock, flags); while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); /* * This function gets called from kvm_vcpu_block() after setting the * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately * from a HLT. So we really mustn't sleep. If the page ended up absent * at that point, just return 1 in order to trigger an immediate wake, * and we'll end up getting called again from a context where we *can* * fault in the page and wait for it. */ if (in_atomic() || !task_is_running(current)) return 1; if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) { /* * If this failed, userspace has screwed up the * vcpu_info mapping. No interrupts for you. */ return 0; } read_lock_irqsave(&gpc->lock, flags); } rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending; read_unlock_irqrestore(&gpc->lock, flags); return rc; } int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { int r = -ENOENT; switch (data->type) { case KVM_XEN_ATTR_TYPE_LONG_MODE: if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) { r = -EINVAL; } else { mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.long_mode = !!data->u.long_mode; /* * Re-initialize shared_info to put the wallclock in the * correct place. Whilst it's not necessary to do this * unless the mode is actually changed, it does no harm * to make the call anyway. */ r = kvm->arch.xen.shinfo_cache.active ? kvm_xen_shared_info_init(kvm) : 0; mutex_unlock(&kvm->arch.xen.xen_lock); } break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: { int idx; mutex_lock(&kvm->arch.xen.xen_lock); idx = srcu_read_lock(&kvm->srcu); if (data->type == KVM_XEN_ATTR_TYPE_SHARED_INFO) { gfn_t gfn = data->u.shared_info.gfn; if (gfn == KVM_XEN_INVALID_GFN) { kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); r = 0; } else { r = kvm_gpc_activate(&kvm->arch.xen.shinfo_cache, gfn_to_gpa(gfn), PAGE_SIZE); } } else { void __user * hva = u64_to_user_ptr(data->u.shared_info.hva); if (!PAGE_ALIGNED(hva)) { r = -EINVAL; } else if (!hva) { kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); r = 0; } else { r = kvm_gpc_activate_hva(&kvm->arch.xen.shinfo_cache, (unsigned long)hva, PAGE_SIZE); } } srcu_read_unlock(&kvm->srcu, idx); if (!r && kvm->arch.xen.shinfo_cache.active) r = kvm_xen_shared_info_init(kvm); mutex_unlock(&kvm->arch.xen.xen_lock); break; } case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: if (data->u.vector && data->u.vector < 0x10) r = -EINVAL; else { mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.upcall_vector = data->u.vector; mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; } break; case KVM_XEN_ATTR_TYPE_EVTCHN: r = kvm_xen_setattr_evtchn(kvm, data); break; case KVM_XEN_ATTR_TYPE_XEN_VERSION: mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.xen_version = data->u.xen_version; mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } mutex_lock(&kvm->arch.xen.xen_lock); kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; mutex_unlock(&kvm->arch.xen.xen_lock); r = 0; break; default: break; } return r; } int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { int r = -ENOENT; mutex_lock(&kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_ATTR_TYPE_LONG_MODE: data->u.long_mode = kvm->arch.xen.long_mode; r = 0; break; case KVM_XEN_ATTR_TYPE_SHARED_INFO: if (kvm_gpc_is_gpa_active(&kvm->arch.xen.shinfo_cache)) data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa); else data->u.shared_info.gfn = KVM_XEN_INVALID_GFN; r = 0; break; case KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA: if (kvm_gpc_is_hva_active(&kvm->arch.xen.shinfo_cache)) data->u.shared_info.hva = kvm->arch.xen.shinfo_cache.uhva; else data->u.shared_info.hva = 0; r = 0; break; case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR: data->u.vector = kvm->arch.xen.upcall_vector; r = 0; break; case KVM_XEN_ATTR_TYPE_XEN_VERSION: data->u.xen_version = kvm->arch.xen.xen_version; r = 0; break; case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag; r = 0; break; default: break; } mutex_unlock(&kvm->arch.xen.xen_lock); return r; } int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int idx, r = -ENOENT; mutex_lock(&vcpu->kvm->arch.xen.xen_lock); idx = srcu_read_lock(&vcpu->kvm->srcu); switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA: /* No compat necessary here. */ BUILD_BUG_ON(sizeof(struct vcpu_info) != sizeof(struct compat_vcpu_info)); BUILD_BUG_ON(offsetof(struct vcpu_info, time) != offsetof(struct compat_vcpu_info, time)); if (data->type == KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO) { if (data->u.gpa == KVM_XEN_INVALID_GPA) { kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); r = 0; break; } r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, data->u.gpa, sizeof(struct vcpu_info)); } else { if (data->u.hva == 0) { kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); r = 0; break; } r = kvm_gpc_activate_hva(&vcpu->arch.xen.vcpu_info_cache, data->u.hva, sizeof(struct vcpu_info)); } if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); break; case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (data->u.gpa == KVM_XEN_INVALID_GPA) { kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); r = 0; break; } r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache, data->u.gpa, sizeof(struct pvclock_vcpu_time_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: { size_t sz, sz1, sz2; if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.gpa == KVM_XEN_INVALID_GPA) { r = 0; deactivate_out: kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); break; } /* * If the guest switches to 64-bit mode after setting the runstate * address, that's actually OK. kvm_xen_update_runstate_guest() * will cope. */ if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) sz = sizeof(struct vcpu_runstate_info); else sz = sizeof(struct compat_vcpu_runstate_info); /* How much fits in the (first) page? */ sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK); r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache, data->u.gpa, sz1); if (r) goto deactivate_out; /* Either map the second page, or deactivate the second GPC */ if (sz1 >= sz) { kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); } else { sz2 = sz - sz1; BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK); r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache, data->u.gpa + sz1, sz2); if (r) goto deactivate_out; } kvm_xen_update_runstate_guest(vcpu, false); break; } case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.runstate.state > RUNSTATE_offline) { r = -EINVAL; break; } kvm_xen_update_runstate(vcpu, data->u.runstate.state); r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.runstate.state > RUNSTATE_offline) { r = -EINVAL; break; } if (data->u.runstate.state_entry_time != (data->u.runstate.time_running + data->u.runstate.time_runnable + data->u.runstate.time_blocked + data->u.runstate.time_offline)) { r = -EINVAL; break; } if (get_kvmclock_ns(vcpu->kvm) < data->u.runstate.state_entry_time) { r = -EINVAL; break; } vcpu->arch.xen.current_runstate = data->u.runstate.state; vcpu->arch.xen.runstate_entry_time = data->u.runstate.state_entry_time; vcpu->arch.xen.runstate_times[RUNSTATE_running] = data->u.runstate.time_running; vcpu->arch.xen.runstate_times[RUNSTATE_runnable] = data->u.runstate.time_runnable; vcpu->arch.xen.runstate_times[RUNSTATE_blocked] = data->u.runstate.time_blocked; vcpu->arch.xen.runstate_times[RUNSTATE_offline] = data->u.runstate.time_offline; vcpu->arch.xen.last_steal = current->sched_info.run_delay; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.runstate.state > RUNSTATE_offline && data->u.runstate.state != (u64)-1) { r = -EINVAL; break; } /* The adjustment must add up */ if (data->u.runstate.state_entry_time != (data->u.runstate.time_running + data->u.runstate.time_runnable + data->u.runstate.time_blocked + data->u.runstate.time_offline)) { r = -EINVAL; break; } if (get_kvmclock_ns(vcpu->kvm) < (vcpu->arch.xen.runstate_entry_time + data->u.runstate.state_entry_time)) { r = -EINVAL; break; } vcpu->arch.xen.runstate_entry_time += data->u.runstate.state_entry_time; vcpu->arch.xen.runstate_times[RUNSTATE_running] += data->u.runstate.time_running; vcpu->arch.xen.runstate_times[RUNSTATE_runnable] += data->u.runstate.time_runnable; vcpu->arch.xen.runstate_times[RUNSTATE_blocked] += data->u.runstate.time_blocked; vcpu->arch.xen.runstate_times[RUNSTATE_offline] += data->u.runstate.time_offline; if (data->u.runstate.state <= RUNSTATE_offline) kvm_xen_update_runstate(vcpu, data->u.runstate.state); else if (vcpu->arch.xen.runstate_cache.active) kvm_xen_update_runstate_guest(vcpu, false); r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID: if (data->u.vcpu_id >= KVM_MAX_VCPUS) r = -EINVAL; else { vcpu->arch.xen.vcpu_id = data->u.vcpu_id; r = 0; } break; case KVM_XEN_VCPU_ATTR_TYPE_TIMER: if (data->u.timer.port && data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) { r = -EINVAL; break; } /* Stop the timer (if it's running) before changing the vector */ kvm_xen_stop_timer(vcpu); vcpu->arch.xen.timer_virq = data->u.timer.port; /* Start the timer if the new value has a valid vector+expiry. */ if (data->u.timer.port && data->u.timer.expires_ns) kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, false); r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR: if (data->u.vector && data->u.vector < 0x10) r = -EINVAL; else { vcpu->arch.xen.upcall_vector = data->u.vector; r = 0; } break; default: break; } srcu_read_unlock(&vcpu->kvm->srcu, idx); mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) { int r = -ENOENT; mutex_lock(&vcpu->kvm->arch.xen.xen_lock); switch (data->type) { case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO: if (kvm_gpc_is_gpa_active(&vcpu->arch.xen.vcpu_info_cache)) data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa; else data->u.gpa = KVM_XEN_INVALID_GPA; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA: if (kvm_gpc_is_hva_active(&vcpu->arch.xen.vcpu_info_cache)) data->u.hva = vcpu->arch.xen.vcpu_info_cache.uhva; else data->u.hva = 0; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (vcpu->arch.xen.vcpu_time_info_cache.active) data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa; else data->u.gpa = KVM_XEN_INVALID_GPA; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (vcpu->arch.xen.runstate_cache.active) { data->u.gpa = vcpu->arch.xen.runstate_cache.gpa; r = 0; } break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } data->u.runstate.state = vcpu->arch.xen.current_runstate; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: if (!sched_info_on()) { r = -EOPNOTSUPP; break; } data->u.runstate.state = vcpu->arch.xen.current_runstate; data->u.runstate.state_entry_time = vcpu->arch.xen.runstate_entry_time; data->u.runstate.time_running = vcpu->arch.xen.runstate_times[RUNSTATE_running]; data->u.runstate.time_runnable = vcpu->arch.xen.runstate_times[RUNSTATE_runnable]; data->u.runstate.time_blocked = vcpu->arch.xen.runstate_times[RUNSTATE_blocked]; data->u.runstate.time_offline = vcpu->arch.xen.runstate_times[RUNSTATE_offline]; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: r = -EINVAL; break; case KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID: data->u.vcpu_id = vcpu->arch.xen.vcpu_id; r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_TIMER: /* * Ensure a consistent snapshot of state is captured, with a * timer either being pending, or the event channel delivered * to the corresponding bit in the shared_info. Not still * lurking in the timer_pending flag for deferred delivery. * Purely as an optimisation, if the timer_expires field is * zero, that means the timer isn't active (or even in the * timer_pending flag) and there is no need to cancel it. */ if (vcpu->arch.xen.timer_expires) { hrtimer_cancel(&vcpu->arch.xen.timer); kvm_xen_inject_timer_irqs(vcpu); } data->u.timer.port = vcpu->arch.xen.timer_virq; data->u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL; data->u.timer.expires_ns = vcpu->arch.xen.timer_expires; /* * The hrtimer may trigger and raise the IRQ immediately, * while the returned state causes it to be set up and * raised again on the destination system after migration. * That's fine, as the guest won't even have had a chance * to run and handle the interrupt. Asserting an already * pending event channel is idempotent. */ if (vcpu->arch.xen.timer_expires) hrtimer_start_expires(&vcpu->arch.xen.timer, HRTIMER_MODE_ABS_HARD); r = 0; break; case KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR: data->u.vector = vcpu->arch.xen.upcall_vector; r = 0; break; default: break; } mutex_unlock(&vcpu->kvm->arch.xen.xen_lock); return r; } int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data) { struct kvm *kvm = vcpu->kvm; u32 page_num = data & ~PAGE_MASK; u64 page_addr = data & PAGE_MASK; bool lm = is_long_mode(vcpu); int r = 0; mutex_lock(&kvm->arch.xen.xen_lock); if (kvm->arch.xen.long_mode != lm) { kvm->arch.xen.long_mode = lm; /* * Re-initialize shared_info to put the wallclock in the * correct place. */ if (kvm->arch.xen.shinfo_cache.active && kvm_xen_shared_info_init(kvm)) r = 1; } mutex_unlock(&kvm->arch.xen.xen_lock); if (r) return r; /* * If Xen hypercall intercept is enabled, fill the hypercall * page with VMCALL/VMMCALL instructions since that's what * we catch. Else the VMM has provided the hypercall pages * with instructions of its own choosing, so use those. */ if (kvm_xen_hypercall_enabled(kvm)) { u8 instructions[32]; int i; if (page_num) return 1; /* mov imm32, %eax */ instructions[0] = 0xb8; /* vmcall / vmmcall */ kvm_x86_call(patch_hypercall)(vcpu, instructions + 5); /* ret */ instructions[8] = 0xc3; /* int3 to pad */ memset(instructions + 9, 0xcc, sizeof(instructions) - 9); for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) { *(u32 *)&instructions[1] = i; if (kvm_vcpu_write_guest(vcpu, page_addr + (i * sizeof(instructions)), instructions, sizeof(instructions))) return 1; } } else { /* * Note, truncation is a non-issue as 'lm' is guaranteed to be * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes. */ hva_t blob_addr = lm ? kvm->arch.xen.hvm_config.blob_addr_64 : kvm->arch.xen.hvm_config.blob_addr_32; u8 blob_size = lm ? kvm->arch.xen.hvm_config.blob_size_64 : kvm->arch.xen.hvm_config.blob_size_32; u8 *page; int ret; if (page_num >= blob_size) return 1; blob_addr += page_num * PAGE_SIZE; page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE); if (IS_ERR(page)) return PTR_ERR(page); ret = kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE); kfree(page); if (ret) return 1; } return 0; } int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc) { /* Only some feature flags need to be *enabled* by userspace */ u32 permitted_flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_EVTCHN_SEND | KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE; u32 old_flags; if (xhc->flags & ~permitted_flags) return -EINVAL; /* * With hypercall interception the kernel generates its own * hypercall page so it must not be provided. */ if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) && (xhc->blob_addr_32 || xhc->blob_addr_64 || xhc->blob_size_32 || xhc->blob_size_64)) return -EINVAL; /* * Restrict the MSR to the range that is unofficially reserved for * synthetic, virtualization-defined MSRs, e.g. to prevent confusing * KVM by colliding with a real MSR that requires special handling. */ if (xhc->msr && (xhc->msr < KVM_XEN_MSR_MIN_INDEX || xhc->msr > KVM_XEN_MSR_MAX_INDEX)) return -EINVAL; mutex_lock(&kvm->arch.xen.xen_lock); if (xhc->msr && !kvm->arch.xen.hvm_config.msr) static_branch_inc(&kvm_xen_enabled.key); else if (!xhc->msr && kvm->arch.xen.hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); old_flags = kvm->arch.xen.hvm_config.flags; memcpy(&kvm->arch.xen.hvm_config, xhc, sizeof(*xhc)); mutex_unlock(&kvm->arch.xen.xen_lock); if ((old_flags ^ xhc->flags) & KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE) kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE); return 0; } static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) { kvm_rax_write(vcpu, result); return kvm_skip_emulated_instruction(vcpu); } static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip))) return 1; return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result); } static inline int max_evtchn_port(struct kvm *kvm) { if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) return EVTCHN_2L_NR_CHANNELS; else return COMPAT_EVTCHN_2L_NR_CHANNELS; } static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, evtchn_port_t *ports) { struct kvm *kvm = vcpu->kvm; struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; unsigned long *pending_bits; unsigned long flags; bool ret = true; int idx, i; idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; ret = false; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct shared_info *shinfo = gpc->khva; pending_bits = (unsigned long *)&shinfo->evtchn_pending; } else { struct compat_shared_info *shinfo = gpc->khva; pending_bits = (unsigned long *)&shinfo->evtchn_pending; } for (i = 0; i < nr_ports; i++) { if (test_bit(ports[i], pending_bits)) { ret = true; break; } } out_rcu: read_unlock_irqrestore(&gpc->lock, flags); srcu_read_unlock(&kvm->srcu, idx); return ret; } static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, u64 param, u64 *r) { struct sched_poll sched_poll; evtchn_port_t port, *ports; struct x86_exception e; int i; if (!lapic_in_kernel(vcpu) || !(vcpu->kvm->arch.xen.hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) return false; if (IS_ENABLED(CONFIG_64BIT) && !longmode) { struct compat_sched_poll sp32; /* Sanity check that the compat struct definition is correct */ BUILD_BUG_ON(sizeof(sp32) != 16); if (kvm_read_guest_virt(vcpu, param, &sp32, sizeof(sp32), &e)) { *r = -EFAULT; return true; } /* * This is a 32-bit pointer to an array of evtchn_port_t which * are uint32_t, so once it's converted no further compat * handling is needed. */ sched_poll.ports = (void *)(unsigned long)(sp32.ports); sched_poll.nr_ports = sp32.nr_ports; sched_poll.timeout = sp32.timeout; } else { if (kvm_read_guest_virt(vcpu, param, &sched_poll, sizeof(sched_poll), &e)) { *r = -EFAULT; return true; } } if (unlikely(sched_poll.nr_ports > 1)) { /* Xen (unofficially) limits number of pollers to 128 */ if (sched_poll.nr_ports > 128) { *r = -EINVAL; return true; } ports = kmalloc_array(sched_poll.nr_ports, sizeof(*ports), GFP_KERNEL); if (!ports) { *r = -ENOMEM; return true; } } else ports = &port; if (kvm_read_guest_virt(vcpu, (gva_t)sched_poll.ports, ports, sched_poll.nr_ports * sizeof(*ports), &e)) { *r = -EFAULT; return true; } for (i = 0; i < sched_poll.nr_ports; i++) { if (ports[i] >= max_evtchn_port(vcpu->kvm)) { *r = -EINVAL; goto out; } } if (sched_poll.nr_ports == 1) vcpu->arch.xen.poll_evtchn = port; else vcpu->arch.xen.poll_evtchn = -1; set_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask); if (!wait_pending_event(vcpu, sched_poll.nr_ports, ports)) { kvm_set_mp_state(vcpu, KVM_MP_STATE_HALTED); if (sched_poll.timeout) mod_timer(&vcpu->arch.xen.poll_timer, jiffies + nsecs_to_jiffies(sched_poll.timeout)); kvm_vcpu_halt(vcpu); if (sched_poll.timeout) timer_delete(&vcpu->arch.xen.poll_timer); kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE); } vcpu->arch.xen.poll_evtchn = 0; *r = 0; out: /* Really, this is only needed in case of timeout */ clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask); if (unlikely(sched_poll.nr_ports > 1)) kfree(ports); return true; } static void cancel_evtchn_poll(struct timer_list *t) { struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.xen.poll_timer); kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); } static bool kvm_xen_hcall_sched_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, u64 param, u64 *r) { switch (cmd) { case SCHEDOP_poll: if (kvm_xen_schedop_poll(vcpu, longmode, param, r)) return true; fallthrough; case SCHEDOP_yield: kvm_vcpu_on_spin(vcpu, true); *r = 0; return true; default: break; } return false; } struct compat_vcpu_set_singleshot_timer { uint64_t timeout_abs_ns; uint32_t flags; } __attribute__((packed)); static bool kvm_xen_hcall_vcpu_op(struct kvm_vcpu *vcpu, bool longmode, int cmd, int vcpu_id, u64 param, u64 *r) { struct vcpu_set_singleshot_timer oneshot; struct x86_exception e; if (!kvm_xen_timer_enabled(vcpu)) return false; switch (cmd) { case VCPUOP_set_singleshot_timer: if (vcpu->arch.xen.vcpu_id != vcpu_id) { *r = -EINVAL; return true; } /* * The only difference for 32-bit compat is the 4 bytes of * padding after the interesting part of the structure. So * for a faithful emulation of Xen we have to *try* to copy * the padding and return -EFAULT if we can't. Otherwise we * might as well just have copied the 12-byte 32-bit struct. */ BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) != offsetof(struct vcpu_set_singleshot_timer, timeout_abs_ns)); BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, timeout_abs_ns) != sizeof_field(struct vcpu_set_singleshot_timer, timeout_abs_ns)); BUILD_BUG_ON(offsetof(struct compat_vcpu_set_singleshot_timer, flags) != offsetof(struct vcpu_set_singleshot_timer, flags)); BUILD_BUG_ON(sizeof_field(struct compat_vcpu_set_singleshot_timer, flags) != sizeof_field(struct vcpu_set_singleshot_timer, flags)); if (kvm_read_guest_virt(vcpu, param, &oneshot, longmode ? sizeof(oneshot) : sizeof(struct compat_vcpu_set_singleshot_timer), &e)) { *r = -EFAULT; return true; } kvm_xen_start_timer(vcpu, oneshot.timeout_abs_ns, false); *r = 0; return true; case VCPUOP_stop_singleshot_timer: if (vcpu->arch.xen.vcpu_id != vcpu_id) { *r = -EINVAL; return true; } kvm_xen_stop_timer(vcpu); *r = 0; return true; } return false; } static bool kvm_xen_hcall_set_timer_op(struct kvm_vcpu *vcpu, uint64_t timeout, u64 *r) { if (!kvm_xen_timer_enabled(vcpu)) return false; if (timeout) kvm_xen_start_timer(vcpu, timeout, true); else kvm_xen_stop_timer(vcpu); *r = 0; return true; } int kvm_xen_hypercall(struct kvm_vcpu *vcpu) { bool longmode; u64 input, params[6], r = -ENOSYS; bool handled = false; u8 cpl; input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX); /* Hyper-V hypercalls get bit 31 set in EAX */ if ((input & 0x80000000) && kvm_hv_hypercall_enabled(vcpu)) return kvm_hv_hypercall(vcpu); longmode = is_64_bit_hypercall(vcpu); if (!longmode) { params[0] = (u32)kvm_rbx_read(vcpu); params[1] = (u32)kvm_rcx_read(vcpu); params[2] = (u32)kvm_rdx_read(vcpu); params[3] = (u32)kvm_rsi_read(vcpu); params[4] = (u32)kvm_rdi_read(vcpu); params[5] = (u32)kvm_rbp_read(vcpu); } #ifdef CONFIG_X86_64 else { params[0] = (u64)kvm_rdi_read(vcpu); params[1] = (u64)kvm_rsi_read(vcpu); params[2] = (u64)kvm_rdx_read(vcpu); params[3] = (u64)kvm_r10_read(vcpu); params[4] = (u64)kvm_r8_read(vcpu); params[5] = (u64)kvm_r9_read(vcpu); } #endif cpl = kvm_x86_call(get_cpl)(vcpu); trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2], params[3], params[4], params[5]); /* * Only allow hypercall acceleration for CPL0. The rare hypercalls that * are permitted in guest userspace can be handled by the VMM. */ if (unlikely(cpl > 0)) goto handle_in_userspace; switch (input) { case __HYPERVISOR_xen_version: if (params[0] == XENVER_version && vcpu->kvm->arch.xen.xen_version) { r = vcpu->kvm->arch.xen.xen_version; handled = true; } break; case __HYPERVISOR_event_channel_op: if (params[0] == EVTCHNOP_send) handled = kvm_xen_hcall_evtchn_send(vcpu, params[1], &r); break; case __HYPERVISOR_sched_op: handled = kvm_xen_hcall_sched_op(vcpu, longmode, params[0], params[1], &r); break; case __HYPERVISOR_vcpu_op: handled = kvm_xen_hcall_vcpu_op(vcpu, longmode, params[0], params[1], params[2], &r); break; case __HYPERVISOR_set_timer_op: { u64 timeout = params[0]; /* In 32-bit mode, the 64-bit timeout is in two 32-bit params. */ if (!longmode) timeout |= params[1] << 32; handled = kvm_xen_hcall_set_timer_op(vcpu, timeout, &r); break; } default: break; } if (handled) return kvm_xen_hypercall_set_result(vcpu, r); handle_in_userspace: vcpu->run->exit_reason = KVM_EXIT_XEN; vcpu->run->xen.type = KVM_EXIT_XEN_HCALL; vcpu->run->xen.u.hcall.longmode = longmode; vcpu->run->xen.u.hcall.cpl = cpl; vcpu->run->xen.u.hcall.input = input; vcpu->run->xen.u.hcall.params[0] = params[0]; vcpu->run->xen.u.hcall.params[1] = params[1]; vcpu->run->xen.u.hcall.params[2] = params[2]; vcpu->run->xen.u.hcall.params[3] = params[3]; vcpu->run->xen.u.hcall.params[4] = params[4]; vcpu->run->xen.u.hcall.params[5] = params[5]; vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = kvm_xen_hypercall_complete_userspace; return 0; } static void kvm_xen_check_poller(struct kvm_vcpu *vcpu, int port) { int poll_evtchn = vcpu->arch.xen.poll_evtchn; if ((poll_evtchn == port || poll_evtchn == -1) && test_and_clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.xen.poll_mask)) { kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); } } /* * The return value from this function is propagated to kvm_set_irq() API, * so it returns: * < 0 Interrupt was ignored (masked or not delivered for other reasons) * = 0 Interrupt was coalesced (previous irq is still pending) * > 0 Number of CPUs interrupt was delivered to * * It is also called directly from kvm_arch_set_irq_inatomic(), where the * only check on its return value is a comparison with -EWOULDBLOCK'. */ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) { struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; struct kvm_vcpu *vcpu; unsigned long *pending_bits, *mask_bits; unsigned long flags; int port_word_bit; bool kick_vcpu = false; int vcpu_idx, idx, rc; vcpu_idx = READ_ONCE(xe->vcpu_idx); if (vcpu_idx >= 0) vcpu = kvm_get_vcpu(kvm, vcpu_idx); else { vcpu = kvm_get_vcpu_by_id(kvm, xe->vcpu_id); if (!vcpu) return -EINVAL; WRITE_ONCE(xe->vcpu_idx, vcpu->vcpu_idx); } if (xe->port >= max_evtchn_port(kvm)) return -EINVAL; rc = -EWOULDBLOCK; idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct shared_info *shinfo = gpc->khva; pending_bits = (unsigned long *)&shinfo->evtchn_pending; mask_bits = (unsigned long *)&shinfo->evtchn_mask; port_word_bit = xe->port / 64; } else { struct compat_shared_info *shinfo = gpc->khva; pending_bits = (unsigned long *)&shinfo->evtchn_pending; mask_bits = (unsigned long *)&shinfo->evtchn_mask; port_word_bit = xe->port / 32; } /* * If this port wasn't already set, and if it isn't masked, then * we try to set the corresponding bit in the in-kernel shadow of * evtchn_pending_sel for the target vCPU. And if *that* wasn't * already set, then we kick the vCPU in question to write to the * *real* evtchn_pending_sel in its own guest vcpu_info struct. */ if (test_and_set_bit(xe->port, pending_bits)) { rc = 0; /* It was already raised */ } else if (test_bit(xe->port, mask_bits)) { rc = -ENOTCONN; /* Masked */ kvm_xen_check_poller(vcpu, xe->port); } else { rc = 1; /* Delivered to the bitmap in shared_info. */ /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */ read_unlock_irqrestore(&gpc->lock, flags); gpc = &vcpu->arch.xen.vcpu_info_cache; read_lock_irqsave(&gpc->lock, flags); if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { /* * Could not access the vcpu_info. Set the bit in-kernel * and prod the vCPU to deliver it for itself. */ if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel)) kick_vcpu = true; goto out_rcu; } if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { struct vcpu_info *vcpu_info = gpc->khva; if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) { WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); kick_vcpu = true; } } else { struct compat_vcpu_info *vcpu_info = gpc->khva; if (!test_and_set_bit(port_word_bit, (unsigned long *)&vcpu_info->evtchn_pending_sel)) { WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1); kick_vcpu = true; } } /* For the per-vCPU lapic vector, deliver it as MSI. */ if (kick_vcpu && vcpu->arch.xen.upcall_vector) { kvm_xen_inject_vcpu_vector(vcpu); kick_vcpu = false; } } out_rcu: read_unlock_irqrestore(&gpc->lock, flags); srcu_read_unlock(&kvm->srcu, idx); if (kick_vcpu) { kvm_make_request(KVM_REQ_UNBLOCK, vcpu); kvm_vcpu_kick(vcpu); } return rc; } static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) { bool mm_borrowed = false; int rc; rc = kvm_xen_set_evtchn_fast(xe, kvm); if (rc != -EWOULDBLOCK) return rc; if (current->mm != kvm->mm) { /* * If not on a thread which already belongs to this KVM, * we'd better be in the irqfd workqueue. */ if (WARN_ON_ONCE(current->mm)) return -EINVAL; kthread_use_mm(kvm->mm); mm_borrowed = true; } /* * It is theoretically possible for the page to be unmapped * and the MMU notifier to invalidate the shared_info before * we even get to use it. In that case, this looks like an * infinite loop. It was tempting to do it via the userspace * HVA instead... but that just *hides* the fact that it's * an infinite loop, because if a fault occurs and it waits * for the page to come back, it can *still* immediately * fault and have to wait again, repeatedly. * * Conversely, the page could also have been reinstated by * another thread before we even obtain the mutex above, so * check again *first* before remapping it. */ do { struct gfn_to_pfn_cache *gpc = &kvm->arch.xen.shinfo_cache; int idx; rc = kvm_xen_set_evtchn_fast(xe, kvm); if (rc != -EWOULDBLOCK) break; idx = srcu_read_lock(&kvm->srcu); rc = kvm_gpc_refresh(gpc, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); if (mm_borrowed) kthread_unuse_mm(kvm->mm); return rc; } /* This is the version called from kvm_set_irq() as the .set function */ static int evtchn_set_fn(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { if (!level) return -EINVAL; return kvm_xen_set_evtchn(&e->xen_evtchn, kvm); } /* * Set up an event channel interrupt from the KVM IRQ routing table. * Used for e.g. PIRQ from passed through physical devices. */ int kvm_xen_setup_evtchn(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { struct kvm_vcpu *vcpu; if (ue->u.xen_evtchn.port >= max_evtchn_port(kvm)) return -EINVAL; /* We only support 2 level event channels for now */ if (ue->u.xen_evtchn.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) return -EINVAL; /* * Xen gives us interesting mappings from vCPU index to APIC ID, * which means kvm_get_vcpu_by_id() has to iterate over all vCPUs * to find it. Do that once at setup time, instead of every time. * But beware that on live update / live migration, the routing * table might be reinstated before the vCPU threads have finished * recreating their vCPUs. */ vcpu = kvm_get_vcpu_by_id(kvm, ue->u.xen_evtchn.vcpu); if (vcpu) e->xen_evtchn.vcpu_idx = vcpu->vcpu_idx; else e->xen_evtchn.vcpu_idx = -1; e->xen_evtchn.port = ue->u.xen_evtchn.port; e->xen_evtchn.vcpu_id = ue->u.xen_evtchn.vcpu; e->xen_evtchn.priority = ue->u.xen_evtchn.priority; e->set = evtchn_set_fn; return 0; } /* * Explicit event sending from userspace with KVM_XEN_HVM_EVTCHN_SEND ioctl. */ int kvm_xen_hvm_evtchn_send(struct kvm *kvm, struct kvm_irq_routing_xen_evtchn *uxe) { struct kvm_xen_evtchn e; int ret; if (!uxe->port || uxe->port >= max_evtchn_port(kvm)) return -EINVAL; /* We only support 2 level event channels for now */ if (uxe->priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) return -EINVAL; e.port = uxe->port; e.vcpu_id = uxe->vcpu; e.vcpu_idx = -1; e.priority = uxe->priority; ret = kvm_xen_set_evtchn(&e, kvm); /* * None of that 'return 1 if it actually got delivered' nonsense. * We don't care if it was masked (-ENOTCONN) either. */ if (ret > 0 || ret == -ENOTCONN) ret = 0; return ret; } /* * Support for *outbound* event channel events via the EVTCHNOP_send hypercall. */ struct evtchnfd { u32 send_port; u32 type; union { struct kvm_xen_evtchn port; struct { u32 port; /* zero */ struct eventfd_ctx *ctx; } eventfd; } deliver; }; /* * Update target vCPU or priority for a registered sending channel. */ static int kvm_xen_eventfd_update(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { u32 port = data->u.evtchn.send_port; struct evtchnfd *evtchnfd; int ret; /* Protect writes to evtchnfd as well as the idr lookup. */ mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_find(&kvm->arch.xen.evtchn_ports, port); ret = -ENOENT; if (!evtchnfd) goto out_unlock; /* For an UPDATE, nothing may change except the priority/vcpu */ ret = -EINVAL; if (evtchnfd->type != data->u.evtchn.type) goto out_unlock; /* * Port cannot change, and if it's zero that was an eventfd * which can't be changed either. */ if (!evtchnfd->deliver.port.port || evtchnfd->deliver.port.port != data->u.evtchn.deliver.port.port) goto out_unlock; /* We only support 2 level event channels for now */ if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) goto out_unlock; evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; if (evtchnfd->deliver.port.vcpu_id != data->u.evtchn.deliver.port.vcpu) { evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; evtchnfd->deliver.port.vcpu_idx = -1; } ret = 0; out_unlock: mutex_unlock(&kvm->arch.xen.xen_lock); return ret; } /* * Configure the target (eventfd or local port delivery) for sending on * a given event channel. */ static int kvm_xen_eventfd_assign(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { u32 port = data->u.evtchn.send_port; struct eventfd_ctx *eventfd = NULL; struct evtchnfd *evtchnfd; int ret = -EINVAL; evtchnfd = kzalloc(sizeof(struct evtchnfd), GFP_KERNEL); if (!evtchnfd) return -ENOMEM; switch(data->u.evtchn.type) { case EVTCHNSTAT_ipi: /* IPI must map back to the same port# */ if (data->u.evtchn.deliver.port.port != data->u.evtchn.send_port) goto out_noeventfd; /* -EINVAL */ break; case EVTCHNSTAT_interdomain: if (data->u.evtchn.deliver.port.port) { if (data->u.evtchn.deliver.port.port >= max_evtchn_port(kvm)) goto out_noeventfd; /* -EINVAL */ } else { eventfd = eventfd_ctx_fdget(data->u.evtchn.deliver.eventfd.fd); if (IS_ERR(eventfd)) { ret = PTR_ERR(eventfd); goto out_noeventfd; } } break; case EVTCHNSTAT_virq: case EVTCHNSTAT_closed: case EVTCHNSTAT_unbound: case EVTCHNSTAT_pirq: default: /* Unknown event channel type */ goto out; /* -EINVAL */ } evtchnfd->send_port = data->u.evtchn.send_port; evtchnfd->type = data->u.evtchn.type; if (eventfd) { evtchnfd->deliver.eventfd.ctx = eventfd; } else { /* We only support 2 level event channels for now */ if (data->u.evtchn.deliver.port.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) goto out; /* -EINVAL; */ evtchnfd->deliver.port.port = data->u.evtchn.deliver.port.port; evtchnfd->deliver.port.vcpu_id = data->u.evtchn.deliver.port.vcpu; evtchnfd->deliver.port.vcpu_idx = -1; evtchnfd->deliver.port.priority = data->u.evtchn.deliver.port.priority; } mutex_lock(&kvm->arch.xen.xen_lock); ret = idr_alloc(&kvm->arch.xen.evtchn_ports, evtchnfd, port, port + 1, GFP_KERNEL); mutex_unlock(&kvm->arch.xen.xen_lock); if (ret >= 0) return 0; if (ret == -ENOSPC) ret = -EEXIST; out: if (eventfd) eventfd_ctx_put(eventfd); out_noeventfd: kfree(evtchnfd); return ret; } static int kvm_xen_eventfd_deassign(struct kvm *kvm, u32 port) { struct evtchnfd *evtchnfd; mutex_lock(&kvm->arch.xen.xen_lock); evtchnfd = idr_remove(&kvm->arch.xen.evtchn_ports, port); mutex_unlock(&kvm->arch.xen.xen_lock); if (!evtchnfd) return -ENOENT; synchronize_srcu(&kvm->srcu); if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); return 0; } static int kvm_xen_eventfd_reset(struct kvm *kvm) { struct evtchnfd *evtchnfd, **all_evtchnfds; int i; int n = 0; mutex_lock(&kvm->arch.xen.xen_lock); /* * Because synchronize_srcu() cannot be called inside the * critical section, first collect all the evtchnfd objects * in an array as they are removed from evtchn_ports. */ idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) n++; all_evtchnfds = kmalloc_array(n, sizeof(struct evtchnfd *), GFP_KERNEL); if (!all_evtchnfds) { mutex_unlock(&kvm->arch.xen.xen_lock); return -ENOMEM; } n = 0; idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { all_evtchnfds[n++] = evtchnfd; idr_remove(&kvm->arch.xen.evtchn_ports, evtchnfd->send_port); } mutex_unlock(&kvm->arch.xen.xen_lock); synchronize_srcu(&kvm->srcu); while (n--) { evtchnfd = all_evtchnfds[n]; if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); } kfree(all_evtchnfds); return 0; } static int kvm_xen_setattr_evtchn(struct kvm *kvm, struct kvm_xen_hvm_attr *data) { u32 port = data->u.evtchn.send_port; if (data->u.evtchn.flags == KVM_XEN_EVTCHN_RESET) return kvm_xen_eventfd_reset(kvm); if (!port || port >= max_evtchn_port(kvm)) return -EINVAL; if (data->u.evtchn.flags == KVM_XEN_EVTCHN_DEASSIGN) return kvm_xen_eventfd_deassign(kvm, port); if (data->u.evtchn.flags == KVM_XEN_EVTCHN_UPDATE) return kvm_xen_eventfd_update(kvm, data); if (data->u.evtchn.flags) return -EINVAL; return kvm_xen_eventfd_assign(kvm, data); } static bool kvm_xen_hcall_evtchn_send(struct kvm_vcpu *vcpu, u64 param, u64 *r) { struct evtchnfd *evtchnfd; struct evtchn_send send; struct x86_exception e; /* Sanity check: this structure is the same for 32-bit and 64-bit */ BUILD_BUG_ON(sizeof(send) != 4); if (kvm_read_guest_virt(vcpu, param, &send, sizeof(send), &e)) { *r = -EFAULT; return true; } /* * evtchnfd is protected by kvm->srcu; the idr lookup instead * is protected by RCU. */ rcu_read_lock(); evtchnfd = idr_find(&vcpu->kvm->arch.xen.evtchn_ports, send.port); rcu_read_unlock(); if (!evtchnfd) return false; if (evtchnfd->deliver.port.port) { int ret = kvm_xen_set_evtchn(&evtchnfd->deliver.port, vcpu->kvm); if (ret < 0 && ret != -ENOTCONN) return false; } else { eventfd_signal(evtchnfd->deliver.eventfd.ctx); } *r = 0; return true; } void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) { vcpu->arch.xen.vcpu_id = vcpu->vcpu_idx; vcpu->arch.xen.poll_evtchn = 0; timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); hrtimer_setup(&vcpu->arch.xen.timer, xen_timer_callback, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm); kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm); } void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) { if (kvm_xen_timer_enabled(vcpu)) kvm_xen_stop_timer(vcpu); kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); timer_delete_sync(&vcpu->arch.xen.poll_timer); } void kvm_xen_init_vm(struct kvm *kvm) { mutex_init(&kvm->arch.xen.xen_lock); idr_init(&kvm->arch.xen.evtchn_ports); kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm); } void kvm_xen_destroy_vm(struct kvm *kvm) { struct evtchnfd *evtchnfd; int i; kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { if (!evtchnfd->deliver.port.port) eventfd_ctx_put(evtchnfd->deliver.eventfd.ctx); kfree(evtchnfd); } idr_destroy(&kvm->arch.xen.evtchn_ports); if (kvm->arch.xen.hvm_config.msr) static_branch_slow_dec_deferred(&kvm_xen_enabled); }
2 2 2 4 4 2 4 6 6 6 4 6 6 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 // SPDX-License-Identifier: GPL-2.0-or-later /* * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation * * Authors: Thomas Graf <tgraf@suug.ch> * * ========================================================================== * * Implements a linear-time string-matching algorithm due to Knuth, * Morris, and Pratt [1]. Their algorithm avoids the explicit * computation of the transition function DELTA altogether. Its * matching time is O(n), for n being length(text), using just an * auxiliary function PI[1..m], for m being length(pattern), * precomputed from the pattern in time O(m). The array PI allows * the transition function DELTA to be computed efficiently * "on the fly" as needed. Roughly speaking, for any state * "q" = 0,1,...,m and any character "a" in SIGMA, the value * PI["q"] contains the information that is independent of "a" and * is needed to compute DELTA("q", "a") [2]. Since the array PI * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we * save a factor of |SIGMA| in the preprocessing time by computing * PI rather than DELTA. * * [1] Cormen, Leiserson, Rivest, Stein * Introdcution to Algorithms, 2nd Edition, MIT Press * [2] See finite automaton theory */ #include <linux/module.h> #include <linux/types.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/textsearch.h> struct ts_kmp { u8 * pattern; unsigned int pattern_len; unsigned int prefix_tbl[]; }; static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state) { struct ts_kmp *kmp = ts_config_priv(conf); unsigned int i, q = 0, text_len, consumed = state->offset; const u8 *text; const int icase = conf->flags & TS_IGNORECASE; for (;;) { text_len = conf->get_next_block(consumed, &text, conf, state); if (unlikely(text_len == 0)) break; for (i = 0; i < text_len; i++) { while (q > 0 && kmp->pattern[q] != (icase ? toupper(text[i]) : text[i])) q = kmp->prefix_tbl[q - 1]; if (kmp->pattern[q] == (icase ? toupper(text[i]) : text[i])) q++; if (unlikely(q == kmp->pattern_len)) { state->offset = consumed + i + 1; return state->offset - kmp->pattern_len; } } consumed += text_len; } return UINT_MAX; } static inline void compute_prefix_tbl(const u8 *pattern, unsigned int len, unsigned int *prefix_tbl, int flags) { unsigned int k, q; const u8 icase = flags & TS_IGNORECASE; for (k = 0, q = 1; q < len; q++) { while (k > 0 && (icase ? toupper(pattern[k]) : pattern[k]) != (icase ? toupper(pattern[q]) : pattern[q])) k = prefix_tbl[k-1]; if ((icase ? toupper(pattern[k]) : pattern[k]) == (icase ? toupper(pattern[q]) : pattern[q])) k++; prefix_tbl[q] = k; } } static struct ts_config *kmp_init(const void *pattern, unsigned int len, gfp_t gfp_mask, int flags) { struct ts_config *conf; struct ts_kmp *kmp; int i; unsigned int prefix_tbl_len = len * sizeof(unsigned int); size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len; conf = alloc_ts_config(priv_size, gfp_mask); if (IS_ERR(conf)) return conf; conf->flags = flags; kmp = ts_config_priv(conf); kmp->pattern_len = len; compute_prefix_tbl(pattern, len, kmp->prefix_tbl, flags); kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len; if (flags & TS_IGNORECASE) for (i = 0; i < len; i++) kmp->pattern[i] = toupper(((u8 *)pattern)[i]); else memcpy(kmp->pattern, pattern, len); return conf; } static void *kmp_get_pattern(struct ts_config *conf) { struct ts_kmp *kmp = ts_config_priv(conf); return kmp->pattern; } static unsigned int kmp_get_pattern_len(struct ts_config *conf) { struct ts_kmp *kmp = ts_config_priv(conf); return kmp->pattern_len; } static struct ts_ops kmp_ops = { .name = "kmp", .find = kmp_find, .init = kmp_init, .get_pattern = kmp_get_pattern, .get_pattern_len = kmp_get_pattern_len, .owner = THIS_MODULE, .list = LIST_HEAD_INIT(kmp_ops.list) }; static int __init init_kmp(void) { return textsearch_register(&kmp_ops); } static void __exit exit_kmp(void) { textsearch_unregister(&kmp_ops); } MODULE_DESCRIPTION("Knuth-Morris-Pratt text search implementation"); MODULE_LICENSE("GPL"); module_init(init_kmp); module_exit(exit_kmp);
1 1 1 1 1 1 1 1 1 1 1 38 36 37 37 38 38 38 7 31 31 31 30 31 31 31 30 1 1 1 1 1 1 38 1 1 37 38 37 9 9 175 174 173 173 174 171 37 37 38 37 38 38 38 38 38 38 38 38 8 4 8 139 139 61 63 62 63 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 // SPDX-License-Identifier: GPL-2.0-or-later #include <net/gro.h> #include <net/dst_metadata.h> #include <net/busy_poll.h> #include <trace/events/net.h> #include <linux/skbuff_ref.h> #define MAX_GRO_SKBS 8 static DEFINE_SPINLOCK(offload_lock); /** * dev_add_offload - register offload handlers * @po: protocol offload declaration * * Add protocol offload handlers to the networking stack. The passed * &proto_offload is linked into kernel lists and may not be freed until * it has been removed from the kernel lists. * * This call does not sleep therefore it can not * guarantee all CPU's that are in middle of receiving packets * will see the new offload handlers (until the next received packet). */ void dev_add_offload(struct packet_offload *po) { struct packet_offload *elem; spin_lock(&offload_lock); list_for_each_entry(elem, &net_hotdata.offload_base, list) { if (po->priority < elem->priority) break; } list_add_rcu(&po->list, elem->list.prev); spin_unlock(&offload_lock); } EXPORT_SYMBOL(dev_add_offload); /** * __dev_remove_offload - remove offload handler * @po: packet offload declaration * * Remove a protocol offload handler that was previously added to the * kernel offload handlers by dev_add_offload(). The passed &offload_type * is removed from the kernel lists and can be freed or reused once this * function returns. * * The packet type might still be in use by receivers * and must not be freed until after all the CPU's have gone * through a quiescent state. */ static void __dev_remove_offload(struct packet_offload *po) { struct list_head *head = &net_hotdata.offload_base; struct packet_offload *po1; spin_lock(&offload_lock); list_for_each_entry(po1, head, list) { if (po == po1) { list_del_rcu(&po->list); goto out; } } pr_warn("dev_remove_offload: %p not found\n", po); out: spin_unlock(&offload_lock); } /** * dev_remove_offload - remove packet offload handler * @po: packet offload declaration * * Remove a packet offload handler that was previously added to the kernel * offload handlers by dev_add_offload(). The passed &offload_type is * removed from the kernel lists and can be freed or reused once this * function returns. * * This call sleeps to guarantee that no CPU is looking at the packet * type after return. */ void dev_remove_offload(struct packet_offload *po) { __dev_remove_offload(po); synchronize_net(); } EXPORT_SYMBOL(dev_remove_offload); int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) { struct skb_shared_info *pinfo, *skbinfo = skb_shinfo(skb); unsigned int offset = skb_gro_offset(skb); unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; unsigned int new_truesize; struct sk_buff *lp; int segs; /* Do not splice page pool based packets w/ non-page pool * packets. This can result in reference count issues as page * pool pages will not decrement the reference count and will * instead be immediately returned to the pool or have frag * count decremented. */ if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) || NAPI_GRO_CB(skb)->flush)) return -E2BIG; if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) { if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP || (p->protocol == htons(ETH_P_IPV6) && skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) || p->encapsulation) return -E2BIG; } segs = NAPI_GRO_CB(skb)->count; lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); if (headlen <= offset) { skb_frag_t *frag; skb_frag_t *frag2; int i = skbinfo->nr_frags; int nr_frags = pinfo->nr_frags + i; if (nr_frags > MAX_SKB_FRAGS) goto merge; offset -= headlen; pinfo->nr_frags = nr_frags; skbinfo->nr_frags = 0; frag = pinfo->frags + nr_frags; frag2 = skbinfo->frags + i; do { *--frag = *--frag2; } while (--i); skb_frag_off_add(frag, offset); skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ new_truesize = SKB_TRUESIZE(skb_end_offset(skb)); delta_truesize = skb->truesize - new_truesize; skb->truesize = new_truesize; skb->len -= skb->data_len; skb->data_len = 0; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE; goto done; } else if (skb->head_frag) { int nr_frags = pinfo->nr_frags; skb_frag_t *frag = pinfo->frags + nr_frags; struct page *page = virt_to_head_page(skb->head); unsigned int first_size = headlen - offset; unsigned int first_offset; if (nr_frags + 1 + skbinfo->nr_frags > MAX_SKB_FRAGS) goto merge; first_offset = skb->data - (unsigned char *)page_address(page) + offset; pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; skb_frag_fill_page_desc(frag, page, first_offset, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); /* We dont need to clear skbinfo->nr_frags here */ new_truesize = SKB_DATA_ALIGN(sizeof(struct sk_buff)); delta_truesize = skb->truesize - new_truesize; skb->truesize = new_truesize; NAPI_GRO_CB(skb)->free = NAPI_GRO_FREE_STOLEN_HEAD; goto done; } merge: /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; skb->sk = NULL; delta_truesize = skb->truesize; if (offset > headlen) { unsigned int eat = offset - headlen; skb_frag_off_add(&skbinfo->frags[0], eat); skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; offset = headlen; } __skb_pull(skb, offset); if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; NAPI_GRO_CB(p)->last = skb; __skb_header_release(skb); lp = p; done: NAPI_GRO_CB(p)->count += segs; p->data_len += len; p->truesize += delta_truesize; p->len += len; if (lp != p) { lp->data_len += len; lp->truesize += delta_truesize; lp->len += len; } NAPI_GRO_CB(skb)->same_flow = 1; return 0; } int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) { if (unlikely(p->len + skb->len >= 65536)) return -E2BIG; if (NAPI_GRO_CB(p)->last == p) skb_shinfo(p)->frag_list = skb; else NAPI_GRO_CB(p)->last->next = skb; skb_pull(skb, skb_gro_offset(skb)); NAPI_GRO_CB(p)->last = skb; NAPI_GRO_CB(p)->count++; p->data_len += skb->len; /* sk ownership - if any - completely transferred to the aggregated packet */ skb->destructor = NULL; skb->sk = NULL; p->truesize += skb->truesize; p->len += skb->len; NAPI_GRO_CB(skb)->same_flow = 1; return 0; } static void gro_complete(struct gro_node *gro, struct sk_buff *skb) { struct list_head *head = &net_hotdata.offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; int err = -ENOENT; BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); if (NAPI_GRO_CB(skb)->count == 1) { skb_shinfo(skb)->gso_size = 0; goto out; } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_complete) continue; err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete, ipv6_gro_complete, inet_gro_complete, skb, 0); break; } rcu_read_unlock(); if (err) { WARN_ON(&ptype->list == head); kfree_skb(skb); return; } out: gro_normal_one(gro, skb, NAPI_GRO_CB(skb)->count); } static void __gro_flush_chain(struct gro_node *gro, u32 index, bool flush_old) { struct list_head *head = &gro->hash[index].list; struct sk_buff *skb, *p; list_for_each_entry_safe_reverse(skb, p, head, list) { if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) return; skb_list_del_init(skb); gro_complete(gro, skb); gro->hash[index].count--; } if (!gro->hash[index].count) __clear_bit(index, &gro->bitmask); } /* * gro->hash[].list contains packets ordered by age. * youngest packets at the head of it. * Complete skbs in reverse order to reduce latencies. */ void __gro_flush(struct gro_node *gro, bool flush_old) { unsigned long bitmask = gro->bitmask; unsigned int i, base = ~0U; while ((i = ffs(bitmask)) != 0) { bitmask >>= i; base += i; __gro_flush_chain(gro, base, flush_old); } } EXPORT_SYMBOL(__gro_flush); static unsigned long gro_list_prepare_tc_ext(const struct sk_buff *skb, const struct sk_buff *p, unsigned long diffs) { #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) struct tc_skb_ext *skb_ext; struct tc_skb_ext *p_ext; skb_ext = skb_ext_find(skb, TC_SKB_EXT); p_ext = skb_ext_find(p, TC_SKB_EXT); diffs |= (!!p_ext) ^ (!!skb_ext); if (!diffs && unlikely(skb_ext)) diffs |= p_ext->chain ^ skb_ext->chain; #endif return diffs; } static void gro_list_prepare(const struct list_head *head, const struct sk_buff *skb) { unsigned int maclen = skb->dev->hard_header_len; u32 hash = skb_get_hash_raw(skb); struct sk_buff *p; list_for_each_entry(p, head, list) { unsigned long diffs; if (hash != skb_get_hash_raw(p)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_all ^ skb->vlan_all; diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); else if (!diffs) diffs = memcmp(skb_mac_header(p), skb_mac_header(skb), maclen); /* in most common scenarios 'slow_gro' is 0 * otherwise we are already on some slower paths * either skip all the infrequent tests altogether or * avoid trying too hard to skip each of them individually */ if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) { diffs |= p->sk != skb->sk; diffs |= skb_metadata_dst_cmp(p, skb); diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb); diffs |= gro_list_prepare_tc_ext(skb, p, diffs); } NAPI_GRO_CB(p)->same_flow = !diffs; } } static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) { const struct skb_shared_info *pinfo; const skb_frag_t *frag0; unsigned int headlen; NAPI_GRO_CB(skb)->network_offset = 0; NAPI_GRO_CB(skb)->data_offset = 0; headlen = skb_headlen(skb); NAPI_GRO_CB(skb)->frag0 = skb->data; NAPI_GRO_CB(skb)->frag0_len = headlen; if (headlen) return; pinfo = skb_shinfo(skb); frag0 = &pinfo->frags[0]; if (pinfo->nr_frags && skb_frag_page(frag0) && !PageHighMem(skb_frag_page(frag0)) && (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, skb_frag_size(frag0), skb->end - skb->tail); } } static void gro_pull_from_frag0(struct sk_buff *skb, int grow) { struct skb_shared_info *pinfo = skb_shinfo(skb); BUG_ON(skb->end - skb->tail < grow); memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow); skb->data_len -= grow; skb->tail += grow; skb_frag_off_add(&pinfo->frags[0], grow); skb_frag_size_sub(&pinfo->frags[0], grow); if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { skb_frag_unref(skb, 0); memmove(pinfo->frags, pinfo->frags + 1, --pinfo->nr_frags * sizeof(pinfo->frags[0])); } } static void gro_try_pull_from_frag0(struct sk_buff *skb) { int grow = skb_gro_offset(skb) - skb_headlen(skb); if (grow > 0) gro_pull_from_frag0(skb, grow); } static void gro_flush_oldest(struct gro_node *gro, struct list_head *head) { struct sk_buff *oldest; oldest = list_last_entry(head, struct sk_buff, list); /* We are called with head length >= MAX_GRO_SKBS, so this is * impossible. */ if (WARN_ON_ONCE(!oldest)) return; /* Do not adjust napi->gro_hash[].count, caller is adding a new * SKB to the chain. */ skb_list_del_init(oldest); gro_complete(gro, oldest); } static enum gro_result dev_gro_receive(struct gro_node *gro, struct sk_buff *skb) { u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); struct list_head *head = &net_hotdata.offload_base; struct gro_list *gro_list = &gro->hash[bucket]; struct packet_offload *ptype; __be16 type = skb->protocol; struct sk_buff *pp = NULL; enum gro_result ret; int same_flow; if (netif_elide_gro(skb->dev)) goto normal; gro_list_prepare(&gro_list->list, skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type == type && ptype->callbacks.gro_receive) goto found_ptype; } rcu_read_unlock(); goto normal; found_ptype: skb_set_network_header(skb, skb_gro_offset(skb)); skb_reset_mac_len(skb); BUILD_BUG_ON(sizeof_field(struct napi_gro_cb, zeroed) != sizeof(u32)); BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), sizeof(u32))); /* Avoid slow unaligned acc */ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); NAPI_GRO_CB(skb)->count = 1; if (unlikely(skb_is_gso(skb))) { NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; /* Only support TCP and non DODGY users. */ if (!skb_is_gso_tcp(skb) || (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY)) NAPI_GRO_CB(skb)->flush = 1; } /* Setup for GRO checksum validation */ switch (skb->ip_summed) { case CHECKSUM_COMPLETE: NAPI_GRO_CB(skb)->csum = skb->csum; NAPI_GRO_CB(skb)->csum_valid = 1; break; case CHECKSUM_UNNECESSARY: NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; break; } pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, &gro_list->list, skb); rcu_read_unlock(); if (PTR_ERR(pp) == -EINPROGRESS) { ret = GRO_CONSUMED; goto ok; } same_flow = NAPI_GRO_CB(skb)->same_flow; ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { skb_list_del_init(pp); gro_complete(gro, pp); gro_list->count--; } if (same_flow) goto ok; if (NAPI_GRO_CB(skb)->flush) goto normal; if (unlikely(gro_list->count >= MAX_GRO_SKBS)) gro_flush_oldest(gro, &gro_list->list); else gro_list->count++; /* Must be called before setting NAPI_GRO_CB(skb)->{age|last} */ gro_try_pull_from_frag0(skb); NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; if (!skb_is_gso(skb)) skb_shinfo(skb)->gso_size = skb_gro_len(skb); list_add(&skb->list, &gro_list->list); ret = GRO_HELD; ok: if (gro_list->count) { if (!test_bit(bucket, &gro->bitmask)) __set_bit(bucket, &gro->bitmask); } else if (test_bit(bucket, &gro->bitmask)) { __clear_bit(bucket, &gro->bitmask); } return ret; normal: ret = GRO_NORMAL; gro_try_pull_from_frag0(skb); goto ok; } struct packet_offload *gro_find_receive_by_type(__be16 type) { struct list_head *offload_head = &net_hotdata.offload_base; struct packet_offload *ptype; list_for_each_entry_rcu(ptype, offload_head, list) { if (ptype->type != type || !ptype->callbacks.gro_receive) continue; return ptype; } return NULL; } EXPORT_SYMBOL(gro_find_receive_by_type); struct packet_offload *gro_find_complete_by_type(__be16 type) { struct list_head *offload_head = &net_hotdata.offload_base; struct packet_offload *ptype; list_for_each_entry_rcu(ptype, offload_head, list) { if (ptype->type != type || !ptype->callbacks.gro_complete) continue; return ptype; } return NULL; } EXPORT_SYMBOL(gro_find_complete_by_type); static gro_result_t gro_skb_finish(struct gro_node *gro, struct sk_buff *skb, gro_result_t ret) { switch (ret) { case GRO_NORMAL: gro_normal_one(gro, skb, 1); break; case GRO_MERGED_FREE: if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) napi_skb_free_stolen_head(skb); else if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); else __napi_kfree_skb(skb, SKB_CONSUMED); break; case GRO_HELD: case GRO_MERGED: case GRO_CONSUMED: break; } return ret; } gro_result_t gro_receive_skb(struct gro_node *gro, struct sk_buff *skb) { gro_result_t ret; __skb_mark_napi_id(skb, gro); trace_napi_gro_receive_entry(skb); skb_gro_reset_offset(skb, 0); ret = gro_skb_finish(gro, skb, dev_gro_receive(gro, skb)); trace_napi_gro_receive_exit(ret); return ret; } EXPORT_SYMBOL(gro_receive_skb); static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { if (unlikely(skb->pfmemalloc)) { consume_skb(skb); return; } __skb_pull(skb, skb_headlen(skb)); /* restore the reserve we had after netdev_alloc_skb_ip_align() */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb)); __vlan_hwaccel_clear_tag(skb); skb->dev = napi->dev; skb->skb_iif = 0; /* eth_type_trans() assumes pkt_type is PACKET_HOST */ skb->pkt_type = PACKET_HOST; skb->encapsulation = 0; skb->ip_summed = CHECKSUM_NONE; skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->gso_size = 0; if (unlikely(skb->slow_gro)) { skb_orphan(skb); skb_ext_reset(skb); nf_reset_ct(skb); skb->slow_gro = 0; } napi->skb = skb; } struct sk_buff *napi_get_frags(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; if (!skb) { skb = napi_alloc_skb(napi, GRO_MAX_HEAD); if (skb) { napi->skb = skb; skb_mark_napi_id(skb, napi); } } return skb; } EXPORT_SYMBOL(napi_get_frags); static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) { switch (ret) { case GRO_NORMAL: case GRO_HELD: __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); if (ret == GRO_NORMAL) gro_normal_one(&napi->gro, skb, 1); break; case GRO_MERGED_FREE: if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) napi_skb_free_stolen_head(skb); else napi_reuse_skb(napi, skb); break; case GRO_MERGED: case GRO_CONSUMED: break; } return ret; } /* Upper GRO stack assumes network header starts at gro_offset=0 * Drivers could call both napi_gro_frags() and napi_gro_receive() * We copy ethernet header into skb->data to have a common layout. */ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { struct sk_buff *skb = napi->skb; const struct ethhdr *eth; unsigned int hlen = sizeof(*eth); napi->skb = NULL; skb_reset_mac_header(skb); skb_gro_reset_offset(skb, hlen); if (unlikely(!skb_gro_may_pull(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0); if (unlikely(!eth)) { net_warn_ratelimited("%s: dropping impossible skb from %s\n", __func__, napi->dev->name); napi_reuse_skb(napi, skb); return NULL; } } else { eth = (const struct ethhdr *)skb->data; if (NAPI_GRO_CB(skb)->frag0 != skb->data) gro_pull_from_frag0(skb, hlen); NAPI_GRO_CB(skb)->frag0 += hlen; NAPI_GRO_CB(skb)->frag0_len -= hlen; } __skb_pull(skb, hlen); /* * This works because the only protocols we care about don't require * special handling. * We'll fix it up properly in napi_frags_finish() */ skb->protocol = eth->h_proto; return skb; } gro_result_t napi_gro_frags(struct napi_struct *napi) { gro_result_t ret; struct sk_buff *skb = napi_frags_skb(napi); trace_napi_gro_frags_entry(skb); ret = napi_frags_finish(napi, skb, dev_gro_receive(&napi->gro, skb)); trace_napi_gro_frags_exit(ret); return ret; } EXPORT_SYMBOL(napi_gro_frags); /* Compute the checksum from gro_offset and return the folded value * after adding in any pseudo checksum. */ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb) { __wsum wsum; __sum16 sum; wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); /* See comments in __skb_checksum_complete(). */ if (likely(!sum)) { if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && !skb->csum_complete_sw) netdev_rx_csum_fault(skb->dev, skb); } NAPI_GRO_CB(skb)->csum = wsum; NAPI_GRO_CB(skb)->csum_valid = 1; return sum; } EXPORT_SYMBOL(__skb_gro_checksum_complete); void gro_init(struct gro_node *gro) { for (u32 i = 0; i < GRO_HASH_BUCKETS; i++) { INIT_LIST_HEAD(&gro->hash[i].list); gro->hash[i].count = 0; } gro->bitmask = 0; gro->cached_napi_id = 0; INIT_LIST_HEAD(&gro->rx_list); gro->rx_count = 0; } void gro_cleanup(struct gro_node *gro) { struct sk_buff *skb, *n; for (u32 i = 0; i < GRO_HASH_BUCKETS; i++) { list_for_each_entry_safe(skb, n, &gro->hash[i].list, list) kfree_skb(skb); gro->hash[i].count = 0; } gro->bitmask = 0; gro->cached_napi_id = 0; list_for_each_entry_safe(skb, n, &gro->rx_list, list) kfree_skb(skb); gro->rx_count = 0; }
22 19 19 22 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 // SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/osf.c * * Code extracted from drivers/block/genhd.c * * Copyright (C) 1991-1998 Linus Torvalds * Re-organised Feb 1998 Russell King */ #include "check.h" #define MAX_OSF_PARTITIONS 18 #define DISKLABELMAGIC (0x82564557UL) int osf_partition(struct parsed_partitions *state) { int i; int slot = 1; unsigned int npartitions; Sector sect; unsigned char *data; struct disklabel { __le32 d_magic; __le16 d_type,d_subtype; u8 d_typename[16]; u8 d_packname[16]; __le32 d_secsize; __le32 d_nsectors; __le32 d_ntracks; __le32 d_ncylinders; __le32 d_secpercyl; __le32 d_secprtunit; __le16 d_sparespertrack; __le16 d_sparespercyl; __le32 d_acylinders; __le16 d_rpm, d_interleave, d_trackskew, d_cylskew; __le32 d_headswitch, d_trkseek, d_flags; __le32 d_drivedata[5]; __le32 d_spare[5]; __le32 d_magic2; __le16 d_checksum; __le16 d_npartitions; __le32 d_bbsize, d_sbsize; struct d_partition { __le32 p_size; __le32 p_offset; __le32 p_fsize; u8 p_fstype; u8 p_frag; __le16 p_cpg; } d_partitions[MAX_OSF_PARTITIONS]; } * label; struct d_partition * partition; data = read_part_sector(state, 0, &sect); if (!data) return -1; label = (struct disklabel *) (data+64); partition = label->d_partitions; if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) { put_dev_sector(sect); return 0; } if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) { put_dev_sector(sect); return 0; } npartitions = le16_to_cpu(label->d_npartitions); if (npartitions > MAX_OSF_PARTITIONS) { put_dev_sector(sect); return 0; } for (i = 0 ; i < npartitions; i++, partition++) { if (slot == state->limit) break; if (le32_to_cpu(partition->p_size)) put_partition(state, slot, le32_to_cpu(partition->p_offset), le32_to_cpu(partition->p_size)); slot++; } strlcat(state->pp_buf, "\n", PAGE_SIZE); put_dev_sector(sect); return 1; }
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 // SPDX-License-Identifier: GPL-2.0 #include <linux/acpi.h> #include <linux/array_size.h> #include <linux/bitmap.h> #include <linux/cleanup.h> #include <linux/compat.h> #include <linux/debugfs.h> #include <linux/device.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/idr.h> #include <linux/interrupt.h> #include <linux/irq.h> #include <linux/irqdesc.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/module.h> #include <linux/nospec.h> #include <linux/of.h> #include <linux/pinctrl/consumer.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/srcu.h> #include <linux/string.h> #include <linux/string_choices.h> #include <linux/gpio.h> #include <linux/gpio/driver.h> #include <linux/gpio/machine.h> #include <uapi/linux/gpio.h> #include "gpiolib-acpi.h" #include "gpiolib-cdev.h" #include "gpiolib-of.h" #include "gpiolib-swnode.h" #include "gpiolib-sysfs.h" #include "gpiolib.h" #define CREATE_TRACE_POINTS #include <trace/events/gpio.h> /* Implementation infrastructure for GPIO interfaces. * * The GPIO programming interface allows for inlining speed-critical * get/set operations for common cases, so that access to SOC-integrated * GPIOs can sometimes cost only an instruction or two per bit. */ /* Device and char device-related information */ static DEFINE_IDA(gpio_ida); static dev_t gpio_devt; #define GPIO_DEV_MAX 256 /* 256 GPIO chip devices supported */ static int gpio_bus_match(struct device *dev, const struct device_driver *drv) { struct fwnode_handle *fwnode = dev_fwnode(dev); /* * Only match if the fwnode doesn't already have a proper struct device * created for it. */ if (fwnode && fwnode->dev != dev) return 0; return 1; } static const struct bus_type gpio_bus_type = { .name = "gpio", .match = gpio_bus_match, }; /* * Number of GPIOs to use for the fast path in set array */ #define FASTPATH_NGPIO CONFIG_GPIOLIB_FASTPATH_LIMIT static DEFINE_MUTEX(gpio_lookup_lock); static LIST_HEAD(gpio_lookup_list); static LIST_HEAD(gpio_devices); /* Protects the GPIO device list against concurrent modifications. */ static DEFINE_MUTEX(gpio_devices_lock); /* Ensures coherence during read-only accesses to the list of GPIO devices. */ DEFINE_STATIC_SRCU(gpio_devices_srcu); static DEFINE_MUTEX(gpio_machine_hogs_mutex); static LIST_HEAD(gpio_machine_hogs); const char *const gpio_suffixes[] = { "gpios", "gpio", NULL }; static void gpiochip_free_hogs(struct gpio_chip *gc); static int gpiochip_add_irqchip(struct gpio_chip *gc, struct lock_class_key *lock_key, struct lock_class_key *request_key); static void gpiochip_irqchip_remove(struct gpio_chip *gc); static int gpiochip_irqchip_init_hw(struct gpio_chip *gc); static int gpiochip_irqchip_init_valid_mask(struct gpio_chip *gc); static void gpiochip_irqchip_free_valid_mask(struct gpio_chip *gc); static bool gpiolib_initialized; const char *gpiod_get_label(struct gpio_desc *desc) { struct gpio_desc_label *label; unsigned long flags; flags = READ_ONCE(desc->flags); label = srcu_dereference_check(desc->label, &desc->gdev->desc_srcu, srcu_read_lock_held(&desc->gdev->desc_srcu)); if (test_bit(FLAG_USED_AS_IRQ, &flags)) return label ? label->str : "interrupt"; if (!test_bit(FLAG_REQUESTED, &flags)) return NULL; return label ? label->str : NULL; } static void desc_free_label(struct rcu_head *rh) { kfree(container_of(rh, struct gpio_desc_label, rh)); } static int desc_set_label(struct gpio_desc *desc, const char *label) { struct gpio_desc_label *new = NULL, *old; if (label) { new = kzalloc(struct_size(new, str, strlen(label) + 1), GFP_KERNEL); if (!new) return -ENOMEM; strcpy(new->str, label); } old = rcu_replace_pointer(desc->label, new, 1); if (old) call_srcu(&desc->gdev->desc_srcu, &old->rh, desc_free_label); return 0; } /** * gpio_to_desc - Convert a GPIO number to its descriptor * @gpio: global GPIO number * * Returns: * The GPIO descriptor associated with the given GPIO, or %NULL if no GPIO * with the given number exists in the system. */ struct gpio_desc *gpio_to_desc(unsigned gpio) { struct gpio_device *gdev; scoped_guard(srcu, &gpio_devices_srcu) { list_for_each_entry_srcu(gdev, &gpio_devices, list, srcu_read_lock_held(&gpio_devices_srcu)) { if (gdev->base <= gpio && gdev->base + gdev->ngpio > gpio) return &gdev->descs[gpio - gdev->base]; } } return NULL; } EXPORT_SYMBOL_GPL(gpio_to_desc); /* This function is deprecated and will be removed soon, don't use. */ struct gpio_desc *gpiochip_get_desc(struct gpio_chip *gc, unsigned int hwnum) { return gpio_device_get_desc(gc->gpiodev, hwnum); } /** * gpio_device_get_desc() - get the GPIO descriptor corresponding to the given * hardware number for this GPIO device * @gdev: GPIO device to get the descriptor from * @hwnum: hardware number of the GPIO for this chip * * Returns: * A pointer to the GPIO descriptor or %EINVAL if no GPIO exists in the given * chip for the specified hardware number or %ENODEV if the underlying chip * already vanished. * * The reference count of struct gpio_device is *NOT* increased like when the * GPIO is being requested for exclusive usage. It's up to the caller to make * sure the GPIO device will stay alive together with the descriptor returned * by this function. */ struct gpio_desc * gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum) { if (hwnum >= gdev->ngpio) return ERR_PTR(-EINVAL); return &gdev->descs[array_index_nospec(hwnum, gdev->ngpio)]; } EXPORT_SYMBOL_GPL(gpio_device_get_desc); /** * desc_to_gpio - convert a GPIO descriptor to the integer namespace * @desc: GPIO descriptor * * This should disappear in the future but is needed since we still * use GPIO numbers for error messages and sysfs nodes. * * Returns: * The global GPIO number for the GPIO specified by its descriptor. */ int desc_to_gpio(const struct gpio_desc *desc) { return desc->gdev->base + (desc - &desc->gdev->descs[0]); } EXPORT_SYMBOL_GPL(desc_to_gpio); /** * gpiod_to_chip - Return the GPIO chip to which a GPIO descriptor belongs * @desc: descriptor to return the chip of * * *DEPRECATED* * This function is unsafe and should not be used. Using the chip address * without taking the SRCU read lock may result in dereferencing a dangling * pointer. * * Returns: * Address of the GPIO chip backing this device. */ struct gpio_chip *gpiod_to_chip(const struct gpio_desc *desc) { if (!desc) return NULL; return gpio_device_get_chip(desc->gdev); } EXPORT_SYMBOL_GPL(gpiod_to_chip); /** * gpiod_to_gpio_device() - Return the GPIO device to which this descriptor * belongs. * @desc: Descriptor for which to return the GPIO device. * * This *DOES NOT* increase the reference count of the GPIO device as it's * expected that the descriptor is requested and the users already holds a * reference to the device. * * Returns: * Address of the GPIO device owning this descriptor. */ struct gpio_device *gpiod_to_gpio_device(struct gpio_desc *desc) { if (!desc) return NULL; return desc->gdev; } EXPORT_SYMBOL_GPL(gpiod_to_gpio_device); /** * gpio_device_get_base() - Get the base GPIO number allocated by this device * @gdev: GPIO device * * Returns: * First GPIO number in the global GPIO numberspace for this device. */ int gpio_device_get_base(struct gpio_device *gdev) { return gdev->base; } EXPORT_SYMBOL_GPL(gpio_device_get_base); /** * gpio_device_get_label() - Get the label of this GPIO device * @gdev: GPIO device * * Returns: * Pointer to the string containing the GPIO device label. The string's * lifetime is tied to that of the underlying GPIO device. */ const char *gpio_device_get_label(struct gpio_device *gdev) { return gdev->label; } EXPOR